In [None]:
from psycopg import Cursor
from openai import OpenAI
import os
import psycopg

In [None]:
# Path to the raw conversation transcript that gets embedded into the database.
conversation_file_path = "../data/conversation.txt"

# The API key is read from the OPENAI_KEY environment variable so it is never
# stored in the notebook itself.
openai_client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

# Connection string for the PostgreSQL database holding the embeddings.
# CHATBOT_DB_DSN overrides the local development default below, so real
# credentials do not have to be written into (and committed with) the notebook.
db_connection_str = os.getenv(
    "CHATBOT_DB_DSN",
    "dbname=chatbot_rag user=odoo password=odoo host=localhost port=5432",
)

In [None]:
def create_conversation_list(file_path: str) -> list[str]:
    """Load the conversation transcript and return its usable text lines.

    Lines starting with ``<`` are discarded (presumably markup/speaker tags —
    confirm against the data file), the five-space indentation prefix is
    removed from the remaining lines, and blank lines are dropped so that no
    empty corpus is later embedded and stored.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        The remaining non-blank lines, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, encoding="utf-8") as file:
        raw_lines = file.read().split("\n")

    cleaned_lines = []
    for line in raw_lines:
        if line.startswith("<"):
            continue
        content = line.removeprefix("     ")
        # A trailing newline or an empty separator line yields "" here;
        # such entries carry no text worth embedding.
        if content.strip():
            cleaned_lines.append(content)
    return cleaned_lines
    
def calculate_embedding(corpus: str, client: OpenAI) -> list[float]:
    """Return the embedding vector of ``corpus``.

    The text is sent to the ``text-embedding-ada-002`` model through
    ``client.embeddings.create`` (float encoding) and the embedding of the
    first item in the response data is returned.

    Args:
        corpus: Text to embed.
        client: OpenAI client used to issue the request.

    Returns:
        The embedding of the first response item.
    """
    response = client.embeddings.create(
        model="text-embedding-ada-002",
        input=corpus,
        encoding_format="float",
    )
    first_item = response.data[0]
    return first_item.embedding

def save_embedding(corpus: str, embedding: list[float], cursor: Cursor) -> None:
    """Insert one ``(corpus, embedding)`` row into the ``embeddings`` table.

    The statement is executed on ``cursor`` with bound parameters; this
    function does not commit, so the caller owns the transaction.

    Args:
        corpus: Source text of the row.
        embedding: Embedding vector stored alongside the text.
        cursor: Open database cursor used to run the INSERT.
    """
    insert_statement = """
        INSERT INTO embeddings (corpus, embedding) VALUES (%s, %s)
    """
    row_values = (corpus, embedding)
    cursor.execute(insert_statement, row_values)

def retrieve_similar_corpus(input_corpus: str, client: OpenAI, db_connection_str: str) -> tuple[int, str, list[float]]:
    """Fetch the stored row whose embedding is closest to ``input_corpus``.

    The input text is embedded with ``calculate_embedding``; the rows of the
    ``embeddings`` table are then ordered with the ``<=>`` operator (cosine
    distance according to the pgvector documentation) against that vector and
    only the first row is kept. A new database connection is opened for each
    call.

    Args:
        input_corpus: Text to search for.
        client: OpenAI client used to compute the query embedding.
        db_connection_str: Connection string passed to ``psycopg.connect``.

    Returns:
        The ``(id, corpus, embedding)`` row returned by ``fetchone()``.
        NOTE(review): ``fetchone()`` yields ``None`` when the query returns no
        row (e.g. empty table), which the return annotation does not reflect.
    """
    nearest_row_sql = """
                SELECT id, corpus, embedding
                FROM embeddings
                ORDER BY embedding <=> %s::vector
                LIMIT 1;
            """
    # Embed first, so the connection is only opened once the vector is ready.
    query_vector = calculate_embedding(corpus=input_corpus, client=client)
    with psycopg.connect(db_connection_str) as conn, conn.cursor() as cur:
        cur.execute(nearest_row_sql, [query_vector])
        return cur.fetchone()
        
def generate_response(input_corpus: str, client: OpenAI = openai_client, db_connection_str: str=db_connection_str):
    """Answer ``input_corpus`` by rephrasing the closest stored text.

    The question is only used to look up the nearest row with
    ``retrieve_similar_corpus``; the text of that row (not the question itself)
    is then sent as the user message to the ``gpt-4o`` chat model, whose system
    prompt asks it to reformulate database answers for the user.

    Args:
        input_corpus: The user's question.
        client: OpenAI client; defaults to the notebook-level ``openai_client``.
        db_connection_str: Database connection string; defaults to the
            notebook-level ``db_connection_str``.

    Returns:
        The message content of the first completion choice.
    """
    nearest_row = retrieve_similar_corpus(
        input_corpus=input_corpus,
        client=client,
        db_connection_str=db_connection_str,
    )
    # Row layout is (id, corpus, embedding); only the text is needed here.
    retrieved_text = nearest_row[1]

    system_message = {
        "role": "system",
        "content": "Vous êtes un assistant chatbot serviable travaillant dans le service d'accueil d'une université. Vous devez reformuler des réponses extraites d'une base de données de manière cohérente et compréhensible pour l'utilisateur.",
    }
    retrieved_message = {"role": "user", "content": retrieved_text}

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, retrieved_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
    

In [None]:
import psycopg
import numpy as np

# Rebuild the embeddings table from the conversation file in one transaction.
with psycopg.connect(db_connection_str) as conn:
    with conn.cursor() as cur:
        # Start from a clean table on every run. IF EXISTS keeps this cell
        # runnable on a fresh database where the table was never created;
        # a plain DROP TABLE would raise there and abort the whole cell.
        cur.execute("DROP TABLE IF EXISTS embeddings")
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")

        # vector(1536) fixes the stored dimension; it has to match the size of
        # the vectors produced by the embedding model used below.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS embeddings (
                    id serial PRIMARY KEY,
                    corpus text,
                    embedding vector(1536)
            );
        """)

        # One embedding request and one INSERT per transcript line.
        corpus_list = create_conversation_list(file_path=conversation_file_path)
        for corpus in corpus_list:
            embedding = calculate_embedding(corpus=corpus, client=openai_client)
            save_embedding(corpus=corpus, embedding=embedding, cursor=cur)

        conn.commit()

In [None]:
# Sample question used to exercise the retrieval step.
user_message = "Où se trouve le site?"

# Display the closest stored row for that question (the bare call is the cell output).
retrieve_similar_corpus(user_message, openai_client, db_connection_str)

In [None]:
# Full round-trip: retrieve the closest stored text and let the chat model rephrase it.
generate_response(user_message)