In [14]:
# 1. Imports
import os
import json  # Ajout de l'import json
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.api.types import EmbeddingFunction
from sklearn.model_selection import train_test_split
import openai
import time  # Pour le délai entre les appels API

In [15]:
# 2. Charger la clé OpenAI
with open('./credentials/api.json') as f:
    data = json.load(f)
    OPENAI_API_KEY = data['OPENAI_API_KEY']
    openai.api_key = OPENAI_API_KEY

In [16]:
filename_data = '../datacreation/dialogues_embededd.pkl'
data = pd.read_pickle(filename_data)
train_data, test_data = train_test_split(data, test_size=0.05)
print("Train shape:", train_data.shape)
print("Test shape:", test_data.shape)

Train shape: (950, 6)
Test shape: (50, 6)


In [17]:
df = pd.read_csv('../datacreation/dialogues.csv', sep='\t')
df = df.dropna()
df.rename(columns={'Description':'Question', 'Doctor':'Answer'}, inplace=True)
df['combined'] = 'Question: ' + df.Question.str.strip() + '\nAnswer: ' + df.Answer.str.strip()
df = df.drop_duplicates().reset_index(drop=True)
df['ids'] = df.index
documents = df
print(documents.head(2))

                                            Question  \
0        Will masturbation cause weakness in nerves?   
1  Could lack of hair growth be due to masturbation?   

                                             Patient  \
0  Hi, may I answer your health queries right now...   
1  hai sir i am 25years old i used to do masturba...   

                                              Answer  \
0  Hi, Masturbation does make the nerves weak whe...   
1  Hi, Since you said you masturbate frequently e...   

                                            combined  ids  
0  Question: Will masturbation cause weakness in ...    0  
1  Question: Could lack of hair growth be due to ...    1  


In [18]:
class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
    MODEL = SentenceTransformer('all-MiniLM-L6-v2')
    def __call__(self, texts):
        return MiniLML6V2EmbeddingFunction.MODEL.encode(texts).tolist()
emb_func = MiniLML6V2EmbeddingFunction()

  emb_func = MiniLML6V2EmbeddingFunction()


In [19]:
class ChromaWithUpsert:
    def __init__(
            self,
            name='openai_rag_collection',
            persist_directory=None,
            embedding_function=None,
            collection_metadata=None,
    ):
        if persist_directory is not None:
            self._client = chromadb.PersistentClient(path=persist_directory)
        else:
            self._client = chromadb.EphemeralClient()
        self._embedding_function = embedding_function
        self._persist_directory = persist_directory
        self._name = name
        self._collection = self._client.get_or_create_collection(
            name=self._name,
            embedding_function=self._embedding_function
            if self._embedding_function is not None
            else None,
            metadata=collection_metadata,
        )

    def upsert_texts(
        self,
        texts,
        metadata=None,
        ids=None,
        **kwargs
    ):
        texts_list = list(texts)
        if ids is None:
            import uuid
            ids = [str(uuid.uuid1()) for _ in texts_list]
        batch_size = 5000
        for i in range(0, len(texts_list), batch_size):
            end = min(i + batch_size, len(texts_list))
            batch_texts = texts_list[i:end]
            batch_ids = ids[i:end]
            batch_metadata = None if metadata is None else metadata[i:end]
            self._collection.upsert(
                metadatas=batch_metadata, 
                documents=batch_texts, 
                ids=batch_ids
            )
        return ids

    def is_empty(self):
        return self._collection.count() == 0

    def persist(self):
        if hasattr(self._client, 'persist'):
            self._client.persist()

    def query(self, query_texts, n_results=5):
        return self._collection.query(query_texts=query_texts, n_results=n_results)

In [20]:
knowledge_base_dir = '../datacreation/knowledge_base_openai'
os.makedirs(knowledge_base_dir, exist_ok=True)

chroma = ChromaWithUpsert(
    name='openai_rag_collection',
    embedding_function=emb_func,
    persist_directory=knowledge_base_dir,
)

if chroma.is_empty():
    batch_size = 100
    texts = documents.combined.tolist()
    metadata = [{'Question': q, 'ids': i} for q, i in zip(documents.Question, documents.ids)]
    ids = [str(i) for i in documents.ids]
    for i in range(0, len(texts), batch_size):
        end = min(i + batch_size, len(texts))
        batch_texts = texts[i:end]
        batch_metadata = metadata[i:end]
        batch_ids = ids[i:end]
        _ = chroma.upsert_texts(
            texts=batch_texts,
            metadata=batch_metadata,
            ids=batch_ids
        )
        chroma.persist()
    print("Indexation terminée !")
else:
    print("La base de connaissances est déjà indexée.")

La base de connaissances est déjà indexée.


In [21]:
question_texts = [q.strip("?") + "?" for q in test_data['Question'].tolist()]
print("\n".join(question_texts[:5]))

What is the treatment for premature ejaculation?
What causes premature ejaculation?
What causes relapse of chronic bacterial prostatitis?
What causes pain in penis?
Suggest treatment for swelling in scrotum and hematospermia?


In [22]:
relevant_contexts = []
for question_text in question_texts:
    relevant_chunks = chroma.query(
        query_texts=[question_text],
        n_results=5,
    )
    relevant_contexts.append(relevant_chunks)

In [23]:
def make_prompt(context, question_text):
    return (f"Veuillez répondre à la question suivante en vous appuyant sur le contexte fourni.\n"
            f"{context}\n\n"
            f"Question : {question_text}")

prompt_texts = []
for relevant_context, question_text in zip(relevant_contexts, question_texts):
    context = "\n\n".join(relevant_context["documents"][0])
    prompt_text = make_prompt(context, question_text)
    prompt_texts.append(prompt_text)

print(prompt_texts[0])

Veuillez répondre à la question suivante en vous appuyant sur le contexte fourni.
Question: Suggest treatment for premature ejaculation
Answer: Hi, This is a very common problem and unfortunately, there’s no medication for the same. The commonest cause of premature ejaculation is the anxiety of performance. A behavioural technique such as squeeze-pause technique, desensitizing creams and SRRI (selective serotonin reuptake inhibitors) are few common treatments useful in premature ejaculation. You should take consultation with a sex therapist, psychologist, or psychiatrist to be assessed properly. Hope I have answered your query. Let me know if I can assist you further. Take care Regards, Dr. Iven Romic Rommstein

Question: What is the treatment for premature ejaculation?
Answer: Hi,Premature ejaculation is mainly treated with psychotherapy and proper training.  If you are new to sex then it will automatically improve with time. In the resistant cases selective serotonin replace inhibito

In [24]:
def generate_openai_answer(prompt, model="gpt-3.5-turbo", temperature=0.2):
       response = openai.chat.completions.create(
           model=model,
           messages=[
               {"role": "system", "content": "Vous êtes un assistant médical compétent."},
               {"role": "user", "content": prompt}
           ],
           temperature=temperature,
           max_tokens=512
       )
       return response.choices[0].message.content

results = []
for prompt_text in prompt_texts[:10]:  # Limitez à 10 pour tester, puis élargissez
    try:
        answer = generate_openai_answer(prompt_text)
        results.append(answer)
        time.sleep(1)  # Pour éviter de dépasser le quota API
    except Exception as e:
        print("Erreur OpenAI:", e)
        results.append("Erreur API")

Erreur OpenAI: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Erreur OpenAI: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Erreur OpenAI: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Erreur OpenAI: Error c

In [None]:
def generate_openai_answer(prompt, model="gpt-3.5-turbo", temperature=0.2):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "Vous êtes un assistant médical compétent."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=512
    )
    return response['choices'][0]['message']['content']

results = []
for prompt_text in prompt_texts[:10]:  # Limitez à 10 pour tester, puis élargissez
    try:
        answer = generate_openai_answer(prompt_text)
        results.append(answer)
        time.sleep(1)  # Pour éviter de dépasser le quota API
    except Exception as e:
        print("Erreur OpenAI:", e)
        results.append("Erreur API")

Erreur OpenAI: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Erreur OpenAI: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Erreur OpenAI: 

You tried to access