In [None]:
# ======================================================
# 0) INSTALLATION & SETUP
# ======================================================
# Run these lines only once, on first installation.

# !pip install sentence-transformers faiss-cpu numpy pandas requests --quiet

# Install Ollama (if it is not already installed)
# !curl -fsSL https://ollama.com/install.sh | sh

# Start the Ollama server (left commented out; uncomment if no server is running)
import subprocess, time
# subprocess.Popen(["ollama", "serve"])
# time.sleep(6)

# Pull the model - SPEED OPTIMISATION ⚡
# "phi3" (3.8B) is used instead of "mistral-nemo" (12B); per the author this
# cuts the run time by a factor of about 3 to 4.
!ollama pull phi3

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


In [None]:
import pandas as pd

# Load the disease/symptom table; the cleaning cell below reads its
# "Disease" and "Symptoms" columns.
df = pd.read_csv("rag_clean.csv")
# Last expression of the cell: shows a preview of the first rows.
df.head()


Unnamed: 0,Disease,Symptoms
0,abdominal aortic aneurysm,"shortness of breath, arm swelling, burning abd..."
1,abdominal hernia,"symptoms of the scrotum and testes, sharp abdo..."
2,abscess of nose,"sore throat, irritable infant, vomiting, fever..."
3,abscess of the lung,"depressive or psychotic symptoms, itchy eyelid..."
4,abscess of the pharynx,"sharp chest pain, sore throat cough, nasal con..."


In [None]:
import numpy as np
import faiss
import os
import pickle
from sentence_transformers import SentenceTransformer

print("Nettoyage du dataset...")

# Rows without a disease name or without symptoms cannot be indexed.
df_clean = df.dropna(subset=["Disease", "Symptoms"])


def _merge_symptoms(symptom_rows):
    """Merge the comma-separated symptom strings of one disease.

    The symptom text is split on commas (not on whitespace), so multi-word
    symptoms such as "shortness of breath" stay intact. Duplicates are removed
    case-insensitively and first-seen order is kept, which makes the result
    deterministic from one run to the next.

    Args:
        symptom_rows: iterable of symptom strings belonging to one disease.

    Returns:
        A single ", "-separated string of unique symptom phrases.
    """
    seen = set()
    merged = []
    for row in symptom_rows:
        for phrase in str(row).split(","):
            phrase = " ".join(phrase.split())  # trim and collapse inner whitespace
            key = phrase.lower()
            if phrase and key not in seen:
                seen.add(key)
                merged.append(phrase)
    return ", ".join(merged)


# One row per disease, with all of its symptom phrases merged.
# NOTE: if "rag_index.faiss" / "rag_docs.pkl" were written by an earlier run,
# delete them so the index is rebuilt from this cleaned text.
df_unique = (
    df_clean.groupby("Disease")["Symptoms"]
    .apply(_merge_symptoms)
    .reset_index()
)
df_unique.columns = ["Disease", "Symptoms"]

print("Nombre de maladies uniques :", len(df_unique))

Nettoyage du dataset...
Nombre de maladies uniques : 773


In [None]:
# Sentence-embedding model shared by index construction and query encoding.
embed = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# On-disk cache written by a previous run of this cell.
INDEX_PATH = "rag_index.faiss"
DOCS_PATH = "rag_docs.pkl"

index, docs = None, None

if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
    print("Chargement de l'index existant (Cache)...")
    index = faiss.read_index(INDEX_PATH)
    # SECURITY: pickle.load can execute arbitrary code; only load a cache file
    # that this notebook produced itself.
    with open(DOCS_PATH, "rb") as f:
        docs = pickle.load(f)

    # A cache built from a different dataset would map FAISS row ids to the
    # wrong documents; rebuild when the sizes disagree.
    if not (index.ntotal == len(docs) == len(df_unique)):
        print("Cache incohérent avec le dataset : reconstruction de l'index...")
        index, docs = None, None

if index is None:
    print("Génération des embeddings et de l'index (Cela peut prendre du temps)...")
    texts = (df_unique["Disease"] + " : " + df_unique["Symptoms"]).tolist()

    emb = embed.encode(
        texts,
        batch_size=32,
        show_progress_bar=True
    ).astype("float32")  # FAISS expects float32 vectors

    d = emb.shape[1]
    index = faiss.IndexHNSWFlat(d, 16)   # HNSW graph index; 16 = links per node (M)
    index.hnsw.efConstruction = 30
    index.add(emb)

    # docs[i] describes the vector stored at row i of the index.
    docs = [
        {"disease": disease, "symptoms": symptoms}
        for disease, symptoms in zip(df_unique["Disease"], df_unique["Symptoms"])
    ]

    # Persist both artefacts so the next run can skip the embedding step.
    faiss.write_index(index, INDEX_PATH)
    with open(DOCS_PATH, "wb") as f:
        pickle.dump(docs, f)

print("RAG index prêt ✔️")

Batches:   0%|          | 0/25 [00:00<?, ?it/s]

RAG index prêt ✔️


In [None]:
def rag_search(query, k=5):
    """Return the indexed documents closest to ``query``.

    Args:
        query: free-text symptom description to embed and look up.
        k: maximum number of neighbours to request from the index.

    Returns:
        A list of at most ``k`` dicts with keys ``"disease"``, ``"symptoms"``
        and ``"score"``, in the order returned by the index. ``score`` is
        ``1 / (1 + distance)``, so a smaller distance gives a score closer
        to 1.
    """
    q_emb = embed.encode([query]).astype("float32")
    distances, labels = index.search(q_emb, k)

    out = []
    for dist, idx in zip(distances[0], labels[0]):
        idx = int(idx)
        # FAISS fills missing neighbours with label -1; docs[-1] would silently
        # return the last document as a hit, so such slots are skipped.
        if idx < 0 or idx >= len(docs):
            continue
        doc = docs[idx]
        out.append({
            "disease": doc["disease"],
            "symptoms": doc["symptoms"],
            "score": 1.0 / (1.0 + float(dist)),
        })
    return out


In [None]:
# Instruction header placed at the top of every diagnosis prompt.
SYSTEM_PROMPT = """
You are a medical assistant.
Analyze symptoms conservatively.
Return ONLY a JSON array, like:
[
  {"disease": "...", "probability": 0-100, "explanation": "..."}
]
No extra text.
"""

def build_prompt(symptoms, rag_docs):
    """Assemble the full LLM prompt for one patient.

    Args:
        symptoms: the patient's symptoms as a single string.
        rag_docs: retrieved documents, each a dict with "disease" and
            "symptoms" keys; they are listed as "Doc 1", "Doc 2", ...

    Returns:
        The prompt text: system instructions, the patient symptoms, the
        numbered context documents and a closing reminder to answer in JSON.
    """
    context_lines = []
    for position, entry in enumerate(rag_docs, start=1):
        context_lines.append(
            f"Doc {position}: {entry['disease']} | {entry['symptoms']}"
        )
    context_block = "\n".join(context_lines)

    # The empty strings reproduce the blank lines of the prompt layout.
    sections = [
        "",
        SYSTEM_PROMPT,
        "",
        f"Patient symptoms: {symptoms}",
        "",
        "Relevant medical context:",
        context_block,
        "",
        "Answer ONLY with the JSON array.",
        "",
    ]
    return "\n".join(sections)


In [None]:
import requests
import re
import json

def call_llm(prompt, model="phi3", timeout=300):
    """Send ``prompt`` to the local Ollama server and parse the JSON diagnosis.

    Args:
        prompt: full prompt text to send.
        model: Ollama model name (defaults to the fast "phi3" model).
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        A list of diagnosis dicts parsed from the model's answer.
        An empty list when the request fails or the JSON array cannot be
        decoded. A one-element list holding an "Erreur d'analyse" entry when
        the answer contains no usable JSON at all.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        # Sampling parameters must be nested under "options" for Ollama;
        # "num_predict" is its name for the maximum number of generated tokens.
        "options": {
            "temperature": 0.0,
            "num_predict": 500,
        },
    }

    try:
        r = requests.post(
            "http://localhost:11434/api/generate",
            json=payload,
            timeout=timeout,
        )
        r.raise_for_status()
        raw = r.json()["response"]
    except Exception as e:
        # Best effort: any transport or response-shape problem is reported and
        # turned into "no diagnosis" instead of interrupting the notebook.
        print(f"Erreur de connexion à Ollama : {e}")
        return []

    # Greedy match from the first "[" to the last "]" of the answer.
    match = re.search(r"\[.*\]", raw, re.DOTALL)
    if match:
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError:
            print("Erreur de décodage JSON. Brut :", raw)
            return []
        # Downstream code calls .get() on each entry, so only dicts are kept.
        return [item for item in parsed if isinstance(item, dict)]

    print("DEBUG RAW:", raw)
    # Repair attempt: the model may have returned one bare object without
    # the surrounding brackets.
    obj_match = re.search(r"\{.*\}", raw, re.DOTALL)
    if obj_match:
        try:
            return [json.loads(obj_match.group(0))]
        except json.JSONDecodeError:
            pass  # fall through to the generic error entry below
    return [{"disease": "Erreur d'analyse", "probability": 0, "explanation": "Le modèle n'a pas renvoyé de JSON valide."}]

In [None]:
def diagnose(symptoms_list):
    """Run the full retrieval + LLM pipeline for one patient.

    Args:
        symptoms_list: list of symptom strings. A single string is also
            accepted and treated as one symptom (joining it directly would
            split it into characters).

    Returns:
        A dict with keys ``"input"`` (the cleaned symptom list),
        ``"rag_context"`` (retrieved documents) and ``"diagnosis"`` (the
        parsed LLM answer).

    Raises:
        ValueError: if no non-blank symptom is provided.
    """
    if isinstance(symptoms_list, str):
        symptoms_list = [symptoms_list]

    # Ignore None and whitespace-only entries so they do not reach the prompt.
    cleaned = [
        str(item).strip()
        for item in (symptoms_list or [])
        if item is not None and str(item).strip()
    ]
    if not cleaned:
        raise ValueError("Provide a list of symptoms")

    symptoms = ", ".join(cleaned)

    # 1) RAG context
    rag_docs = rag_search(symptoms)

    # 2) Prompt
    prompt = build_prompt(symptoms, rag_docs)

    # 3) LLM
    diagnosis = call_llm(prompt)

    return {
        "input": cleaned,
        "rag_context": rag_docs,
        "diagnosis": diagnosis
    }


In [None]:
def _normalize_probability(value):
    """Coerce a model-supplied probability to a number clamped to [0, 100].

    Accepts ints, floats and strings such as "60" or "60%"; anything that
    cannot be read as a number (including None and NaN) becomes 0.
    """
    if isinstance(value, str):
        value = value.strip().rstrip("%").strip()
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0
    if number != number:  # NaN is the only value not equal to itself
        return 0
    number = min(max(number, 0.0), 100.0)
    return int(number) if number.is_integer() else number


def format_diagnosis_human(result):
    """Print the diagnosis as a human-readable report.

    Args:
        result: dict as returned by ``diagnose`` with an ``"input"`` list and
            a ``"diagnosis"`` list of dicts carrying ``"disease"``,
            ``"probability"`` and ``"explanation"``.

    Entries that are not dicts are skipped, and probabilities are normalised
    to the 0-100 range so the 20-cell confidence bar is always well-formed.
    """
    print(f"\n{'='*40}")
    print(f"🏥 RAPPORT DE DIAGNOSTIC IA")
    print(f"{'='*40}")
    print(f"📋 Symptômes analysés : {', '.join(str(s) for s in result['input'])}")
    print(f"{'-'*40}\n")

    print("🔍 ANALYSE DES POSSIBILITÉS :\n")
    entries = [d for d in (result.get('diagnosis') or []) if isinstance(d, dict)]
    if not entries:
        # The LLM call returns an empty list on failure; say so explicitly.
        print("   Aucun diagnostic exploitable n'a été renvoyé par le modèle.\n")

    for diag in entries:
        prob = _normalize_probability(diag.get('probability', 0))
        disease = diag.get('disease', 'Inconnu')
        explanation = diag.get('explanation', '')

        # Visual confidence bar: 20 cells, one per 5 percentage points.
        bar_len = int(prob / 5)
        bar = "█" * bar_len + "░" * (20 - bar_len)

        print(f"🔹 **{disease}** (Confiance : {prob}%)")
        print(f"   {bar}")
        print(f"   📝 {explanation}\n")

    print(f"{'='*40}")
    print("⚠️ AVERTISSEMENT : Ce résultat est généré par une IA.")
    print("Consultez impérativement un professionnel de santé.")
    print(f"{'='*40}\n")

def generate_friendly_response(diagnosis_json, model="mistral-nemo", timeout=300):
    """Ask an LLM to turn the raw diagnosis into a short, empathetic message.

    Args:
        diagnosis_json: JSON-serialisable diagnosis data (typically the
            ``"diagnosis"`` list produced by ``diagnose``).
        model: Ollama model name; it must already be available on the local
            server (the setup cell of this notebook only pulls "phi3").
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        The generated French text, or a string starting with
        "Erreur lors de la génération :" if the request fails.
    """
    print("💬 Génération de la réponse du Dr. Mistral en cours...")

    prompt = f"""
    Tu es un assistant médical bienveillant et professionnel.
    Voici les résultats bruts d'une analyse de symptômes : {json.dumps(diagnosis_json)}
    
    Tâche : Rédige une réponse courte et rassurante pour le patient.
    - Résume les causes possibles mentionnées (sans donner de pourcentages précis).
    - Explique brièvement pourquoi ces causes sont envisagées.
    - Termine par un conseil fort de consulter un médecin.
    - Ton ton doit être calme, empathique et clair.
    - Réponds en Français.
    """

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        # Sampling parameters must be nested under "options" for Ollama.
        "options": {"temperature": 0.7},
    }

    try:
        r = requests.post(
            "http://localhost:11434/api/generate",
            json=payload,
            timeout=timeout,
        )
        # Surface HTTP errors (e.g. unknown model) with their real cause
        # instead of a KeyError on the missing "response" field.
        r.raise_for_status()
        return r.json()["response"]
    except Exception as e:
        return f"Erreur lors de la génération : {e}"

In [None]:
# Run the diagnosis on a sample patient
symptomes_patient = ["dizziness", "insomnia", "chest tightness"]

print(f"⏳ Analyse en cours pour : {symptomes_patient} ...")
# `time` is imported in the setup cell at the top of the notebook.
start_time = time.time()

result = diagnose(symptomes_patient)

end_time = time.time()
print(f"✅ Terminé en {round(end_time - start_time, 2)} secondes.")

# Print the formatted report directly
format_diagnosis_human(result)

# Optional: also print the empathetic, patient-facing answer
# print("-" * 40)
# print(generate_friendly_response(result['diagnosis']))

{'input': ['dizziness', 'insomnia', 'chest tightness'],
 'rag_context': [{'disease': 'ischemic heart disease',
   'symptoms': 'breathing breathing, tight tightness chest of muscle shortness fatigue, lymphedema tight, throat fatigue pain feels dizziness, breath, edema, dizziness palpitations peripheral sharp edema palpitations, pain, difficulty tightness,',
   'score': 0.6284035261310359},
  {'disease': 'acute stress reaction',
   'symptoms': 'insomnia chest nervousness burning involuntary of shortness movements, headache, anxiety symptoms psychotic fatigue pain abdominal nervousness, or dizziness, breath, dizziness movements and depressive sharp abnormal breath headache depression, insomnia, pain, symptoms,',
   'score': 0.6251411179124602},
  {'disease': 'hypertensive heart disease',
   'symptoms': 'breathing recent swelling, tightness insomnia pregnancy chest weakness of shortness fatigue, leg heartburn fatigue pain weakness, tightness, breath, swelling palpitations sharp heartburn, 