# TP : Système Expert Médical avec Règles d'Associations

**Objectifs** :
1. Créer une base PostgreSQL locale
2. Aggréger des données de sources hétérogènes
3. Comparer Apriori vs FP-Growth
4. Déployer une API de diagnostic

---

## 1. Configuration de l'Environnement

### 1.1 Installation des Librairies
Exécutez cette cellule **une seule fois** :

In [None]:
import requests
import json
import pandas as pd

def fetch_drugs():
    url = "https://api.fda.gov/drug/label.json"
    params = {'limit': 5}  # Limite à 5 pour l'exemple
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return pd.json_normalize(response.json()['results'])
    else:
        print("Erreur API:", response.status_code)
        return pd.DataFrame()

# Récupérer les données
drugs_df = fetch_drugs()

# Afficher pour vérifier
print(drugs_df.head())

In [None]:
!pip install faker

In [1]:
import requests
import pandas as pd

# Récupération des données brutes
url = "https://mockapi.io/endpoint/patients"
response = requests.get(url)
raw_data = response.json()

# Conversion en DataFrame
patients_api_df = pd.json_normalize(
    raw_data,
    sep="_"
)

print("Colonnes brutes de l'API :", patients_api_df.columns.tolist())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
import json
import random

from faker import Faker
from datetime import datetime, timedelta

fake = Faker('fr_FR')

# Map diagnostic => symptômes typiques
diagnosisSymptomsMap = {
    "grippe": ["fièvre", "toux", "fatigue", "courbatures"],
    "COVID-19": ["fièvre", "toux sèche", "perte d’odorat", "fatigue"],
    "migraine": ["maux de tête", "nausées", "sensibilité à la lumière"],
    "angine": ["gorge irritée", "fièvre", "douleur en avalant"],
    "varicelle": ["éruption cutanée", "fièvre", "démangeaisons"],
    "asthme": ["essoufflement", "sifflements", "toux"],
    "allergie": ["éternuements", "yeux rouges", "démangeaisons"],
    "gastro-entérite": ["nausées", "vomissements", "diarrhée", "crampes"],
    "hypertension": ["maux de tête", "vertiges", "saignement de nez"]
}

def generatePatient(i):
    diagnosis = random.choice(list(diagnosisSymptomsMap.keys()))
    symptoms = random.sample(diagnosisSymptomsMap[diagnosis], k=random.randint(2, 4))

    patient = {
        "patient_id": f"P{str(i).zfill(3)}",
        "age": random.randint(18, 90),
        "gender": random.choice(["M", "F"]),
        "symptoms": symptoms,
        "diagnosis": diagnosis,
        "timestamp": (datetime.now() - timedelta(days=random.randint(0, 365))).isoformat(),
        "notes": fake.sentence() if random.random() > 0.3 else None
    }
    return patient

# Générer 50 patients
patients = [generatePatient(i) for i in range(1, 51)]

# Écrire dans un fichier JSON
with open("patients.json", "w", encoding="utf-8") as f:
    json.dump(patients, f, indent=2, ensure_ascii=False)

print("✅ patients.json généré avec succès (50 entrées)")


In [None]:
import psycopg2
import random
from faker import Faker
from datetime import datetime, timedelta

fake = Faker('fr_FR')

# Connexion à PostgreSQL
conn = psycopg2.connect(
    host="localhost",
    database="medical_db",
    user="med_user",
    password="med_pass"
)
cursor = conn.cursor()

diagnosisMap = {
    "grippe": ["fièvre", "toux", "fatigue", "courbatures"],
    "COVID-19": ["fièvre", "toux sèche", "perte d’odorat", "fatigue"],
    "migraine": ["maux de tête", "nausées", "sensibilité à la lumière"],
    "angine": ["gorge irritée", "fièvre", "douleur en avalant"],
    "asthme": ["essoufflement", "sifflements", "toux"],
    "allergie": ["éternuements", "yeux rouges", "démangeaisons"],
    "hypertension": ["maux de tête", "vertiges", "saignement de nez"]
}

statuses = ["terminée", "en attente", "en cours"]
medicalCenters = ["CHU Lyon", "Clinique Pasteur", "Hôpital Saint-Louis", "Centre Médical Dijon"]

for i in range(50):
    patient_code = f"P{str(random.randint(1, 999)).zfill(3)}"
    consultation_date = datetime.now() - timedelta(days=random.randint(0, 180))
    diagnosis = random.choice(list(diagnosisMap.keys()))
    symptoms = random.sample(diagnosisMap[diagnosis], k=random.randint(2, 4))
    doctor = f"Dr {fake.last_name()}"
    center = random.choice(medicalCenters)
    status = random.choice(statuses)

    cursor.execute("""
        INSERT INTO consultations
        (patient_code, consultation_date, symptoms, diagnosis, doctor_name, medical_center, status)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
    """, (
        patient_code,
        consultation_date,
        symptoms,
        diagnosis,
        doctor,
        center,
        status
    ))

conn.commit()
cursor.close()
conn.close()
print("✅ 50 consultations insérées dans PostgreSQL")


In [None]:
# 🧠 INTENTION GENERALE
# Construire un système expert médical qui prend des symptômes en entrée
# et retourne une maladie probable, en se basant sur des données réelles (PostgreSQL, JSON, API).

# 1. CHARGEMENT ET NETTOYAGE DES DONNÉES -------------------------------------------------

import pandas as pd
import json
import psycopg2
import requests

# Chargement JSON
with open("patients.json", "r", encoding="utf-8") as f:
    patients = json.load(f)
patients_df = pd.json_normalize(patients)
patients_df["symptoms"] = patients_df["symptoms"].apply(lambda x: [s.strip().lower() for s in x])

# Chargement PostgreSQL
db_conn = psycopg2.connect(
    host="localhost",
    database="medical_db",
    user="med_user",
    password="med_pass"
)
consultations_df = pd.read_sql("SELECT * FROM consultations;", db_conn)
db_conn.close()
consultations_df["symptoms"] = consultations_df["symptoms"].apply(lambda x: [s.strip().lower() for s in x])

# Chargement API (openFDA)
url = "https://api.fda.gov/drug/label.json"
params = {"limit": 50}
api_response = requests.get(url, params=params)
api_data = api_response.json()
api_df = pd.json_normalize(api_data["results"])
api_df["brand_name"] = api_df["openfda.brand_name"].apply(lambda x: x[0] if isinstance(x, list) else None)
api_df["purpose"] = api_df["purpose"].apply(lambda x: x[0] if isinstance(x, list) else None)
api_df["indications"] = api_df["indications_and_usage"].apply(lambda x: x[0] if isinstance(x, list) else None)
api_df = api_df[["brand_name", "purpose", "indications"]].dropna()

In [None]:

# 2. FUSION DES SOURCES JSON + PostgreSQL
patients_flat = patients_df[["patient_id", "age", "gender", "symptoms", "diagnosis"]].copy()
patients_flat["source"] = "json"
consultations_flat = consultations_df[["patient_code", "symptoms", "diagnosis"]].copy()
consultations_flat = consultations_flat.rename(columns={"patient_code": "patient_id"})
consultations_flat["age"] = None
consultations_flat["gender"] = None
consultations_flat["source"] = "postgresql"
combined_df = pd.concat([patients_flat, consultations_flat], ignore_index=True)
combined_df["diagnosis"] = combined_df["diagnosis"].str.strip().str.lower()
combined_df["symptoms"] = combined_df["symptoms"].apply(lambda x: [s.strip().lower() for s in x])
combined_df.to_csv("clean_medical_records.csv", index=False)
api_df.to_csv("openfda_meds.csv", index=False)

In [None]:
# 3. EXTRACTION DE MOTIFS FREQUENTS AVEC FP-GROWTH -----------------------------
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Encodage des transactions
df = pd.read_csv("clean_medical_records.csv")
transactions = df["symptoms"].apply(eval).tolist()
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)
df_encoded["diagnosis"] = df["diagnosis"]

# FP-Growth : on cherche les combinaisons de symptômes fréquents
frequent_itemsets = fpgrowth(df_encoded.drop("diagnosis", axis=1), min_support=0.05, use_colnames=True)

# Générer des règles d'association avec un seuil faible
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.001)
rules = rules[rules["consequents"].apply(lambda x: any(d in x for d in df["diagnosis"].unique()))]


In [None]:
# 4. SYSTEME EXPERT --------------------------------------------------------------
def predictDiagnosis(symptomsInput, rules):
    symptomsSet = set(symptomsInput)
    matchingRules = rules[rules["antecedents"].apply(lambda x: x.issubset(symptomsSet))]
    if matchingRules.empty:
        return "Aucun diagnostic trouvé"
    sortedRules = matchingRules.sort_values(by=["confidence", "lift"], ascending=False)
    return list(sortedRules["consequents"].values[0])[0]


In [None]:

# 5. API FLASK -------------------------------------------------------------------
from flask import Flask, request, jsonify
import threading

app = Flask(__name__)

@app.route("/diagnose", methods=["POST"])
def diagnose():
    data = request.get_json()
    symptoms = data.get("symptoms", [])
    if not symptoms:
        return jsonify({"error": "Aucun symptôme fourni"}), 400
    predicted = predictDiagnosis(symptoms, rules)
    return jsonify({"diagnosis": predicted})

def runApi():
    app.run(debug=True, use_reloader=False)

thread = threading.Thread(target=runApi)
thread.start()


In [None]:
import requests

r = requests.post("http://localhost:5000/diagnose", json={"symptoms": ['fièvre']})
print(r.json())
