In [1]:
# !pip install --upgrade faker

In [2]:
# !pip install jupyterlab

In [3]:
# !pip install --upgrade pandas

In [4]:
# !pip install --upgrade scikit-learn

In [None]:
import pandas as pd
import random
from faker import Faker

# Initialisation
fake = Faker()
sexes = ['Homme', 'Femme']
maladie_symptomes = {
'Paludisme': ['Fièvre', 'Frissons', 'Fatigue', 'Nausée'],
'Grippe': ['Fièvre', 'Toux', 'Fatigue', 'Maux de tête'],
'COVID-19': ['Fièvre', 'Toux', 'Fatigue', 'Douleurs musculaires'],
'Tuberculose': ['Toux', 'Fatigue', 'Fièvre', 'Perte de poids'],
'Diabète': ['Fatigue', 'Soif excessive', 'Mictions fréquentes'],
'Hypertension': ['Maux de tête', 'Fatigue', 'Vertiges'],
'Hépatite': ['Nausée', 'Fatigue', 'Douleurs abdominales', 'Jaunisse']
}

# Générateur de données
def generate_realistic_patient_data(n):
data = []
for _ in range(n):
    age = random.randint(1, 100)
    sexe = random.choice(sexes)
    maladie = random.choice(list(maladie_symptomes.keys()))
    symptome_pool = maladie_symptomes[maladie]
    symptomes_patient = random.sample(symptome_pool, k=min(len(symptome_pool), random.randint(2, 4)))
    fumeur = random.choice(['Oui', 'Non'])
    tension = round(random.uniform(9.0, 16.0), 1)
    temperature = round(random.uniform(36.0, 40.5), 1)
    data.append({
        'Age': age,
        'Sexe': sexe,
        'Symptomes': ", ".join(symptomes_patient),
        'Fumeur': fumeur,
        'Tension (cmHg)': tension,
        'Température (°C)': temperature,
        'Maladie': maladie
    })
return pd.DataFrame(data)

# Génération et sauvegarde
df = generate_realistic_patient_data(100)
df.to_csv("donnees_patients_realistes.csv", index=False)
print("✅ Fichier CSV généré : donnees_patients_realistes.csv")


: 

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

: 

In [None]:

from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [1]:
# Chargement du fichier CSV
df = pd.read_csv("donnees_patients_realistes.csv")

NameError: name 'pd' is not defined

In [None]:
df.head(10)

Unnamed: 0,Age,Sexe,Symptomes,Fumeur,Tension (cmHg),Température (°C),Maladie
0,52,Homme,"Fatigue, Fièvre, Maux de tête",Oui,12.2,36.8,Grippe
1,62,Homme,"Fatigue, Vertiges, Maux de tête",Oui,15.2,38.3,Hypertension
2,1,Homme,"Toux, Maux de tête, Fièvre",Non,12.4,36.1,Grippe
3,13,Femme,"Fièvre, Toux, Maux de tête",Oui,14.8,40.4,Grippe
4,16,Femme,"Fatigue, Maux de tête",Non,13.5,40.4,Hypertension
5,80,Femme,"Maux de tête, Fièvre, Toux, Fatigue",Oui,12.1,36.3,Grippe
6,30,Femme,"Fatigue, Maux de tête, Fièvre, Toux",Non,12.4,36.6,Grippe
7,99,Femme,"Nausée, Fièvre, Fatigue",Non,10.8,38.4,Paludisme
8,97,Homme,"Fièvre, Perte de poids",Non,14.7,39.2,Tuberculose
9,34,Homme,"Frissons, Nausée, Fièvre",Non,13.2,38.3,Paludisme


In [None]:
# Transformation des symptômes en liste
df['Symptomes'] = df['Symptomes'].apply(lambda x: x.split(', '))


In [None]:
# Colonnes numériques
numeriques = ['Age', 'Tension (cmHg)', 'Température (°C)']
X_num = df[numeriques]
scaler = StandardScaler()
X_num_scaled = pd.DataFrame(scaler.fit_transform(X_num), columns=numeriques)

In [None]:
# Colonnes catégorielles (Sexe, Fumeur)
X_cat = df[['Sexe', 'Fumeur']]
ohe = OneHotEncoder(sparse_output=False)
X_cat_encoded = pd.DataFrame(ohe.fit_transform(X_cat), columns=ohe.get_feature_names_out(X_cat.columns))

In [None]:
# Symptômes (encodage multilabel)
mlb = MultiLabelBinarizer()
X_symptoms = pd.DataFrame(mlb.fit_transform(df['Symptomes']), columns=mlb.classes_)

In [None]:
# Construction de X final
X = pd.concat([X_num_scaled, X_cat_encoded, X_symptoms], axis=1)
y = df['Maladie']

In [None]:
# Séparation entraînement/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Modèle Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
print(model.score(X_test, y_test))

0.865


In [None]:
# Prédictions
y_pred = model.predict(X_test)

# Rapport de performance
print("🔍 Rapport de classification :")
print(classification_report(y_test, y_pred))

🔍 Rapport de classification :
              precision    recall  f1-score   support

    COVID-19       0.68      0.70      0.69        33
     Diabète       1.00      1.00      1.00        21
      Grippe       0.81      0.74      0.77        23
Hypertension       0.96      1.00      0.98        23
    Hépatite       0.90      1.00      0.95        28
   Paludisme       1.00      0.86      0.92        35
 Tuberculose       0.79      0.84      0.82        37

    accuracy                           0.86       200
   macro avg       0.88      0.88      0.88       200
weighted avg       0.87      0.86      0.87       200



In [None]:
import joblib

# Sauvegarde du modèle et des encodeurs
joblib.dump(model, "modele_prediction_maladie.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(ohe, "onehot_encoder.pkl")
joblib.dump(mlb, "symptom_encoder.pkl")

print("✅ Modèle et encodeurs sauvegardés.")



✅ Modèle et encodeurs sauvegardés.


In [None]:
!pip install --prefer-binary pyarrow

Collecting pyarrow
  Using cached pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl.metadata (3.3 kB)
Using cached pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl (29.0 MB)
Installing collected packages: pyarrow
Successfully installed pyarrow-17.0.0


In [None]:
!pip install streamlit

Collecting streamlit
  Using cached streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting cachetools<7,>=4.0 (from streamlit)
  Using cached cachetools-6.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Using cached tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached GitPython-3.1.44-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Using cached gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Using cached smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)
Using cached streamlit-1.46.1-py3-none-any.whl (10.1 

In [None]:

import subprocess
import time
from IPython.display import IFrame, display



# 1. Lance Streamlit en arrière-plan
cmd = "streamlit run app.py"
process = subprocess.Popen(cmd, shell=True)

# 2. Attendre quelques secondes que Streamlit démarre
time.sleep(5)

# 3. Afficher l'interface Streamlit dans un iframe dans le notebook
display(IFrame(src='http://localhost:8501', width=900, height=700))

# Note : pour arrêter Streamlit, exécute la commande suivante dans une autre cellule :
# process.terminate()





      👋 Welcome to Streamlit!

      If you'd like to receive helpful onboarding emails, news, offers, promotions,
      and the occasional swag, please enter your email address below. Otherwise,
      leave this field blank.

      Email:  