# Étude de cas : Prédiction des maladies cardiaques
Ce notebook explore un jeu de données simulé inspiré du dataset Kaggle *Heart Disease UCI*. L'objectif est de construire un modèle de machine learning pour prédire si une personne est atteinte d'une maladie cardiaque (classification binaire).

## 1. Chargement des données

In [None]:
import pandas as pd

# Build the simulated dataset record by record: one tuple per patient, with
# the column order spelled out explicitly.
df = pd.DataFrame(
    data=[
        (40, 1, 'ATA', 140, 289, 0, 'Normal', 172, 'N', 0.0, 'Up', 0),
        (49, 0, 'NAP', 130, 250, 0, 'ST', 160, 'Y', 1.5, 'Flat', 1),
        (37, 1, 'ASY', 120, 240, 0, 'LVH', 150, 'Y', 2.3, 'Down', 1),
        (48, 1, 'NAP', 110, 230, 1, 'Normal', 165, 'N', 0.6, 'Up', 0),
        (61, 0, 'ATA', 150, 300, 1, 'LVH', 140, 'Y', 1.4, 'Flat', 1),
        (60, 1, 'TA', 140, 280, 0, 'Normal', 135, 'Y', 2.0, 'Down', 1),
        (58, 0, 'ASY', 135, 310, 1, 'ST', 128, 'N', 0.5, 'Up', 0),
        (50, 1, 'ATA', 128, 270, 0, 'LVH', 155, 'N', 0.3, 'Flat', 0),
        (45, 1, 'NAP', 132, 260, 1, 'ST', 148, 'Y', 1.2, 'Flat', 1),
        (39, 0, 'NAP', 130, 245, 0, 'Normal', 162, 'N', 0.0, 'Up', 0),
    ],
    columns=[
        'Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol',
        'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak',
        'ST_Slope', 'HeartDisease',
    ],
)
# Last expression of the cell: the notebook displays the first five rows.
df.head()

## 2. Prétraitement des données

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Target vector: the binary HeartDisease label.
y = df['HeartDisease']
# Feature matrix: every column except the target.
X = df.drop(columns=['HeartDisease'])

# Feature names grouped by the preprocessing each group receives in the
# ColumnTransformer: categorical columns are one-hot encoded, numeric
# columns are standardized.
cat_cols = [
    'Sex',
    'ChestPainType',
    'FastingBS',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]
num_cols = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'MaxHR',
    'Oldpeak',
]

## 3. Construction du pipeline

In [None]:
# Per-type preprocessors: one-hot encode the categorical features (categories
# not seen during fit are ignored at transform time) and standardize the
# numeric ones.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numeric_transformer = StandardScaler()

# Route each column group to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_cols),
        ('cat', categorical_transformer, cat_cols),
    ],
)

# Complete pipeline: preprocessing followed by a seeded random forest.
pipeline = Pipeline(
    steps=[
        ('preprocess', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42)),
    ],
)

## 4. Entraînement et évaluation du modèle

In [None]:
# Train/test split. The dataset has only 10 rows, so a 20% hold-out is just
# 2 rows; without stratification both can land in the same class, which
# makes the per-class metrics below meaningless. Stratifying on y keeps the
# class balance the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Training: fits the preprocessing steps and the classifier on the training
# rows only.
pipeline.fit(X_train, y_train)

# Predictions on the held-out rows and per-class report.
# zero_division=0 yields the same values as the default but without a warning
# when a class is never predicted (likely with such a small test set).
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

## 5. Comparaison avec un autre modèle : Support Vector Machine

In [None]:
from sklearn.base import clone
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

# New pipeline with an SVM classifier.
# The preprocessor is cloned rather than reused: a Pipeline fits its steps in
# place, so sharing one ColumnTransformer instance would make fitting this
# pipeline silently refit the transformer inside the Random Forest pipeline.
pipeline_svm = Pipeline([
    ('preprocess', clone(preprocessor)),
    ('classifier', SVC(kernel='rbf', probability=True, random_state=42))
])

# Training on the same split as the Random Forest so the two reports are
# comparable.
pipeline_svm.fit(X_train, y_train)
y_pred_svm = pipeline_svm.predict(X_test)
print("Rapport de classification pour le modèle SVM :")
# zero_division=0 yields the same values as the default but without a warning
# when a class is never predicted.
print(classification_report(y_test, y_pred_svm, zero_division=0))

## 6. Résumé de la comparaison

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of each model on the same held-out test rows.
acc_rf = accuracy_score(y_test, y_pred)
acc_svm = accuracy_score(y_test, y_pred_svm)

print(f"Exactitude Random Forest : {acc_rf:.2f}")
print(f"Exactitude SVM            : {acc_svm:.2f}")

# Announce the better model, or a tie. The messages previously contained
# mis-decoded characters (mojibake); they are restored to the intended
# accented French text and emoji.
if acc_rf > acc_svm:
    print("✅ Le modèle Random Forest a donné de meilleurs résultats.")
elif acc_rf < acc_svm:
    print("✅ Le modèle SVM a donné de meilleurs résultats.")
else:
    print("🔍 Les deux modèles ont donné la même exactitude.")