<h2> Les Imports </h2>

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


<h2> Chargement de données </h2>
les données nettoyées et selectionées: heart_selected_features.csv

In [3]:
df = pd.read_csv("heart_selected_features.csv")
df.head()

Unnamed: 0,cp_4.0,thal_7.0,oldpeak,thalach,slope_2.0,ca_2.0,ca_3.0,ca_1.0,trestbps,sex,target
0,False,False,1.087338,0.017197,False,False,False,False,0.757525,1.0,0
1,True,False,0.397182,-1.821905,True,False,True,False,1.61122,1.0,1
2,True,True,1.346147,-0.902354,True,True,False,False,-0.6653,1.0,1
3,False,False,2.122573,1.637359,False,False,False,False,-0.09617,1.0,0
4,False,False,0.310912,0.980537,False,False,False,False,-0.09617,0.0,0


<h2> Séparation X/y & Train / Test

In [4]:
X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

<h2> Les fonction a Utiliser pour Evaluation </h2>

In [5]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred)
    }

<h2> Modele: Régression Logistique </h2>

In [6]:
log_reg = LogisticRegression(max_iter=2000)
param_grid_lr = {
    "C": [0.1, 1, 5, 10],
    "solver": ["liblinear", "lbfgs"]
}
grid_lr = GridSearchCV(
    log_reg,
    param_grid_lr,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)
grid_lr.fit(X_train, y_train)
best_lr = grid_lr.best_estimator_
print("Meilleurs paramètres (Logistic Regression) :", grid_lr.best_params_)

Meilleurs paramètres (Logistic Regression) : {'C': 5, 'solver': 'liblinear'}


In [7]:
print("--"*15, "Evaluation", "--"*15)
lr_results = evaluate_model(best_lr, X_test, y_test)
lr_results

------------------------------ Evaluation ------------------------------


{'Accuracy': 0.8852459016393442,
 'Precision': 0.8620689655172413,
 'Recall': 0.8928571428571429,
 'F1 Score': 0.8771929824561403}

<h2> Modele: Random Forest

In [8]:
rf = RandomForestClassifier()
param_dist_rf = {
    "n_estimators": [50, 100, 150, 200, 300],
    "max_depth": [None, 5, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}
rand_rf = RandomizedSearchCV(
    rf,
    param_dist_rf,
    cv=5,
    scoring="accuracy",
    n_iter=20,
    random_state=42,
    n_jobs=-1
)
rand_rf.fit(X_train, y_train)
best_rf = rand_rf.best_estimator_

print("Meilleurs paramètres (Random Forest) :", rand_rf.best_params_)

Meilleurs paramètres (Random Forest) : {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 5}


In [9]:
print("--"*15, "Evaluation", "--"*15)
rf_results = evaluate_model(best_rf, X_test, y_test)
rf_results

------------------------------ Evaluation ------------------------------


{'Accuracy': 0.8688524590163934,
 'Precision': 0.8333333333333334,
 'Recall': 0.8928571428571429,
 'F1 Score': 0.8620689655172413}

<h2> Modele: SVM </h2>

In [10]:
svm_model = SVC()
param_grid_svm = {
    "C": [0.1, 1, 5, 10],
    "kernel": ["linear", "rbf"],
    "gamma": ["scale", "auto"]
}
grid_svm = GridSearchCV(
    svm_model,
    param_grid_svm,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)
grid_svm.fit(X_train, y_train)
best_svm = grid_svm.best_estimator_
print("Meilleurs paramètres (SVM) :", grid_svm.best_params_)

Meilleurs paramètres (SVM) : {'C': 5, 'gamma': 'scale', 'kernel': 'linear'}


In [11]:
print("--"*15, "Evaluation", "--"*15)
svm_results = evaluate_model(best_svm, X_test, y_test)
svm_results

------------------------------ Evaluation ------------------------------


{'Accuracy': 0.8360655737704918,
 'Precision': 0.8,
 'Recall': 0.8571428571428571,
 'F1 Score': 0.8275862068965517}

<h2> Comparaison Finale </h2>

In [12]:
results = pd.DataFrame({
    "LogReg": lr_results,
    "RandomForest": rf_results,
    "SVM": svm_results
})
results

Unnamed: 0,LogReg,RandomForest,SVM
Accuracy,0.885246,0.868852,0.836066
Precision,0.862069,0.833333,0.8
Recall,0.892857,0.892857,0.857143
F1 Score,0.877193,0.862069,0.827586


<h2> Export du Meilleur Modele </h2>

In [13]:
import joblib

best_model_name = results.loc["F1 Score"].idxmax()
if best_model_name == "LogReg":
    best_model = best_lr
elif best_model_name == "RandomForest":
    best_model = best_rf
else:
    best_model = best_svm

joblib.dump(best_model, "final_model.pkl")

print("Modèle exporté sous : best_model.pkl")
print("Meilleur modèle :", best_model_name)

Modèle exporté sous : best_model.pkl
Meilleur modèle : LogReg
