In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report
import warnings

warnings.filterwarnings('ignore')

# Charger le dataset
df = pd.read_csv('diabetes.csv')

# Préparer les données (X pour les caractéristiques, y pour la variable cible)
X = df.drop(columns=['Diabetes_012'])
y = df['Diabetes_012']

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

### Modèle 1 : K-Nearest Neighbors
knn_params = {'n_neighbors': range(1, 20)}
knn = KNeighborsClassifier()
knn_grid = GridSearchCV(knn, knn_params, cv=5, scoring='accuracy')
knn_grid.fit(X_train, y_train)
best_knn = knn_grid.best_estimator_
y_pred_knn = best_knn.predict(X_test)

accuracy_knn = accuracy_score(y_test, y_pred_knn)
recall_knn = recall_score(y_test, y_pred_knn, average='weighted')
f1_knn = f1_score(y_test, y_pred_knn, average='weighted')

print("KNN - Meilleur nombre de voisins :", knn_grid.best_params_)
print("KNN - Accuracy :", accuracy_knn)
print("KNN - Recall :", recall_knn)
print("KNN - F1 Score :", f1_knn)
print("KNN - Rapport de classification :")
print(classification_report(y_test, y_pred_knn))

### Modèle 2 : Decision Tree
tree_params = {'max_depth': range(1, 20)}
tree = DecisionTreeClassifier(random_state=42)
tree_grid = GridSearchCV(tree, tree_params, cv=5, scoring='accuracy')
tree_grid.fit(X_train, y_train)
best_tree = tree_grid.best_estimator_
y_pred_tree = best_tree.predict(X_test)

accuracy_tree = accuracy_score(y_test, y_pred_tree)
recall_tree = recall_score(y_test, y_pred_tree, average='weighted')
f1_tree = f1_score(y_test, y_pred_tree, average='weighted')

print("\nDecision Tree - Meilleure profondeur :", tree_grid.best_params_)
print("Decision Tree - Accuracy :", accuracy_tree)
print("Decision Tree - Recall :", recall_tree)
print("Decision Tree - F1 Score :", f1_tree)
print("Decision Tree - Rapport de classification :")
print(classification_report(y_test, y_pred_tree))

### Modèle 3 : Random Forest
rf_params = {'n_estimators': [50, 100, 150], 'max_depth': [5, 10, 15, None]}
rf = RandomForestClassifier(random_state=42)
rf_grid = GridSearchCV(rf, rf_params, cv=5, scoring='accuracy')
rf_grid.fit(X_train, y_train)
best_rf = rf_grid.best_estimator_
y_pred_rf = best_rf.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

print("\nRandom Forest - Meilleurs paramètres :", rf_grid.best_params_)
print("Random Forest - Accuracy :", accuracy_rf)
print("Random Forest - Recall :", recall_rf)
print("Random Forest - F1 Score :", f1_rf)
print("Random Forest - Rapport de classification :")
print(classification_report(y_test, y_pred_rf))

### Comparaison des modèles
print("\nComparaison des performances des modèles :")
print(f"KNN -> Accuracy: {accuracy_knn}, Recall: {recall_knn}, F1 Score: {f1_knn}")
print(f"Decision Tree -> Accuracy: {accuracy_tree}, Recall: {recall_tree}, F1 Score: {f1_tree}")
print(f"Random Forest -> Accuracy: {accuracy_rf}, Recall: {recall_rf}, F1 Score: {f1_rf}")

# Résumer le modèle avec la meilleure performance globale en F1-score
best_model = max(
    ((f1_knn, "KNN"), (f1_tree, "Decision Tree"), (f1_rf, "Random Forest")),
    key=lambda x: x[0]
)
print(f"\nLe modèle avec le meilleur F1-score est : {best_model[1]} avec un F1-score de {best_model[0]}")


KNN - Meilleur nombre de voisins : {'n_neighbors': 18}
KNN - Accuracy : 0.8434904867024072
KNN - Recall : 0.8434904867024072
KNN - F1 Score : 0.7968368332208058
KNN - Rapport de classification :
              precision    recall  f1-score   support

         0.0       0.86      0.98      0.91     64180
         1.0       0.00      0.00      0.00      1425
         2.0       0.48      0.12      0.19     10499

    accuracy                           0.84     76104
   macro avg       0.45      0.37      0.37     76104
weighted avg       0.79      0.84      0.80     76104


Decision Tree - Meilleure profondeur : {'max_depth': 6}
Decision Tree - Accuracy : 0.8482865552401976
Decision Tree - Recall : 0.8482865552401976
Decision Tree - F1 Score : 0.8014098321135305
Decision Tree - Rapport de classification :
              precision    recall  f1-score   support

         0.0       0.86      0.99      0.92     64180
         1.0       0.00      0.00      0.00      1425
         2.0       0.56 