In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

import warnings
warnings.simplefilter('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier 
from sklearn.ensemble import BaggingClassifier
import lightgbm as lgb

from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix,f1_score,precision_score,roc_auc_score,roc_curve

In [None]:
# Load the cleaned dataset.
# The path must be a raw string: in a normal literal the "\U" in "C:\Users"
# starts a \UXXXXXXXX unicode escape and the cell fails with a SyntaxError
# before anything runs (and sequences such as "\a" would silently be turned
# into control characters).
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to the project root so the notebook runs on other machines.
df = pd.read_csv(r'C:\Users\asd\Desktop\Diabetes-Prediction\data\processed\cleaned.csv')

In [None]:
# These two names are also imported in the first cell; they are repeated here
# so this cell keeps working when run on its own.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Target is the 'Outcome' column; every remaining column is used as a feature.
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 20% of the rows for testing, stratified on the target so both
# splits keep the same class proportions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_auc_score,
    RocCurveDisplay,
)

# Candidate classifiers keyed by the label printed during evaluation.
# Insertion order is the order in which the models are trained and reported.
# Wherever an estimator is given a random_state, it is pinned to 42.
models = {
    'Logistic_Regression': LogisticRegression(random_state=42, max_iter=1000),
    # One fixed kernel; probability=True makes predict_proba available, which
    # the evaluation cell uses to compute ROC AUC.
    'SVC': SVC(
        kernel='linear',
        probability=True,
        max_iter=1000,
        random_state=42,
    ),
    'RandomForest': RandomForestClassifier(
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        max_leaf_nodes=None,
        min_samples_leaf=1,
        n_jobs=-1,
        random_state=42,
    ),
    'DecisionTree': DecisionTreeClassifier(random_state=42),
    'ExtraTree': ExtraTreesClassifier(random_state=42),
    'NaiveBayes': BernoulliNB(),
    'XGB': XGBClassifier(
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=42,
    ),
    'KNN': KNeighborsClassifier(),
    'GradientBoost': GradientBoostingClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'SGD': SGDClassifier(max_iter=1000, tol=1e-3, random_state=42),
    'Bagging': BaggingClassifier(random_state=42),
    'Lightgbm': lgb.LGBMClassifier(random_state=42),
    'MLP': MLPClassifier(max_iter=1000, random_state=42),
}



In [None]:
# Train every candidate on the scaled training split, report its metrics on
# the scaled test split, and keep one summary row per model so the candidates
# can be compared side by side once the loop has finished.

# ROC AUC is only computed for a two-class target; this depends on y_test
# alone, so it is evaluated once instead of on every iteration.
is_binary = len(set(y_test)) == 2
results = []

for name, model in models.items():
    print(f"--- {name} ---")
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    conf_mat = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_mat)

    class_report = classification_report(y_test, y_pred)
    print("Classification Report:")
    print(class_report)

    # Stays NaN when the score is unavailable or its computation fails.
    roc_score = float("nan")

    # ROC AUC is best-effort: a failure here is reported and must not stop
    # the remaining models from being evaluated.
    try:
        if hasattr(model, "predict_proba"):
            # Second column, taken as the positive-class probability.
            y_probs = model.predict_proba(X_test_scaled)[:, 1]
        elif hasattr(model, "decision_function"):
            # Fallback for models that expose only a decision score.
            y_probs = model.decision_function(X_test_scaled)
        else:
            y_probs = None

        if y_probs is not None and is_binary:
            roc_score = roc_auc_score(y_test, y_probs)
            print(f"ROC AUC Score: {roc_score:.4f}")

            # Explicit figure/axes so each curve gets its own figure and the
            # figure can be released once it has been shown.
            fig, ax = plt.subplots()
            RocCurveDisplay.from_estimator(model, X_test_scaled, y_test, ax=ax)
            ax.set_title(f"ROC Curve - {name}")
            plt.show()
            plt.close(fig)
        else:
            print("ROC AUC not available for multiclass or models without probability/decision_function")
    except Exception as e:
        print(f"ROC AUC calculation failed: {e}")

    results.append({
        "Model": name,
        "Accuracy": accuracy,
        # Macro averaging works for any label encoding and weights both
        # classes equally.
        "F1 (macro)": f1_score(y_test, y_pred, average="macro"),
        "ROC AUC": roc_score,
    })

    print("\n")

# Side-by-side comparison, best ROC AUC first; models without a score go last.
# Passing the columns explicitly keeps this valid even if `models` is empty.
results_df = (
    pd.DataFrame(results, columns=["Model", "Accuracy", "F1 (macro)", "ROC AUC"])
    .sort_values("ROC AUC", ascending=False, na_position="last")
    .reset_index(drop=True)
)
results_df