# Model 5 with cross validation

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_recall_curve, average_precision_score, roc_auc_score, roc_curve
import numpy as np

In [2]:
model_df = pd.read_csv("status.csv")
model_df.head()

Unnamed: 0,hypertension,heart_disease,diabetes,problematic,gender,smoking_history_No Info,smoking_history_current,smoking_history_not current,age,bmi,HbA1c_level,blood_glucose_level
0,0,1,0,0,0,0.0,0.0,1.0,1.0,-0.645658,0.127273,-0.454545
1,0,0,0,0,0,1.0,0.0,0.0,0.349349,-0.595938,0.127273,-1.0
2,0,0,0,0,1,0.0,0.0,1.0,-0.301301,-0.595938,-0.2,-0.290909
3,0,0,0,0,0,0.0,1.0,0.0,-0.101101,-0.686275,-0.454545,-0.318182
4,1,1,0,0,1,0.0,1.0,0.0,0.8999,-0.763539,-0.527273,-0.318182


In [3]:
# Define features (X) and target (y)
X = model_df.drop("diabetes",axis=1)
y = model_df["diabetes"]

In [4]:
# Function to build a neural network model
def Model_building (input_dim):
    # Build a neural network model 
    model = Sequential()
    model.add(Dense(128, input_shape=(input_dim,), activation='tanh'))
    model.add(Dense(128, activation='tanh'))
    model.add(Dense(64, activation='tanh'))
    model.add(Dense(2, activation='softmax'))

    # Compile the model with categorical crossentropy loss and Adam optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.005), metrics=['accuracy',"recall"])
    model.summary()

    return model

In [5]:
# Stratified 3-Fold Cross Validation
kfold = StratifiedKFold (n_splits=3, shuffle=True)

# Lists to store results for each fold
verify_accuracy = []
verify_ap = []
all_confusion_matrices = []
all_classification_reports = []

# Loop through each fold
for train_idx, test_idx in kfold.split(X, y):  
    # Split train/test folds
    X_train_fold = X.iloc[train_idx]
    X_test_fold = X.iloc[test_idx]
    y_train_fold = y.iloc[train_idx]
    y_test_fold = y.iloc[test_idx]

    rus = RandomUnderSampler(random_state=42)
    X_train_rus, y_train_rus = rus.fit_resample(X_train_fold, y_train_fold)
    
    encoder = OneHotEncoder(sparse_output=False, drop=None)
    y_train_encoded = encoder.fit_transform(y_train_rus.to_frame())
    y_test_encoded = encoder.transform(y_test_fold.to_frame())

    # Build a model for each fold
    predictor = Model_building (X_train_rus.shape[1])   
    # Train the model
    h = predictor.fit (X_train_rus, 
                       y_train_encoded, 
                       verbose=False, 
                       epochs=50, 
                       batch_size = 128
                      )

    # Predictions on test fold
    y_pred_prob_test = predictor.predict(X_test_fold, verbose=0)
    y_pred_test = np.argmax(y_pred_prob_test, axis=1)
    y_true = np.argmax(y_test_encoded, axis=1)

    
    # Compute performance metrics
    accuracy = accuracy_score(y_true, y_pred_test)
    ap = average_precision_score(y_true, y_pred_prob_test[:, 1])
    cm = confusion_matrix(y_true, y_pred_test)
    cr = classification_report(y_true, y_pred_test)
    
    # Store results
    verify_accuracy.append(100 * accuracy)
    verify_ap.append(ap)
    all_confusion_matrices.append(cm)
    all_classification_reports.append(cr)

    # Print results for current fold
    print("Accuracy: ", accuracy)
    print("Average Precision:", ap)
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(cr)

    
# Print final summary across all folds
print("Final summary across all folds")
print(f"Accuracy Mean: {np.mean(verify_accuracy):.2f}%")
print(f"AP Mean: {np.mean(verify_ap):.4f}")
  

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-08-24 22:42:41.554519: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-08-24 22:42:41.554620: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-08-24 22:42:41.554639: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-08-24 22:42:41.554866: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-08-24 22:42:41.554901: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


2025-08-24 22:42:42.046803: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Accuracy:  0.9105856937109937
Average Precision: 0.8757247187658411
Confusion Matrix:
[[27808  2686]
 [  294  2540]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.91      0.95     30494
           1       0.49      0.90      0.63      2834

    accuracy                           0.91     33328
   macro avg       0.74      0.90      0.79     33328
weighted avg       0.95      0.91      0.92     33328



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy:  0.9199747952110902
Average Precision: 0.8832128718231347
Confusion Matrix:
[[28131  2363]
 [  304  2529]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.92      0.95     30494
           1       0.52      0.89      0.65      2833

    accuracy                           0.92     33327
   macro avg       0.75      0.91      0.80     33327
weighted avg       0.95      0.92      0.93     33327



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy:  0.9274762204818916
Average Precision: 0.8820297918466438
Confusion Matrix:
[[28423  2071]
 [  346  2487]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.93      0.96     30494
           1       0.55      0.88      0.67      2833

    accuracy                           0.93     33327
   macro avg       0.77      0.90      0.82     33327
weighted avg       0.95      0.93      0.93     33327

Final summary across all folds
Accuracy Mean: 91.93%
AP Mean: 0.8803
