# Model 10 with Cross Validation

In [1]:
import numpy as np
import pandas as pd

from keras.backend import clear_session
from keras.callbacks import EarlyStopping
from keras.layers import Dense
from keras.layers import Input
from keras.metrics import Recall
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import set_random_seed

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Load the dataset from the working directory and preview the first five rows
model_df = pd.read_csv(filepath_or_buffer="status.csv")
model_df.head(n=5)

Unnamed: 0,hypertension,heart_disease,diabetes,problematic,gender,smoking_history_No Info,smoking_history_current,smoking_history_not current,age,bmi,HbA1c_level,blood_glucose_level
0,0,1,0,0,0,0.0,0.0,1.0,1.0,-0.645658,0.127273,-0.454545
1,0,0,0,0,0,1.0,0.0,0.0,0.349349,-0.595938,0.127273,-1.0
2,0,0,0,0,1,0.0,0.0,1.0,-0.301301,-0.595938,-0.2,-0.290909
3,0,0,0,0,0,0.0,1.0,0.0,-0.101101,-0.686275,-0.454545,-0.318182
4,1,1,0,0,1,0.0,1.0,0.0,0.8999,-0.763539,-0.527273,-0.318182


In [3]:
# Define features (X) and target (y)
TARGET_COL = "diabetes"
# "Unnamed: 0" is the name pandas gives to a header-less CSV column; in the
# preview above it holds 0, 1, 2, ... i.e. the saved row index. It identifies a
# row rather than describing a patient, and it is unscaled, so it must not be
# fed to the network as a feature.
INDEX_ARTIFACT_COL = "Unnamed: 0"
# NOTE(review): "problematic" is kept as a feature - confirm it is not derived
# from the target, otherwise it leaks label information.

X = (
    model_df
    .drop(columns=TARGET_COL)
    # errors="ignore" keeps the cell working if the CSV is re-exported
    # without the index column.
    .drop(columns=INDEX_ARTIFACT_COL, errors="ignore")
)
y = model_df[TARGET_COL]

In [4]:
# Function to build a neural network model
def Model_building (input_dim):
    """Build and compile a fully connected classifier with a 2-unit softmax output.

    Architecture: three tanh hidden layers (128, 128, 64 units) followed by a
    2-unit softmax layer, so targets must be one-hot encoded with two columns.

    Parameters
    ----------
    input_dim : int
        Number of feature columns in each input row.

    Returns
    -------
    keras.models.Sequential
        The compiled model. Its summary is printed as a side effect.
    """
    model = Sequential([
        # An explicit Input layer replaces the `input_shape=` argument on the
        # first Dense layer, which Keras warns about on every model build.
        Input(shape=(input_dim,)),
        Dense(128, activation='tanh'),
        Dense(128, activation='tanh'),
        Dense(64, activation='tanh'),
        Dense(2, activation='softmax'),
    ])

    # Compile the model with categorical crossentropy loss and Adam optimizer.
    # Recall is restricted to output column 1: without `class_id` the metric
    # pools both one-hot columns, which does not measure how many positive
    # cases are found. The metric keeps the name 'recall' so history keys
    # are unchanged.
    model.compile(
        loss='categorical_crossentropy',
        optimizer="Adam",
        metrics=['accuracy', Recall(class_id=1, name='recall')],
    )
    model.summary()
    return model

In [5]:
# Stratified 3-Fold Cross Validation
SEED = 42          # fixed seed so fold assignment and training repeat across runs
N_SPLITS = 3
EPOCHS = 50
BATCH_SIZE = 128

# random_state pins the shuffle so every run evaluates on the same folds
kfold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Lists to store results for each fold
verify_accuracy = []              # accuracy per fold, in percent
verify_auc = []                   # ROC AUC per fold
all_confusion_matrices = []
all_classification_reports = []

# Loop through each fold
for fold_no, (train_idx, test_idx) in enumerate(kfold.split(X, y), start=1):
    # Release Keras global state left behind by the previous fold's model so
    # memory does not grow with every fold.
    clear_session()
    # Seed Python, NumPy and the Keras backend per fold (after clear_session,
    # so the seed is set on fresh state). GPU kernels may still not be
    # bit-for-bit deterministic.
    set_random_seed(SEED + fold_no)

    # Split train/test folds
    X_train_fold = X.iloc[train_idx]
    X_test_fold = X.iloc[test_idx]
    y_train_fold = y.iloc[train_idx]
    y_test_fold = y.iloc[test_idx]

    # One-hot encode the target to match the 2-unit softmax output; the encoder
    # is fitted on the training fold only and reused for the test fold.
    encoder = OneHotEncoder(sparse_output=False, drop=None)
    y_train_encoded = encoder.fit_transform(y_train_fold.to_frame())
    y_test_encoded = encoder.transform(y_test_fold.to_frame())

    # Build a model for each fold
    predictor = Model_building (X_train_fold.shape[1])
    # Train the model
    h = predictor.fit (X_train_fold,
                       y_train_encoded,
                       verbose=0,
                       epochs=EPOCHS,
                       batch_size=BATCH_SIZE
                      )

    # Predictions on test fold: argmax turns probabilities / one-hot rows
    # back into class indices
    y_pred_prob_test = predictor.predict(X_test_fold, verbose=0)
    y_pred_test = np.argmax(y_pred_prob_test, axis=1)
    y_true = np.argmax(y_test_encoded, axis=1)

    # Compute performance metrics (AUC uses the probability of column 1)
    accuracy = accuracy_score(y_true, y_pred_test)
    auc = roc_auc_score(y_true, y_pred_prob_test[:, 1])
    cm = confusion_matrix(y_true, y_pred_test)
    cr = classification_report(y_true, y_pred_test)

    # Store results
    verify_accuracy.append(100 * accuracy)
    verify_auc.append(auc)
    all_confusion_matrices.append(cm)
    all_classification_reports.append(cr)

    # Print results for current fold
    print(f"Fold {fold_no}/{N_SPLITS}")
    print("Accuracy: ", accuracy)
    print("AUC:", auc)
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(cr)


# Print final summary across all folds
print("Final summary across all folds")
print(f"Accuracy Mean: {np.mean(verify_accuracy):.2f}%")
print(f"AUC Mean: {np.mean(verify_auc):.4f}")
  

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-08-24 16:45:24.247379: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-08-24 16:45:24.247733: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-08-24 16:45:24.247739: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-08-24 16:45:24.247938: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-08-24 16:45:24.247947: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


2025-08-24 16:45:24.730217: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Accuracy:  0.9738058089294287
AUC: 0.9776322079440967
Confusion Matrix:
[[30474    20]
 [  853  1981]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99     30494
           1       0.99      0.70      0.82      2834

    accuracy                           0.97     33328
   macro avg       0.98      0.85      0.90     33328
weighted avg       0.97      0.97      0.97     33328



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy:  0.9719446694872026
AUC: 0.9768499012761992
Confusion Matrix:
[[30463    31]
 [  904  1929]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98     30494
           1       0.98      0.68      0.80      2833

    accuracy                           0.97     33327
   macro avg       0.98      0.84      0.89     33327
weighted avg       0.97      0.97      0.97     33327



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy:  0.9727248177153659
AUC: 0.9776471046215778
Confusion Matrix:
[[30451    43]
 [  866  1967]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99     30494
           1       0.98      0.69      0.81      2833

    accuracy                           0.97     33327
   macro avg       0.98      0.85      0.90     33327
weighted avg       0.97      0.97      0.97     33327

Final summary across all folds
Accuracy Mean: 97.28%
AUC Mean: 0.9774
