In [10]:

import numpy as np 
import pandas as pd
import seaborn as sns 
import warnings 
import joblib
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, accuracy_score, classification_report, confusion_matrix, f1_score, recall_score, precision_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV, StratifiedKFold
from scipy.stats import randint
from sklearn.ensemble import StackingClassifier
from scipy.stats import uniform
warnings.filterwarnings('ignore')
from sklearn.ensemble import AdaBoostClassifier, VotingClassifier

In [3]:
# Load the blood-sugar dataset; the workbook is resolved relative to the
# notebook's current working directory.
df = pd.read_excel(
    io='filtered_blood_sugar_data - Copy - Copy.xlsx',
)

In [4]:
# Print the column names, dtypes and non-null counts as a quick sanity check
# that the workbook loaded with the expected schema and no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1888 entries, 0 to 1887
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   AGE             1888 non-null   int64
 1   GENDER          1888 non-null   int64
 2   BLOOD PRESSURE  1888 non-null   int64
 3   BLOOD SUGAR     1888 non-null   int64
 4   PID FUN         1888 non-null   int64
 5   CLASS ID        1888 non-null   int64
dtypes: int64(6)
memory usage: 88.6 KB


In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for every column.
# NOTE(review): the reported maxima for BLOOD PRESSURE and BLOOD SUGAR sit far
# above their 75th percentiles -- these look like possible data-entry outliers;
# confirm before relying on the fitted scaler/model.
df.describe()

Unnamed: 0,AGE,GENDER,BLOOD PRESSURE,BLOOD SUGAR,PID FUN,CLASS ID
count,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0
mean,46.407309,0.317797,90.440148,167.346398,0.148835,0.599047
std,16.033414,0.465743,29.66727,83.458539,0.35602,0.490221
min,21.0,0.0,24.0,24.0,0.0,0.0
25%,32.0,0.0,74.0,114.0,0.0,0.0
50%,46.0,0.0,90.0,144.0,0.0,1.0
75%,60.0,1.0,104.0,199.0,0.0,1.0
max,93.0,1.0,1034.0,1101.0,1.0,1.0


In [6]:
# The label lives in the 'CLASS ID' column; every remaining column is a predictor.
y = df['CLASS ID']
X = df.drop(columns=['CLASS ID'])

In [7]:
# Hold out 20% of the rows for testing (80% train); the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [8]:
# Standardise the features. The scaler learns its statistics from the training
# rows only and the same transform is then applied to both partitions, so no
# information from the test rows leaks into preprocessing.
#
# Caution: X_train / X_test are rebound to the scaled arrays here, so this cell
# is not safe to re-run on its own -- re-run the train/test split cell first,
# otherwise the scaler is re-fitted on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Stacking classifier (AdaBoost + Gaussian Naive Bayes base learners, Logistic
# Regression meta-learner) with hyperparameters tuned by an exhaustive grid search.

# Define base models.
# AdaBoost is seeded so that the grid search and the reported metrics are
# reproducible across runs; 0 matches the seed used for the train/test split.
adaboost = AdaBoostClassifier(random_state=0)
nb_classifier = GaussianNB()

# Define meta model (Logistic Regression) that combines the base predictions
meta_model = LogisticRegression()

# Create Stacking Classifier.
# The names given here ('adaboost', 'naive_bayes') are the prefixes used in
# the parameter grid below.
stacked_model = StackingClassifier(
    estimators=[('adaboost', adaboost), ('naive_bayes', nb_classifier)],
    final_estimator=meta_model
)

# Define hyperparameter grid for GridSearchCV.
# Keys follow the '<estimator name>__<parameter>' convention;
# 'final_estimator__' addresses the meta model.
# 2 * 2 * 2 * 3 = 24 candidate combinations.
param_grid = {
    'adaboost__n_estimators': [50, 100],  # Number of estimators for AdaBoost
    'adaboost__learning_rate': [0.01, 0.1],  # Learning rate for AdaBoost
    'naive_bayes__var_smoothing': [1e-9, 1e-8],  # Variance smoothing for Naive Bayes
    'final_estimator__C': [0.1, 1, 10]  # Inverse regularization strength for Logistic Regression
}

# Perform GridSearchCV for hyperparameter tuning (5-fold CV on the training
# set only; the held-out test set is not touched until evaluation below).
grid_search = GridSearchCV(
    estimator=stacked_model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=2,  # Optional: Increase verbosity to see progress
    n_jobs=-1  # Use all available CPU cores
)
grid_search.fit(X_train, y_train)

# Retrieve the best model (refitted on the full training set by GridSearchCV)
best_stacked_model = grid_search.best_estimator_

# Make predictions on the held-out test set
y_pred = best_stacked_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification Report (per-class precision / recall / F1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix (rows: true class, columns: predicted class)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Display best hyperparameters
print("\nBest Hyperparameters:")
print(grid_search.best_params_)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Accuracy: 0.8810

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.93      0.86       148
           1       0.95      0.85      0.90       230

    accuracy                           0.88       378
   macro avg       0.87      0.89      0.88       378
weighted avg       0.89      0.88      0.88       378


Confusion Matrix:
[[137  11]
 [ 34 196]]

Best Hyperparameters:
{'adaboost__learning_rate': 0.01, 'adaboost__n_estimators': 50, 'final_estimator__C': 1, 'naive_bayes__var_smoothing': 1e-09}


In [12]:
# %% Save the trained model together with its preprocessing
# The stacked model was fitted on standardised features, so the fitted scaler
# must be persisted as well: without it a consumer of model.pkl cannot bring
# raw inputs onto the scale the model expects.
joblib.dump(best_stacked_model, 'model.pkl')  # Save the best model
print("Model saved as model.pkl")
joblib.dump(scaler, 'scaler.pkl')  # Save the fitted StandardScaler
print("Scaler saved as scaler.pkl")

# %% Verify the saved artifacts
# joblib files are pickle-based: only load files from a trusted source.
loaded_model = joblib.load('model.pkl')
loaded_scaler = joblib.load('scaler.pkl')

# Start from an *unscaled* row (the first test example, looked up in the raw
# feature frame by its index label) and push it through the reloaded scaler
# and model, exactly as an external consumer of the two files would.
raw_sample = X.loc[[y_test.index[0]]]
sample_input = loaded_scaler.transform(raw_sample)
predicted = loaded_model.predict(sample_input)
print(f"Sample Prediction: {predicted}")

Model saved as model.pkl
Sample Prediction: [1]
