# In [None]:
# %% Import Libraries
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier, StackingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
warnings.filterwarnings('ignore')

# %% Load Dataset
# The workbook path is relative, so it is resolved against the current
# working directory.
df = pd.read_excel('updated_file.xlsx')

# %% Exploratory Data Analysis
# DataFrame.info() writes its report itself, so it is not wrapped in print().
print("Dataset Info:")
df.info()

# Summary statistics as produced by DataFrame.describe().
print("\nDataset Description:")
summary_stats = df.describe()
print(summary_stats)

# Number of rows per target class.
print("\nClass Distribution:")
class_counts = df['CLASS ID'].value_counts()
print(class_counts)

# %% Feature Selection and Target Variable
# 'CLASS ID' is the label; every other column is used as a feature.
y = df['CLASS ID']
X = df.drop(columns='CLASS ID')

# %% Train-Test Split
# Hold out 20% of the rows; stratify=y keeps the class proportions of y in
# both partitions, and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# %% Define Base Models and Meta Model
adaboost = AdaBoostClassifier(random_state=42)
nb_classifier = GaussianNB()
meta_model = LogisticRegression()

# %% Create Stacking Classifier
stacked_model = StackingClassifier(
    estimators=[('adaboost', adaboost), ('naive_bayes', nb_classifier)],
    final_estimator=meta_model
)

# %% Address Class Imbalance (SMOTE) and Feature Scaling inside a Pipeline
# SMOTE and StandardScaler are pipeline steps rather than one-off
# preprocessing so that GridSearchCV re-fits them on the training part of
# every CV fold. Oversampling and scaling the whole training set *before*
# the search lets synthetic points and scaling statistics derived from the
# validation rows leak into training, which inflates the CV accuracy and
# biases the hyperparameter choice. The imblearn Pipeline applies the sampler
# only while fitting, never when predicting.
training_pipeline = Pipeline(steps=[
    ('smote', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('stack', stacked_model),
])

# %% Define Hyperparameter Grid
# Same candidate values as before; the 'stack__' prefix routes each
# parameter to the stacking step of the pipeline.
param_grid = {
    'stack__adaboost__n_estimators': [50, 100],
    'stack__adaboost__learning_rate': [0.01, 0.1],
    'stack__final_estimator__C': [0.1, 1, 10]
}

# %% Perform Grid Search for Hyperparameter Tuning
# NOTE(review): validation folds now keep the original class balance, so
# plain accuracy is dominated by the majority class. Consider
# 'balanced_accuracy' or 'f1_macro' if minority-class recall matters.
grid_search = GridSearchCV(
    estimator=training_pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1
)
grid_search.fit(X_train, y_train)

# Retrieve the best model.
# With the default refit=True the winning pipeline is re-fitted on all of
# X_train (SMOTE -> scaler -> stack). Its fitted steps are exposed under the
# names the rest of the script uses.
best_pipeline = grid_search.best_estimator_
smote = best_pipeline.named_steps['smote']
scaler = best_pipeline.named_steps['scaler']
best_stacked_model = best_pipeline.named_steps['stack']  # expects scaled input

# Rebuild the balanced, scaled training set for any later cell that inspects
# it. The integer random_state makes SMOTE reproduce the same resampling that
# was used during the final refit.
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
X_train_resampled = scaler.transform(X_train_resampled)

# Scale the held-out features with the statistics learned from training data
# only, because best_stacked_model is used directly (without the pipeline).
X_test = scaler.transform(X_test)

# %% Model Evaluation
# Score the tuned model on the held-out split.
y_pred = best_stacked_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy: {:.4f}".format(accuracy))

# Per-class precision / recall / F1.
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

# Counts of true versus predicted labels.
print("\nConfusion Matrix:")
confusion = confusion_matrix(y_test, y_pred)
print(confusion)

# Parameter combination selected by the grid search.
print("\nBest Hyperparameters:")
chosen_params = grid_search.best_params_
print(chosen_params)

# %% Save the Best Model
# NOTE: only the classifier is written to disk. The scaler used to prepare
# its inputs is not part of this file, so a fresh session needs that scaler
# as well before it can feed raw feature values to the loaded model.
with open('best_model.pkl', 'wb') as model_file:
    pickle.Pickler(model_file).dump(best_stacked_model)

# %% Load and Use the Model
# Round-trip through the file to confirm the persisted model is usable.
with open('best_model.pkl', 'rb') as model_file:
    model = pickle.Unpickler(model_file).load()

# %% Test with a Single Instance
def predict_instance(model, instance, feature_scaler=None, positive_label=1):
    """Scale raw feature rows, classify them and print the first row's result.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``
            (and normally ``classes_``).
        instance: 2-D array-like of raw, unscaled feature values, one sample
            per row. Only the result for the first row is printed.
        feature_scaler: Fitted transformer whose ``transform`` is applied to
            ``instance`` before prediction. When omitted, the module-level
            ``scaler`` is used, so existing two-argument calls keep working.
        positive_label: Class label that stands for "diabetes". Defaults to 1.

    Returns:
        None. The prediction, or the error message if any step fails, is
        printed to stdout.
    """
    try:
        # Resolve the scaler lazily: the global is only needed when the
        # caller did not pass one explicitly.
        active_scaler = scaler if feature_scaler is None else feature_scaler
        scaled_instance = active_scaler.transform(instance)

        # Raw prediction (class label)
        raw_prediction = model.predict(scaled_instance)

        # The columns of predict_proba follow model.classes_, so look the
        # positive label up instead of assuming it sits in column 1.
        class_labels = getattr(model, "classes_", None)
        if class_labels is None:
            # No class listing available: keep the historical column choice.
            positive_column = 1
        else:
            class_labels = list(class_labels)
            if positive_label not in class_labels:
                raise ValueError(
                    f"positive_label {positive_label!r} is not one of the "
                    f"model classes {class_labels}"
                )
            positive_column = class_labels.index(positive_label)

        # Probability of diabetes
        probabilities = model.predict_proba(scaled_instance)[:, positive_column]

        print(f"Raw Prediction (Class Label): {raw_prediction[0]}")
        print(f"Probability: {probabilities[0]:.2f}")
        if raw_prediction[0] == positive_label:
            print("Diabetes Detected")
        else:
            print("No Diabetes Detected")
    except Exception as e:
        # Best-effort demo helper: report the problem instead of aborting the
        # surrounding script.
        print(f"Error: {e}")

# Example instances: replace with your own test data.
# Each array holds one sample (a single row of five raw feature values);
# presumably the values follow the column order of X - confirm against the
# workbook before relying on the results.
test_instance = np.array([[45, 1, 120, 195, 1]])
test_instance2 = np.array([[25, 0, 90, 140, 0]])
test_instance3 = np.array([[36, 1, 110, 141, 0]])
test_instance4 = np.array([[50, 0, 104, 139, 0]])
test_instance5 = np.array([[67, 1, 109, 138, 0]])
test_instance6 = np.array([[22, 0, 124, 200, 1]])
test_instance7 = np.array([[56, 1, 90, 80, 0]])

# Run the samples through the reloaded model in the order they are defined.
for example_row in (
    test_instance,
    test_instance2,
    test_instance3,
    test_instance4,
    test_instance5,
    test_instance6,
    test_instance7,
):
    predict_instance(model, example_row)