In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:

# Read the dataset; the path is relative to the working directory,
# so adjust it if the CSV lives elsewhere.
csv_path = "diabetes.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:

# Separate the predictor columns from the 'Outcome' target column.
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# rows (data leakage), which makes the test metrics optimistic.
# Same test_size / stratify / random_state as before, so the same rows end up
# in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Retained so any cell that still refers to X_scaled keeps working; it is the
# full feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)


In [None]:

# Oversample with SMOTE using the training split only; the test split is not
# passed to the sampler, so it is left as-is for evaluation.
smote = SMOTE(random_state=42)
resampled_pair = smote.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled_pair
print("Resampled dataset shape:", X_resampled.shape)


In [None]:

# Fit a random forest on the resampled training data and predict the test split.
# NOTE(review): the training data has already been through SMOTE, so
# class_weight='balanced' is presumably a no-op here — confirm before removing.
rf = RandomForestClassifier(random_state=42, class_weight='balanced')
rf.fit(X_resampled, y_resampled)
y_rf_pred = rf.predict(X_test)

# Per-class precision / recall / F1 on the held-out test split.
rf_report = classification_report(y_test, y_rf_pred)
print("Random Forest Results:")
print(rf_report)


In [None]:

# Train XGBoost on the resampled training data, then label the test split
# with a custom probability cut-off of 0.4 rather than calling predict().
xgb = XGBClassifier(eval_metric='logloss', use_label_encoder=False)
xgb.fit(X_resampled, y_resampled)

# Second column of predict_proba (presumably the probability of Outcome == 1
# — confirm against the label encoding in the CSV).
test_probabilities = xgb.predict_proba(X_test)
y_probs = test_probabilities[:, 1]

# Strictly-greater-than comparison: a probability of exactly 0.4 maps to 0.
above_cutoff = y_probs > 0.4
y_xgb_thresh = above_cutoff.astype(int)

xgb_report = classification_report(y_test, y_xgb_thresh)
print("XGBoost (Threshold 0.4) Results:")
print(xgb_report)


In [None]:

# Draw the confusion matrix for the thresholded XGBoost predictions as an
# annotated heatmap with integer cell labels.
cm = confusion_matrix(y_test, y_xgb_thresh)
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set_title("Confusion Matrix (XGBoost Threshold 0.4)")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()
