In [2]:
from collections import Counter
import pandas as pd

df = pd.read_csv("diabetes.csv")

X = df.drop("Outcome", axis=1)
y = df["Outcome"]

print(Counter(y))


Counter({0: 500, 1: 268})


In [4]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(
    sampling_strategy={1: 468}, 
    random_state=200
)

X_smote, y_smote = smote.fit_resample(X, y)

In [5]:
print(Counter(y_smote))
print("Total records:", len(y_smote))

Counter({0: 500, 1: 468})
Total records: 968


In [28]:
df_smote = pd.concat([X_smote, y_smote], axis=1)
df_smote.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [29]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [30]:
X_train, X_test, y_train, y_test = train_test_split(
    X_smote,
    y_smote,
    test_size=0.2,
    random_state=42,
    stratify=y_smote
)

In [18]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

In [20]:
svm = SVC(kernel="rbf")
svm.fit(X_train, y_train)

In [21]:
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [23]:
y_pred_lr = lr.predict(X_test)

print("Logistic Regression Classification Report")
print(classification_report(y_test, y_pred_lr))

Logistic Regression Classification Report
              precision    recall  f1-score   support

           0       0.72      0.76      0.74       100
           1       0.73      0.69      0.71        94

    accuracy                           0.73       194
   macro avg       0.73      0.73      0.73       194
weighted avg       0.73      0.73      0.73       194



In [24]:
y_pred_svm = svm.predict(X_test)

print("Support Vector Machine Classification Report")
print(classification_report(y_test, y_pred_svm))

Support Vector Machine Classification Report
              precision    recall  f1-score   support

           0       0.77      0.72      0.75       100
           1       0.72      0.78      0.75        94

    accuracy                           0.75       194
   macro avg       0.75      0.75      0.75       194
weighted avg       0.75      0.75      0.75       194



In [25]:
y_pred_rf = rf.predict(X_test)

print("Random Forest Classification Report")
print(classification_report(y_test, y_pred_rf))

Random Forest Classification Report
              precision    recall  f1-score   support

           0       0.84      0.82      0.83       100
           1       0.81      0.83      0.82        94

    accuracy                           0.82       194
   macro avg       0.82      0.82      0.82       194
weighted avg       0.82      0.82      0.82       194



In [26]:
df.shape


(768, 9)