In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")

# -------------------------------------------------
# Step 1 — Load Dataset
# -------------------------------------------------
df = pd.read_csv("pima-indians-diabetes.data.csv", header=None)

df.columns = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
    'Insulin', 'BMI', 'DiabetesPedigree', 'Age', 'Outcome'
]

# -------------------------------------------------
# Step 2 — Basic Cleaning (replace zeros in some columns)
# -------------------------------------------------
cols_with_zero = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
for col in cols_with_zero:
    df[col] = df[col].replace(0, df[col].median())

# -------------------------------------------------
# Step 3 — Feature Selection
# -------------------------------------------------
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

# Normalize features (Prevents BernoulliNB warnings)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# -------------------------------------------------
# Step 4 — Train-test Split
# -------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)

# -------------------------------------------------
# Step 5 — Gaussian Naive Bayes
# -------------------------------------------------
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

# -------------------------------------------------
# Step 6 — Bernoulli Naive Bayes
# -------------------------------------------------
bnb = BernoulliNB()
bnb.fit(X_train, y_train)
y_pred_bnb = bnb.predict(X_test)

# -------------------------------------------------
# Step 7 — Evaluation
# -------------------------------------------------
acc_gnb = accuracy_score(y_test, y_pred_gnb)
f1_gnb = f1_score(y_test, y_pred_gnb)

acc_bnb = accuracy_score(y_test, y_pred_bnb)
f1_bnb = f1_score(y_test, y_pred_bnb)

print("====== GaussianNB Results ======")
print("Accuracy :", round(acc_gnb, 4))
print("F1 Score :", round(f1_gnb, 4))
print(classification_report(y_test, y_pred_gnb))

print("\n====== BernoulliNB Results ======")
print("Accuracy :", round(acc_bnb, 4))
print("F1 Score :", round(f1_bnb, 4))
print(classification_report(y_test, y_pred_bnb))


Accuracy : 0.7292
F1 Score : 0.6389
              precision    recall  f1-score   support

           0       0.80      0.76      0.78       123
           1       0.61      0.67      0.64        69

    accuracy                           0.73       192
   macro avg       0.71      0.72      0.71       192
weighted avg       0.74      0.73      0.73       192


Accuracy : 0.6406
F1 Score : 0.0282
              precision    recall  f1-score   support

           0       0.64      0.99      0.78       123
           1       0.50      0.01      0.03        69

    accuracy                           0.64       192
   macro avg       0.57      0.50      0.40       192
weighted avg       0.59      0.64      0.51       192

