In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler

# Load dataset
# The CSV is read without a header row, so column names are supplied here.
# Assumes "pima-indians-diabetes.data.csv" is in the current working directory.
column_names = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'
]
data = pd.read_csv('pima-indians-diabetes.data.csv', names=column_names)

# Display basic info
print("Dataset shape:", data.shape)
print(data.head())

# Split features and target
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# Split BEFORE scaling so the scaler never sees the held-out rows.
# Fitting the scaler on the full dataset would leak test-set min/max values
# into preprocessing and make the evaluation optimistic.
# stratify=y keeps the class ratio the same in both splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Rescale every feature to [0, 1] using statistics from the training rows only.
# MultinomialNB requires non-negative features; min-max scaling guarantees that
# (note: the result is non-negative floats, not integer counts).
# clip=True keeps transformed test values inside [0, 1] even when a test row
# falls outside the range observed during training.
scaler = MinMaxScaler(clip=True)
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling, kept so that any later
# code referring to `X_scaled` still works. Do not use it to fit or evaluate a
# model: it contains both training and test rows.
X_scaled = scaler.transform(X)

# =======================
# Gaussian Naive Bayes
# =======================
# Train on the training split and predict labels for the held-out split.
gnb = GaussianNB().fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

# Score the predictions against the true test labels.
acc_gnb = accuracy_score(y_true=y_test, y_pred=y_pred_gnb)
f1_gnb = f1_score(y_true=y_test, y_pred=y_pred_gnb)
report_gnb = classification_report(y_true=y_test, y_pred=y_pred_gnb)

# Header and the two summary metrics, one per line.
print(
    "\n=== Gaussian Naive Bayes ===",
    f"Accuracy: {acc_gnb:.4f}",
    f"F1-Score: {f1_gnb:.4f}",
    sep="\n",
)
print("Classification Report:\n", report_gnb)

# =======================
# Multinomial Naive Bayes
# =======================
# Train on the training split and predict labels for the held-out split.
# NOTE(review): MultinomialNB is designed for count-like features; the
# features here are presumably continuous values rescaled upstream, so this
# model may collapse to predicting a single class -- confirm that it is a
# meaningful baseline for this data.
mnb = MultinomialNB()
mnb.fit(X_train, y_train)
y_pred_mnb = mnb.predict(X_test)

acc_mnb = accuracy_score(y_test, y_pred_mnb)
# zero_division=0: when a class is never predicted, its precision/F-score is
# undefined. scikit-learn's default reports 0.0 and emits an
# UndefinedMetricWarning for each such computation; passing 0 explicitly
# yields the same numbers without the warning noise.
f1_mnb = f1_score(y_test, y_pred_mnb, zero_division=0)

print("\n=== Multinomial Naive Bayes ===")
print(f"Accuracy: {acc_mnb:.4f}")
print(f"F1-Score: {f1_mnb:.4f}")
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred_mnb, zero_division=0),
)


Dataset shape: (768, 9)
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

=== Gaussian Naive Bayes ===
Accuracy: 0.7446
F1-Score: 0.6289
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.81      0.81       150
           1       0.64      0.62      0.63        81

    accuracy               

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
