In [1]:
import pandas as pd
import numpy as np

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    matthews_corrcoef, roc_auc_score, confusion_matrix, classification_report
)
import joblib

In [2]:
# Robust target loader: works whether 'target' header exists or not
def load_target(path):
    """Load integer class labels from a CSV file.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file holding the labels.

    Returns
    -------
    pandas.Series
        The 'target' column when the file has one, otherwise the first
        column of the file, cast to int.

    Raises
    ------
    ValueError
        Propagated from ``astype(int)`` when a label cannot be converted
        to an integer.
    """
    df = pd.read_csv(path)
    if 'target' in df.columns:
        return df['target'].astype(int)

    # read_csv always treats the first row as the header. If the file has no
    # header at all, the first label ends up as the column name and would be
    # silently dropped, so detect a numeric "header" and re-read without one.
    first_name = str(df.columns[0]).strip()
    header_is_data = first_name.lstrip('+-').replace('.', '', 1).isdigit()
    if header_is_data:
        df = pd.read_csv(path, header=None)

    return df.iloc[:, 0].astype(int)

In [4]:
# 1) Load the standard-scaled feature matrices (train and test splits)
#    that the GaussianNB model is fit and evaluated on
X_train, X_test = (
    pd.read_csv('X_train_standardscaled.csv'),
    pd.read_csv('X_test_standardscaled.csv'),
)

In [5]:
# 2) Load targets
# Both label files go through load_target so they are parsed the same way
y_train, y_test = (
    load_target('y_train.csv'),
    load_target('y_test.csv'),
)

In [6]:
# Sanity checks
# Report the shape of every split so row/column mismatches are visible at a glance
print(
    "Shapes:",
    f"  X_train: {X_train.shape}, y_train: {y_train.shape}",
    f"  X_test:  {X_test.shape},  y_test:  {y_test.shape}",
    sep="\n",
)

# Fail fast when either feature matrix contains NaN or +/-inf
if not (np.isfinite(X_train.values).all() and np.isfinite(X_test.values).all()):
    raise ValueError("Non-finite values detected in features. Please re-run preprocessing.")
# Fail fast when either label vector has missing entries
if y_train.isnull().any() or y_test.isnull().any():
    raise ValueError("NaNs found in y_train/y_test. Please re-run preprocessing.")

Shapes:
  X_train: (455, 30), y_train: (455,)
  X_test:  (114, 30),  y_test:  (114,)


In [7]:
# 3) Train Gaussian Naive Bayes
# var_smoothing is the fraction of the largest feature variance that gets added
# to every variance for numerical stability (1e-9 is scikit-learn's default)
gnb = GaussianNB(var_smoothing=1e-9)
gnb.fit(X=X_train, y=y_train)

In [8]:
# 4) Predictions & probabilities
# Column 1 of predict_proba (the second class) is kept for the AUC computation
y_proba = gnb.predict_proba(X_test)[:, 1]
# Hard class labels, used by the label-based metrics
y_pred = gnb.predict(X_test)

In [9]:
# 5) Metrics
# Every label-based score compares the hard predictions against the true labels
acc, prec, rec, f1, mcc = (
    scorer(y_test, y_pred)
    for scorer in (
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
        matthews_corrcoef,
    )
)
# AUC is computed from the predicted probabilities rather than the hard labels
auc = roc_auc_score(y_test, y_proba)

# Summary table: one line per metric, four decimal places
print(
    "\n=== Gaussian Naive Bayes Performance (StandardScaled features) ===",
    f"Accuracy:  {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall:    {rec:.4f}",
    f"F1 Score:  {f1:.4f}",
    f"MCC:       {mcc:.4f}",
    f"AUC:       {auc:.4f}",
    sep="\n",
)

# Per-class breakdowns
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("\nClassification Report:", classification_report(y_test, y_pred, digits=4), sep="\n")


=== Gaussian Naive Bayes Performance (StandardScaled features) ===
Accuracy:  0.9211
Precision: 0.9231
Recall:    0.8571
F1 Score:  0.8889
MCC:       0.8292
AUC:       0.9891

Confusion Matrix:
[[69  3]
 [ 6 36]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9200    0.9583    0.9388        72
           1     0.9231    0.8571    0.8889        42

    accuracy                         0.9211       114
   macro avg     0.9215    0.9077    0.9138       114
weighted avg     0.9211    0.9211    0.9204       114



In [10]:
# 6) Save model
# Persist the fitted classifier to disk; it can be restored later with joblib.load
joblib.dump(value=gnb, filename='gaussian_nb_model.pkl')
print("\nSaved: gaussian_nb_model.pkl")


Saved: gaussian_nb_model.pkl
