In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score, 
                             recall_score, f1_score, matthews_corrcoef)

# 1. Load Dataset
# The CSV is hosted on Google Drive; the direct-download endpoint takes the
# file's ID as the `id` query parameter.
file_id = '1y0RiYh66BRUcKZ4En5imj5ZKMYHGJx6n'
# Interpolate `file_id` instead of repeating the literal: nesting the same
# quote character inside an f-string is a SyntaxError before Python 3.12.
train_download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
df = pd.read_csv(train_download_url)

# 2. Preprocessing
# Replace every non-numeric column (including the target) with integer codes.
categorical_cols = ['Month', 'VisitorType', 'Weekend', 'Revenue']
le = LabelEncoder()
for col in categorical_cols:
    # The same encoder instance is re-fitted on each column, so once the loop
    # finishes `le` only remembers the mapping of the last column ('Revenue').
    encoded = le.fit_transform(df[col])
    df[col] = encoded

# Target vector first, then the feature matrix (everything except the target).
y = df['Revenue']
X = df.drop(columns='Revenue')

# 3. Split Data
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 4. Initialize and Train Naive Bayes
# GaussianNB is used because the features are continuous.
# `fit` returns the estimator itself, so construction and training can be chained.
nb_model = GaussianNB().fit(X_train, y_train)

# 5. Predictions
# Hard class labels for the threshold-based metrics ...
y_pred = nb_model.predict(X_test)
# ... and the probability of the second class in `nb_model.classes_`
# (column index 1) for the ranking-based AUC metric.
y_probs = nb_model.predict_proba(X_test)[:, 1]

# 6. Results
# Each entry is (display name, scoring function, predictions it consumes).
# AUC is scored on the predicted probabilities; every other metric is scored
# on the hard class labels.
_scorers = (
    ("Accuracy", accuracy_score, y_pred),
    ("AUC Score", roc_auc_score, y_probs),
    ("Precision", precision_score, y_pred),
    ("Recall", recall_score, y_pred),
    ("F1 Score", f1_score, y_pred),
    ("MCC Score", matthews_corrcoef, y_pred),
)
metrics = {
    label: scorer(y_test, estimate)
    for label, scorer, estimate in _scorers
}

# Display results, one metric per line, rounded to four decimals.
print("--- Naive Bayes Evaluation Metrics ---")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

--- Naive Bayes Evaluation Metrics ---
Accuracy: 0.8494
AUC Score: 0.8364
Precision: 0.5151
Recall: 0.5339
F1 Score: 0.5243
MCC Score: 0.4350
