In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score, 
                             recall_score, f1_score, matthews_corrcoef, 
                             confusion_matrix, classification_report)

# 1. Load Dataset
# Google Drive file ID of the CSV; it is interpolated into the
# direct-download URL below so the ID lives in exactly one place.
file_id = '1y0RiYh66BRUcKZ4En5imj5ZKMYHGJx6n'
# Interpolating the variable (instead of a quoted literal inside the braces)
# keeps this line valid on Python versions older than 3.12, where reusing the
# enclosing quote character inside an f-string field is a syntax error.
train_download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
# Read the remote CSV straight into a DataFrame (requires network access).
df = pd.read_csv(train_download_url)

# 2. Preprocessing
# Replace each string/boolean column with integer codes. A single encoder
# instance is re-fitted per column, so after the loop `le` holds the mapping
# of the last column processed ('Revenue', the target variable).
le = LabelEncoder()
for encoded_column in ('Month', 'VisitorType', 'Weekend', 'Revenue'):
    df[encoded_column] = le.fit_transform(df[encoded_column])

# Separate the target from the feature matrix.
y = df['Revenue']
X = df.drop(columns='Revenue')

# 3. Split Data (Training and Test)
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 4. Feature Scaling
# The scaler learns its statistics from the training split only and then
# applies that same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 5. Initialize and Train Logistic Regression
# max_iter is raised to 1000 to give the solver more iterations to converge.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

# 6. Predictions
# Hard class labels for the threshold-based metrics.
y_pred = lr_model.predict(X_test)
# Second probability column, kept separately because it is required for AUC.
test_probabilities = lr_model.predict_proba(X_test)
y_probs = test_probabilities[:, 1]

# 7. Results
# Each row is (display label, scoring function, model output it consumes).
# AUC is scored on the predicted probabilities; every other metric is scored
# on the hard class predictions.
scorers = [
    ("Accuracy", accuracy_score, y_pred),
    ("AUC Score", roc_auc_score, y_probs),
    ("Precision", precision_score, y_pred),
    ("Recall", recall_score, y_pred),
    ("F1 Score", f1_score, y_pred),
    ("MCC Score", matthews_corrcoef, y_pred),
]
metrics = {
    label: scorer(y_test, model_output)
    for label, scorer, model_output in scorers
}

# Display Results
# One line per metric, rounded to four decimal places.
print("--- Logistic Regression Evaluation Metrics ---")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

# Classification Report
# Per-class precision/recall/F1 table produced by scikit-learn.
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

--- Logistic Regression Evaluation Metrics ---
Accuracy: 0.8805
AUC Score: 0.8794
Precision: 0.7472
Recall: 0.3496
F1 Score: 0.4763
MCC Score: 0.4574

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.98      0.93      3124
           1       0.75      0.35      0.48       575

    accuracy                           0.88      3699
   macro avg       0.82      0.66      0.70      3699
weighted avg       0.87      0.88      0.86      3699

