In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score, 
                             recall_score, f1_score, matthews_corrcoef)

# 1. Load Dataset
# Google Drive file ID of the training CSV. It is interpolated into the
# direct-download URL below so the ID lives in exactly one place.
file_id = '1y0RiYh66BRUcKZ4En5imj5ZKMYHGJx6n'
# NOTE: the placeholder must not reuse the enclosing quote character inside
# the f-string; that is a SyntaxError on Python versions before 3.12.
train_download_url = f'https://drive.google.com/uc?export=download&id={file_id}'
# Read the CSV straight from the URL into a DataFrame (requires network access).
df = pd.read_csv(train_download_url)

# 2. Preprocessing
# Integer-encode the non-numeric columns (the target 'Revenue' included).
# One encoder instance is refit on each column in turn, so once this has run
# `le` only holds the mapping of the last column, 'Revenue'.
categorical_cols = ['Month', 'VisitorType', 'Weekend', 'Revenue']
le = LabelEncoder()
df[categorical_cols] = df[categorical_cols].apply(le.fit_transform)

# Target vector first, then every remaining column as the feature matrix.
y = df['Revenue']
X = df.drop(columns='Revenue')

# 3. Split Data
# 70/30 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 4. Initialize and Train XGBoost
# Build the classifier and fit it on the training split in one expression;
# fit() hands back the estimator itself, so `xgb_model` is the trained model.
# Note that no `use_label_encoder` argument is passed here.
xgb_model = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    # Set explicitly; presumably this silences a default-metric warning on
    # some XGBoost releases -- confirm against the installed version.
    eval_metric='logloss',
    random_state=42,
).fit(X_train, y_train)

# 5. Predictions
# Hard class labels, plus the predicted probability of the positive class
# (column 1 of predict_proba) for the ranking-based AUC metric.
y_pred = xgb_model.predict(X_test)
y_probs = xgb_model.predict_proba(X_test)[:, 1]

# 6. Results
# Each row is (report label, scoring function, model output it is fed).
# Only AUC consumes the probabilities; every other metric uses the labels.
scorers = (
    ("Accuracy", accuracy_score, y_pred),
    ("AUC Score", roc_auc_score, y_probs),
    ("Precision", precision_score, y_pred),
    ("Recall", recall_score, y_pred),
    ("F1 Score", f1_score, y_pred),
    ("MCC Score", matthews_corrcoef, y_pred),
)
metrics = {
    label: scorer(y_test, model_output)
    for label, scorer, model_output in scorers
}

# Display results
# One line per metric, rounded to four decimal places, under a header line.
print("--- XGBoost Evaluation Metrics ---")
print("\n".join(f"{label}: {score:.4f}" for label, score in metrics.items()))

--- XGBoost Evaluation Metrics ---
Accuracy: 0.9002
AUC Score: 0.9262
Precision: 0.7173
Recall: 0.5913
F1 Score: 0.6482
MCC Score: 0.5945
