In [None]:
# 📦 Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# 📥 Read the raw dataset
# The path is relative to the working directory; replace it with your own CSV.
df = pd.read_csv("exchange_dataset_enhanced.csv")
df.head()  # preview only; a notebook renders this just when it is the cell's last expression


# 🛠️ Optional derived feature (adjust to your dataset)
# Added only when both source columns are present. The +1 in the denominator
# presumably guards against dividing by an age of 0 — confirm against the data.
if {'customer_age', 'customer_engagement_score'}.issubset(df.columns):
    df['engagement_to_age_ratio'] = df['customer_engagement_score'].div(df['customer_age'] + 1)

# 🧼 Missing values: baseline strategy is to discard every row containing a NaN.
# Runs after the feature step, so rows where the derived ratio is NaN go too.
df.dropna(inplace=True)

# 🧠 Split the frame into target (`exchange`) and predictors (everything else).
y = df['exchange']
X = df.drop(columns='exchange')

# 🧪 Train-Test Split
# Hold out 20% of the rows for evaluation. `stratify=y` keeps the class
# proportions the same in both partitions, and the fixed `random_state`
# makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 🚀 Train XGBoost Model
# `use_label_encoder` is deliberately NOT passed: the built-in label encoder
# was removed in XGBoost 1.6 and the keyword is no longer accepted as a real
# parameter in 2.x, where supplying it only triggers a
# "Parameters: { use_label_encoder } are not used" warning.
# `eval_metric='logloss'` is kept explicit; `random_state` seeds the booster.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# 🔍 Predictions
# Hard class predictions for the held-out rows (used by the evaluation below).
y_pred = model.predict(X_test)

# 📊 Evaluation on the held-out set
# Text summary first: the per-class metrics produced by classification_report.
print("\n📊 Classification Report for XGBoost:")
report = classification_report(y_test, y_pred)
print(report)

# Then the confusion matrix, drawn as an annotated heatmap. Integer cell
# counts are rendered with fmt='d'; the axis labels below mark rows as the
# actual class and columns as the predicted class.
print("\n🧮 Confusion Matrix for XGBoost:")
cm = confusion_matrix(y_test, y_pred)
cm_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")
cm_ax.set_title("Confusion Matrix - XGBoost")
plt.show()

# 📈 Optional: feature-importance chart
# Pair each predictor column with the importance score the fitted model
# reports for it, ordered from most to least important.
feat_names = X.columns
importances = model.feature_importances_
feat_imp = pd.Series(data=importances, index=feat_names)
feat_imp = feat_imp.sort_values(ascending=False)

# Horizontal bars: importance on the x-axis, feature name on the y-axis.
plt.figure(figsize=(10, 6))
imp_ax = sns.barplot(x=feat_imp, y=feat_imp.index)
imp_ax.set_title("Feature Importances (XGBoost)")
plt.show()
