# 01. 📦 Import Libraries & Utilities

"""
This section imports all required libraries: custom utilities (MyLibrary),
data handling tools (Pandas, NumPy), machine learning tools (scikit-learn, imbalanced-learn, XGBoost),
and utilities for visualization, saving models, and statistical analysis.
"""

In [None]:
import datetime

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, recall_score, precision_score, roc_auc_score, roc_curve, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

import MyLibrary as lib

# 02. 📊 Load & Explore Data

"""
Load the dataset using the custom readfile function and explore it using another custom function.
"""

In [None]:
# Read the bank-customer CSV through the project's own loader, then hand the
# result to the project's exploration helper.
# NOTE(review): both helpers live in MyLibrary; what they return or print is
# not visible from this file — confirm against that module.
dataset_path = "../Data/bankcustomer.csv"
data = lib.readfile(dataset_path)
lib.DataExploration(data)

# 03. 🧼 Data Cleaning & Preprocessing

"""
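Split the dataset into features and labels, hold out a stratified 20% test set, and apply standard scaling fitted on the training split only.
Note: this cell does not handle missing values itself; any such handling must happen before this point.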
"""

In [None]:
# The target column is "Exited"; every remaining column is used as a feature.
y = data["Exited"]
X = data.drop(columns="Exited")

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions approximately equal in both splits, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set statistics are used.
# NOTE(review): StandardScaler requires numeric input — presumably
# lib.readfile already returns numeric/encoded columns; confirm.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 04. ⚖️ Handle Imbalanced Data with SMOTE

"""
Apply SMOTE to oversample the minority class in the training set.
"""

In [None]:
# Oversample using the scaled training split only; the test split is never
# passed to SMOTE, so evaluation still runs on untouched data.
smote = SMOTE(random_state=42)
balanced_pair = smote.fit_resample(X_train_scaled, y_train)
X_resampled, y_resampled = balanced_pair

# 05. 🤖 Model Training (XGBoost)

"""
Train an XGBoost classifier on the resampled data.
"""

In [None]:
# Baseline XGBoost classifier: log-loss as the evaluation metric and a fixed
# seed for repeatable training.
# NOTE(review): `use_label_encoder` is reported as deprecated/unused by newer
# XGBoost releases — confirm against the installed version before removing it.
model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
)

# Fit on the SMOTE-balanced training data produced in the previous step.
model.fit(X=X_resampled, y=y_resampled)

# 06. 📏 Initial Evaluation

"""
Evaluate model performance using recall, precision, accuracy, ROC AUC, and a confusion matrix.
"""

In [None]:
# Hard class predictions drive the threshold-dependent metrics below
# (classification report, confusion matrix, accuracy, recall).
y_pred = model.predict(X_test_scaled)

# ROC AUC is a ranking metric and must be computed from continuous scores.
# Feeding it the hard 0/1 predictions collapses the ROC curve to a single
# operating point and misreports the model's ranking quality. Column 1 of
# predict_proba holds the probability of the positive class (label 1).
y_score = model.predict_proba(X_test_scaled)[:, 1]

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_score))

# 07. 🔍 Hyperparameter Tuning

"""
Use RandomizedSearchCV to find the best hyperparameters for the XGBoost classifier.
"""

In [None]:
# Candidate values for the XGBoost hyperparameters explored by the search.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0]
}

# SMOTE has to run inside each cross-validation training fold. Searching on
# data that was resampled beforehand lets synthetic points interpolated from
# validation-fold rows leak into the training folds (and puts synthetic rows
# in the validation folds), which inflates the CV recall used to pick the
# winner. The imblearn Pipeline applies the sampler during fit only, so each
# validation fold stays made of real, un-resampled rows.
tuning_pipeline = Pipeline([
    ("smote", SMOTE(random_state=42)),
    ("xgb", XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
])

# Pipeline parameters are addressed as "<step>__<param>", so the grid keys
# are prefixed with the classifier's step name.
search = RandomizedSearchCV(
    tuning_pipeline,
    param_distributions={f"xgb__{name}": values for name, values in param_grid.items()},
    n_iter=10,
    scoring='recall',
    cv=3,
    random_state=42,
    n_jobs=-1,
)

# Fit on the scaled but NOT pre-resampled training split; the pipeline does
# the oversampling itself, per fold and again for the final refit.
search.fit(X_train_scaled, y_train)

# Strip the step prefix so the report shows plain XGBoost parameter names.
print("Best Parameters:", {name.split("__", 1)[1]: value for name, value in search.best_params_.items()})

# Keep `best_model` a plain fitted XGBClassifier: the refit pipeline trained
# this step on the SMOTE-balanced full training split.
best_model = search.best_estimator_.named_steps["xgb"]

# 08. 🧪 Final Evaluation

"""
Evaluate the best-tuned model on the test set.
"""

In [None]:
# Hard class predictions from the tuned model drive the threshold-dependent
# metrics (classification report, confusion matrix, accuracy, recall).
y_final_pred = best_model.predict(X_test_scaled)

# ROC AUC needs continuous scores rather than 0/1 labels; otherwise the curve
# degenerates to a single operating point. Column 1 of predict_proba holds
# the probability of the positive class (label 1).
y_final_score = best_model.predict_proba(X_test_scaled)[:, 1]

print("Final Classification Report:\n", classification_report(y_test, y_final_pred))
print("Final Confusion Matrix:\n", confusion_matrix(y_test, y_final_pred))
print("Final Accuracy:", accuracy_score(y_test, y_final_pred))
print("Final Recall:", recall_score(y_test, y_final_pred))
print("Final ROC AUC:", roc_auc_score(y_test, y_final_score))

# 09. 💾 Save the Final Model

"""
Save the trained model and scaler for future inference.
"""

In [None]:
# Persist the tuned classifier and the fitted scaler side by side: inference
# has to apply the same scaling that was used during training before calling
# the model. Both files are written relative to the current working directory.
joblib.dump(value=best_model, filename="final_xgb_model.joblib")
joblib.dump(value=scaler, filename="scaler.joblib")

# 10. ✅ Conclusion

"""
The pipeline successfully trained and evaluated an XGBoost classifier for customer churn prediction.
The best model was saved and is ready for deployment.
"""

In [None]:
# Final marker: reaching this line means every earlier cell ran without raising.
print("✅ Pipeline Completed Successfully.")