In [1]:
import joblib
import shap
import pandas as pd
import matplotlib.pyplot as plt
import os

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
# ---------------------------------------------------------------
# Load Model and Data
# ---------------------------------------------------------------
# Load final model (Logistic Regression)
model = joblib.load("../model/churn_model.pkl")

# Load feature columns (saved earlier)
feature_columns = joblib.load("../model/feature_columns.pkl")

# Load cleaned dataset
df = pd.read_csv("../data/cleaned_churn_data.csv")

# Split back into X and y
X = df.drop("Churn", axis=1)
y = df["Churn"]

# Ensure feature alignment
X = X[feature_columns]

In [7]:
# ---------------------------------------------------------------
# Compute SHAP Values (for Logistic Regression)
# ---------------------------------------------------------------
explainer = shap.Explainer(model, X)
shap_values = explainer(X)

# ---------------------------------------------------------------
#  Create Results Folder for Plots
# ---------------------------------------------------------------
os.makedirs("../results/eda_plots", exist_ok=True)

# ---------------------------------------------------------------
# SHAP Summary Plot (Global Feature Impact)
# ---------------------------------------------------------------
plt.title("SHAP Summary â€“ Feature Impact on Churn")
shap.summary_plot(shap_values, X, plot_type="bar", show=False)
plt.tight_layout()
plt.savefig("../results/eda_plots/shap_summary.png", bbox_inches='tight', dpi=300)
plt.close()
print(" Saved SHAP summary plot as 'results/eda_plots/shap_summary.png'")  




 Saved SHAP summary plot as 'results/eda_plots/shap_summary.png'


In [8]:
# ---------------------------------------------------------------
# Model Performance Summary
# ---------------------------------------------------------------

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Create results directory
os.makedirs("../results/metrics_plots", exist_ok=True)

# ---------------------------------------------------------------
#  Load Data & Models
# ---------------------------------------------------------------

df = pd.read_csv("../data/cleaned_churn_data.csv")
feature_columns = joblib.load("../model/feature_columns.pkl")

X = df.drop("Churn", axis=1)[feature_columns]
y = df["Churn"]

# Load individual models if saved previously
log_reg = joblib.load("../model/logistic_regression_baseline.pkl") if os.path.exists("../model/logistic_regression_baseline.pkl") else None

# Load final pipeline model
pipeline = joblib.load("../model/churn_model.pkl")

models = {
    "Logistic Regression": log_reg,
    "Final Pipeline Model": pipeline
}

# ---------------------------------------------------------------
# Evaluate Models
# ---------------------------------------------------------------

metrics_list = []

for name, model in models.items():
    if model is None:
        continue

    y_pred = model.predict(X)
    y_prob = model.predict_proba(X)[:, 1]

    metrics_list.append({
        "Model": name,
        "Accuracy": accuracy_score(y, y_pred),
        "Precision": precision_score(y, y_pred),
        "Recall": recall_score(y, y_pred),
        "F1 Score": f1_score(y, y_pred),
        "ROC-AUC": roc_auc_score(y, y_prob)
    })

metrics_df = pd.DataFrame(metrics_list)
metrics_df

# Save report
metrics_df.to_csv("../results/metrics_report.csv", index=False)
print(" Saved metrics report to 'results/metrics_report.csv'")

# ---------------------------------------------------------------
# Plot: Model Comparison (Accuracy, F1, ROC-AUC)
# ---------------------------------------------------------------

plt.figure(figsize=(10,6))
sns.barplot(x="Model", y="ROC-AUC", data=metrics_df, palette="coolwarm")
plt.title("ROC-AUC Comparison of Models")
plt.savefig("../results/metrics_plots/roc_auc_comparison.png", dpi=300, bbox_inches='tight')
plt.close()

plt.figure(figsize=(10,6))
sns.barplot(x="Model", y="Accuracy", data=metrics_df, palette="Blues")
plt.title("Accuracy Comparison of Models")
plt.savefig("../results/metrics_plots/accuracy_comparison.png", dpi=300, bbox_inches='tight')
plt.close()

print(" Saved model comparison plots in results/metrics_plots/")


 Saved metrics report to 'results/metrics_report.csv'




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.



 Saved model comparison plots in results/metrics_plots/
