# Customer Churn with Explainable AI — Answer Key
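This notebook trains a logistic-regression churn model, explains its predictions with SHAP (falling back to permutation importance when SHAP fails), and reports ROC AUC per customer region.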

In [1]:
# 🔎 Environment status check
import sys, platform
import pandas as pd, numpy as np, matplotlib

# Emit one aligned "label value" line per component of the runtime environment.
for label, value in (
    ("Python executable:", sys.executable),
    ("Python version   :", platform.python_version()),
    ("Pandas version   :", pd.__version__),
    ("NumPy version    :", np.__version__),
    ("Matplotlib ver.  :", matplotlib.__version__),
):
    print(label, value)

Python executable: c:\Users\beall\miniconda3\envs\ai-scm-portfolio\python.exe
Python version   : 3.11.13
Pandas version   : 2.3.2
NumPy version    : 2.2.6
Matplotlib ver.  : 3.10.6


In [2]:
# Report the versions of the modelling / optimisation / simulation packages.
# A failed import prints the name of the missing package instead of raising.
try:
    import sklearn, pulp, simpy
    for label, module in (
        ("scikit-learn ver.:", sklearn),
        ("PuLP version     :", pulp),
        ("SimPy version    :", simpy),
    ):
        print(label, module.__version__)
except ImportError as missing:
    print("Missing package:", missing.name)


scikit-learn ver.: 1.7.2
PuLP version     : 2.8.0
SimPy version    : 4.1.1


In [3]:
# Report the versions of the explainability / forecasting / deep-learning /
# vision packages. All four are imported in a single statement, so the first
# one that is missing aborts the whole report.
try:
    import shap, prophet, tensorflow, cv2
    print("SHAP ver.        :", shap.__version__)
    print("Prophet ver.     :", prophet.__version__)
    print("TensorFlow ver.  :", tensorflow.__version__)
    print("OpenCV ver.      :", cv2.__version__)
except ImportError as e:
    # Name the missing package rather than swallowing the error silently, so
    # an empty cell output can no longer be mistaken for a successful check.
    print("Missing package:", e.name)


Importing plotly failed. Interactive plots will not work.


SHAP ver.        : 0.48.0
Prophet ver.     : 1.1.7
TensorFlow ver.  : 2.20.0
OpenCV ver.      : 4.12.0


In [None]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report
from sklearn.inspection import permutation_importance

# Load features and the churn label from two separate CSV files.
# NOTE(review): rows are paired purely by position — presumably both files
# share the same customer order; confirm before trusting the metrics.
X = pd.read_csv("../data/customers.csv")
y = pd.read_csv("../data/labels.csv")["churned"]

# Every column that is neither the identifier nor categorical is fed to the
# model unchanged.
cat_cols = ["region"]
num_cols = X.columns[~X.columns.isin(["customer_id"] + cat_cols)].tolist()

# One-hot encode the categorical columns and pass the remaining feature
# columns through as-is; columns not listed in either group are not forwarded.
pre = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols),
    ]
)
model = LogisticRegression(max_iter=500)
pipe = Pipeline(steps=[("pre", pre), ("clf", model)])

# Hold out 20% of the rows for evaluation, stratified on the label so both
# splits keep the same churn rate; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
pipe.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# pipe.classes_ (presumably churned == 1 — confirm against the label encoding).
proba = pipe.predict_proba(X_test)[:, 1]

# Use the round() builtin on a plain float: calling .round() on the score only
# works when the metric returns a NumPy scalar and raises AttributeError when
# it returns a built-in float. The printed text is the same in both cases.
print("ROC AUC:", round(float(roc_auc_score(y_test, proba)), 3))
print(classification_report(y_test, (proba > 0.5).astype(int)))

# Explainability
# Preferred path: a SHAP beeswarm plot over a sample of the test set. Any
# failure on that path (SHAP not installed, unsupported model, ...) falls back
# to permutation importance computed on the full test set.
try:
    import shap
    # NOTE(review): shap.Explainer may not accept a scikit-learn Pipeline
    # directly — confirm; if it raises, the fallback below is what actually runs.
    explainer = shap.Explainer(pipe, X_train)
    # Cap the sample at the test-set size: DataFrame.sample raises when asked
    # for more rows than exist, which would be misreported as "SHAP unavailable".
    sv = explainer(X_test.sample(min(200, len(X_test)), random_state=42))
    shap.plots.beeswarm(sv, show=False)
except Exception as e:
    print("SHAP unavailable, using permutation importances:", e)
    r = permutation_importance(pipe, X_test, y_test, n_repeats=5, random_state=42)
    # permutation_importance shuffles the columns of the X it is given, so
    # importances_mean has one entry per raw input column of X_test — not one
    # per transformed (one-hot expanded) feature. Index by the raw columns so
    # the lengths match.
    importances = pd.Series(r.importances_mean, index=X_test.columns)
    print(importances.sort_values(ascending=False).head(10))

# Simple subgroup metric
# Attach the true label ("y") and predicted probability ("p") to the test
# features, then report ROC AUC separately for each region.
df_test = X_test.assign(y=y_test.values, p=proba)

# groupby(sort=False) visits regions in order of first appearance (the same
# order unique() gives) and skips rows whose region is missing, which would
# otherwise produce an empty subset and crash the metric.
for region, sub in df_test.groupby("region", sort=False):
    if sub["y"].nunique() < 2:
        # ROC AUC is undefined when a subgroup contains only one class.
        print(f"AUC for region={region}: n/a (only one class present)")
        continue
    auc = roc_auc_score(sub["y"], sub["p"])
    print(f"AUC for region={region}: {auc:.3f}")
