In [1]:
import pandas as pd
import shap
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load data
X = pd.read_csv("../data/X_features.csv")
y = pd.read_csv("../data/y_target.csv").squeeze()

# Train model
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# SHAP explainer (stable)
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# Convert SHAP values to DataFrame (NO PLOTTING)
shap_df = pd.DataFrame(
    abs(shap_values.values),
    columns=X.columns
)

# Global importance
shap_importance = shap_df.mean().sort_values(ascending=False)
shap_importance.head(10)


  from .autonotebook import tqdm as notebook_tqdm


tenure                         1.117868
InternetService_Fiber optic     0.59683
Contract_Two year              0.448746
TotalCharges                   0.429393
MonthlyCharges                 0.412373
Contract_One year              0.260147
MultipleLines_Yes              0.185359
StreamingMovies_Yes            0.184434
PaperlessBilling               0.182334
StreamingTV_Yes                0.175011
dtype: object