In [2]:
import pandas as pd, joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Sanity-check cell: load the cleaned dataset, the persisted model pipeline and
# the dashboard export, then verify the export's columns and re-score the model.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files you trust.
df = pd.read_csv('../dataset/cleaned_telco_churn.csv')
pipeline = joblib.load('../models/churn_pipeline.pkl')
dash = pd.read_csv('../dataset/dashboard_data.csv')

# 1. Check dashboard has both columns
# Raise explicitly instead of using `assert`: asserts are stripped under
# `python -O`, and this error names the column(s) that are actually missing.
missing_cols = [col for col in ('ChurnProb', 'ChurnPred') if col not in dash.columns]
if missing_cols:
    raise ValueError(
        f"dashboard_data.csv is missing required column(s): {missing_cols}"
    )

# 2. Recompute metrics
X = df.drop(['customerID', 'Churn'], axis=1)
y = df['Churn'].map({'No': 0, 'Yes': 1})

# Series.map turns every label that is not a key of the dict into NaN; fail
# here with a clear message rather than deep inside the split or the metric.
unmapped = y.isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, 'Churn'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'Churn' (expected 'No'/'Yes'): {unexpected}"
    )

# NOTE(review): presumably this reproduces the train/test split used when the
# pipeline was fitted (same test_size / random_state / stratify) -- confirm
# against the training notebook, otherwise the AUC below is not a held-out score.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Column 1 of predict_proba is the probability of the positive class (label 1).
y_proba = pipeline.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_proba)
print("ROC AUC:", auc)

# 3. Confirm pipeline loads; print its steps so the configuration can be
#    compared by eye with the pipeline that was just saved.
print("Pipeline steps:", pipeline.named_steps)


ROC AUC: 0.8373563992179419
Pipeline steps: {'prep': ColumnTransformer(transformers=[('num', StandardScaler(),
                                 ['tenure', 'MonthlyCharges', 'TotalCharges']),
                                ('cat',
                                 OneHotEncoder(drop='first',
                                               sparse_output=False),
                                 ['gender', 'SeniorCitizen', 'Partner',
                                  'Dependents', 'PhoneService', 'MultipleLines',
                                  'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection',
                                  'TechSupport', 'StreamingTV',
                                  'StreamingMovies', 'Contract',
                                  'PaperlessBilling', 'PaymentMethod'])]), 'smote': SMOTE(random_state=42), 'clf': GradientBoostingClassifier(learning_rate=0.05, n_iter_no_change=10,
                           random