In [2]:
import pandas as pd

churn= pd.read_csv(f"final_churn_data.csv")

In [3]:
X = churn.drop('Exited', axis=1)
y = churn['Exited']


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_lr = log_reg.predict(X_test)

print("🔹 Logistic Regression Results:")
print(classification_report(y_test, y_pred_lr))
print("ROC-AUC:", roc_auc_score(y_test, log_reg.predict_proba(X_test)[:,1]))


🔹 Logistic Regression Results:
              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1593
           1       0.57      0.17      0.26       404

    accuracy                           0.81      1997
   macro avg       0.70      0.57      0.57      1997
weighted avg       0.77      0.81      0.76      1997

ROC-AUC: 0.76607279372005


In [6]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print("🔹 Random Forest Results:")
print(classification_report(y_test, y_pred_rf))
print("ROC-AUC:", roc_auc_score(y_test, rf.predict_proba(X_test)[:,1]))


🔹 Random Forest Results:
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      1593
           1       0.77      0.45      0.57       404

    accuracy                           0.86      1997
   macro avg       0.82      0.71      0.74      1997
weighted avg       0.85      0.86      0.85      1997

ROC-AUC: 0.8511899212520122


In [7]:
from xgboost import XGBClassifier

xgb = XGBClassifier(random_state=42)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

print("🔹 XGBoost Results:")
print(classification_report(y_test, y_pred_xgb))
print("ROC-AUC:", roc_auc_score(y_test, xgb.predict_proba(X_test)[:,1]))


🔹 XGBoost Results:
              precision    recall  f1-score   support

           0       0.88      0.94      0.91      1593
           1       0.68      0.49      0.57       404

    accuracy                           0.85      1997
   macro avg       0.78      0.72      0.74      1997
weighted avg       0.84      0.85      0.84      1997

ROC-AUC: 0.841598142865134


In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Get predicted probabilities for the positive class (churn = 1)
y_proba_lr = log_reg.predict_proba(X_test)[:,1]
y_proba_rf = rf.predict_proba(X_test)[:,1]
y_proba_xgb = xgb.predict_proba(X_test)[:,1]

# Compute ROC curves
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_proba_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_proba_rf)
fpr_xgb, tpr_xgb, _ = roc_curve(y_test, y_proba_xgb)

# Compute AUC scores
auc_lr = roc_auc_score(y_test, y_proba_lr)
auc_rf = roc_auc_score(y_test, y_proba_rf)
auc_xgb = roc_auc_score(y_test, y_proba_xgb)

# Plot
plt.figure(figsize=(8,6))
plt.plot(fpr_lr, tpr_lr, label=f"Logistic Regression (AUC = {auc_lr:.3f})", linewidth=2)
plt.plot(fpr_rf, tpr_rf, label=f"Random Forest (AUC = {auc_rf:.3f})", linewidth=2)
plt.plot(fpr_xgb, tpr_xgb, label=f"XGBoost (AUC = {auc_xgb:.3f})", linewidth=2)

# Reference line (no skill)
plt.plot([0,1], [0,1], 'k--', label='No Skill', linewidth=1.5)

# Formatting
plt.title('ROC Curve Comparison for Churn Prediction', fontsize=13)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.grid(True)
plt.show()


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Exited
