In [None]:
# STEP 0: Install ELI5 (only run once)
!pip install eli5


Collecting eli5
  Downloading eli5-0.16.0-py2.py3-none-any.whl.metadata (18 kB)
Downloading eli5-0.16.0-py2.py3-none-any.whl (108 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 108.4/108.4 kB 3.7 MB/s eta 0:00:00
Installing collected packages: eli5
Successfully installed eli5-0.16.0


In [None]:
import eli5
import pandas as pd
from eli5.sklearn import PermutationImportance
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split

# Load the raw churn data and derive the modelling frame in a single chain,
# so re-running this cell always rebuilds `df` from the CSV instead of
# mutating whatever `df` happens to be in the kernel.
df = (
    pd.read_csv('Telecom Churn Analysis.csv')
    # TotalCharges is parsed as numbers; anything unparseable becomes NaN ...
    .assign(TotalCharges=lambda d: pd.to_numeric(d['TotalCharges'], errors='coerce'))
    # ... and rows without a usable TotalCharges are discarded.
    .dropna(subset=['TotalCharges'])
    .assign(
        # Average spend per unit of tenure. Tenure is clipped to at least 1 so
        # a customer with tenure 0 yields their TotalCharges rather than inf,
        # which the classifier fitted later cannot accept. Rows with
        # tenure >= 1 are unaffected by the clip.
        AvgMonthlySpend=lambda d: d['TotalCharges'] / d['tenure'].clip(lower=1),
        # Binary target: 'Yes' -> 1, 'No' -> 0 (any other label maps to NaN).
        Churn=lambda d: d['Churn'].map({'Yes': 1, 'No': 0}),
    )
    # The customer identifier is not a predictive feature.
    .drop(columns='customerID')
)

# Fail early and clearly if the target contained a label other than Yes/No.
assert df['Churn'].notna().all(), "Unexpected value in 'Churn' (expected 'Yes'/'No')"

# One-hot encode every column that is still object-typed; drop_first=True
# omits the first level of each encoded column.
categorical_cols = df.select_dtypes(include='object').columns
df = df.pipe(pd.get_dummies, columns=categorical_cols, drop_first=True)

# Separate the target (y) from the predictors (X)
y = df['Churn']
X = df.drop(columns='Churn')

# Hold out 20% of the rows for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a random forest on the training split (fit() returns the estimator
# itself, so construction and training can be chained).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split and print the per-class metrics followed by the
# confusion matrix.
y_pred = model.predict(X_test)
clf_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Classification Report:\n", clf_report)
print("Confusion Matrix:\n", conf_matrix)

# Explainability: permutation importance of each feature, measured on the
# held-out split with the already-fitted model.
perm = PermutationImportance(model, random_state=42).fit(X_test, y_test)

# Jupyter only renders the value of a cell's *last* expression. This call sits
# in the middle of the cell, so its HTML table must be passed to display()
# explicitly or it is silently discarded. `display` is provided by the
# IPython kernel without an import.
display(eli5.show_weights(perm, feature_names=X_test.columns.tolist()))

# Segment cut-offs on the predicted churn probability.
AT_RISK_THRESHOLD = 0.6   # strictly above -> 'At Risk'
LOYAL_THRESHOLD = 0.3     # strictly below -> 'Loyal'

# Churn probability for every customer.
# Scoring X with `model.predict_proba` would be in-sample for the 80% of rows
# the forest was trained on, giving over-confident probabilities for them.
# cross_val_predict instead returns out-of-fold estimates: each row is scored
# by a clone of `model` that never saw that row during fitting. The fitted
# `model` itself is left untouched. Column 1 is the probability of Churn == 1.
churn_proba = cross_val_predict(model, X, y, cv=5, method='predict_proba')
df['churn_prob'] = churn_proba[:, 1]

# Vectorised segmentation with the same boundaries as before:
#   prob > 0.6 -> 'At Risk', prob < 0.3 -> 'Loyal', otherwise 'Dormant'
# (both cut-off values themselves fall into 'Dormant').
df['segment'] = 'Dormant'
df.loc[df['churn_prob'] > AT_RISK_THRESHOLD, 'segment'] = 'At Risk'
df.loc[df['churn_prob'] < LOYAL_THRESHOLD, 'segment'] = 'Loyal'

# Export to CSV for Power BI
df.to_csv('churn_predictions.csv', index=False)


Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.90      0.87      1033
           1       0.65      0.50      0.56       374

    accuracy                           0.79      1407
   macro avg       0.74      0.70      0.71      1407
weighted avg       0.78      0.79      0.79      1407

Confusion Matrix:
 [[931 102]
 [187 187]]
