In [2]:
# =========================================
# CUSTOMER SEGMENTATION + SEGMENT-WISE CHURN MODEL
# =========================================

# Upload file
from google.colab import files
files.upload()

# Install libraries
!pip install pandas numpy scikit-learn

# Import packages
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv("customer_churn (1).csv")

print("Dataset Loaded Successfully")
print(df.head())

# Select numeric columns
# Only numeric columns are kept, and any row with a missing value is dropped.
numeric_only = df.select_dtypes(include=['number'])
df_num = numeric_only.dropna()

# Separate the Churn target from the remaining feature columns.
y = df_num["Churn"]
X = df_num.drop(columns=["Churn"])

# Standardization
# Fit the scaler on the features, then transform them with the fitted scaler.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# -----------------------------
# CLUSTERING
# -----------------------------
# Number of customer segments; shared by both algorithms so they use the same k.
N_CLUSTERS = 3

# n_init is passed explicitly because scikit-learn's default changed from 10 to
# "auto" (a single k-means++ run) in 1.4; relying on the default makes the
# segments depend on the installed version and emits a FutureWarning on 1.2/1.3.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=42)
df_num["kmeans_cluster"] = kmeans.fit_predict(X_scaled)

# Agglomerative labels are stored alongside the KMeans labels on the same rows.
agg = AgglomerativeClustering(n_clusters=N_CLUSTERS)
df_num["agg_cluster"] = agg.fit_predict(X_scaled)

print("\nKMeans Cluster Distribution:")
print(df_num["kmeans_cluster"].value_counts())

# -----------------------------
# SEGMENT-WISE MODELS
# -----------------------------
# Trains one grid-searched RandomForest per KMeans segment and stores its
# held-out metrics in `results`, keyed by cluster id.

# Segments with fewer rows than this are skipped.
MIN_SEGMENT_SIZE = 30

# Cluster label columns are bookkeeping, not model features.
CLUSTER_COLUMNS = ["kmeans_cluster", "agg_cluster"]

# Hyper-parameter grid; identical for every segment, so it is built once.
PARAM_GRID = {
    "n_estimators": [50, 100],
    "max_depth": [3, 5, None],
}

results = {}

# Sorted so segments are trained and reported in a stable order.
for cluster in sorted(df_num["kmeans_cluster"].unique()):
    print(f"\nTraining model for Cluster {cluster}")

    segment = df_num[df_num["kmeans_cluster"] == cluster]

    if len(segment) < MIN_SEGMENT_SIZE:
        print("Segment too small – skipped")
        continue

    X_seg = segment.drop(columns=["Churn", *CLUSTER_COLUMNS])
    y_seg = segment["Churn"]

    # A segment containing a single Churn class cannot train or evaluate a
    # classifier meaningfully (precision/recall/F1 would all be degenerate).
    class_counts = y_seg.value_counts()
    if len(class_counts) < 2:
        print("Segment has a single Churn class – skipped")
        continue

    # Stratify so both classes appear in train and test in the segment's own
    # proportions; train_test_split rejects stratification when a class has
    # only one member, so fall back to a plain split in that case.
    stratify_on = y_seg if class_counts.min() >= 2 else None

    X_train, X_test, y_train, y_test = train_test_split(
        X_seg, y_seg, test_size=0.2, random_state=42, stratify=stratify_on)

    rf = RandomForestClassifier(random_state=42)

    grid = GridSearchCV(rf, PARAM_GRID, cv=3)
    grid.fit(X_train, y_train)

    best_model = grid.best_estimator_
    preds = best_model.predict(X_test)

    # int() turns the numpy scalar into a plain Python int for the dict key.
    results[int(cluster)] = {
        "Best Params": grid.best_params_,
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, zero_division=0),
        "Recall": recall_score(y_test, preds, zero_division=0),
        "F1 Score": f1_score(y_test, preds, zero_division=0)
    }

# -----------------------------
# FINAL OUTPUT
# -----------------------------
# Print the stored metrics for every segment that has an entry in `results`.
print("\nSEGMENT WISE MODEL PERFORMANCE")

for cluster_id in results:
    print(f"\nCluster {cluster_id}")
    segment_report = results[cluster_id]
    for metric_name in segment_report:
        # sep=" : " yields the same "name : value" text as printing the three parts.
        print(metric_name, segment_report[metric_name], sep=" : ")

Saving customer_churn (1).csv to customer_churn (1) (1).csv
Dataset Loaded Successfully
  CustomerID  Tenure  MonthlyCharges  TotalCharges        Contract  \
0     C00001       6              64          1540        One year   
1     C00002      21             113          1753  Month-to-month   
2     C00003      27              31          1455        Two year   
3     C00004      53              29          7150  Month-to-month   
4     C00005      16             185          1023        One year   

      PaymentMethod PaperlessBilling  SeniorCitizen  Churn  
0       Credit Card               No              1      0  
1  Electronic Check              Yes              1      0  
2       Credit Card               No              1      0  
3  Electronic Check               No              1      0  
4  Electronic Check               No              1      0  

KMeans Cluster Distribution:
kmeans_cluster
0    222
2    153
1    125
Name: count, dtype: int64

Training model for Cluster