In [None]:
# Step 1: Import Libraries
%pip install pandas numpy matplotlib seaborn scikit-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
# Step 2: Load and Clean Dataset
data = pd.read_csv('churn_prediction/personal_maps/urson/BankChurners.csv')

# Columns removed up front: the client identifier, the two
# Naive_Bayes_Classifier_* columns, and Avg_Open_To_Buy (treated as redundant here).
unused_columns = [
    'CLIENTNUM',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2',
    'Avg_Open_To_Buy',
]
data = data.drop(columns=unused_columns)

# Overview: remaining columns, first rows, dimensions, and per-column null counts
print("Updated Feature Set:", data.columns)
print("Data Overview:", data.head(), sep="\n")
print("Shape of data:", data.shape)
print("Null Values:", data.isnull().sum(), sep="\n")

# Encode target variable and binary features.
# Series.map with an explicit dict is used instead of Series.replace: replacing
# strings with ints relies on object-dtype downcasting that pandas 2.2 deprecates.
# NOTE: any value missing from the dict becomes NaN under map.
data['Attrition_Flag'] = data['Attrition_Flag'].map({'Attrited Customer': 1, 'Existing Customer': 0})
data['Gender'] = data['Gender'].map({'F': 1, 'M': 0})

# One-hot encode categorical features in a single pass. get_dummies(columns=...)
# removes the original columns and appends the '<column>_<value>' dummies at the
# end, in the order the columns are listed.
categorical_cols = ['Education_Level', 'Income_Category', 'Marital_Status', 'Card_Category']
data = pd.get_dummies(data, columns=categorical_cols)

# Drop the '<column>_Unknown' dummies where they exist; errors='ignore' covers
# columns that have no 'Unknown' category.
unknown_dummies = [f'{col}_Unknown' for col in categorical_cols]
data = data.drop(columns=unknown_dummies, errors='ignore')

# Confirm Cleaned Data
print("Cleaned Data:")
print(data.head())
print("Updated Shape:", data.shape)

# Step 3: Scale Numerical Features
# Every column except the target goes through the scaler; the column names are
# re-attached because fit_transform returns a plain array.
feature_frame = data.drop(columns=['Attrition_Flag'])
scaler = StandardScaler()
scaled_values = scaler.fit_transform(feature_frame)
X = pd.DataFrame(scaled_values, columns=feature_frame.columns)

# Step 4: Clustering Methods
def clustering_pipeline(model, name, X):
    """Fit a clustering model, report its silhouette score, and return the labels.

    Args:
        model: Object exposing ``fit_predict(X)`` that returns one label per sample.
        name: Display name used in the printed score line.
        X: Feature matrix passed to ``fit_predict`` and to ``silhouette_score``.

    Returns:
        The labels produced by ``model.fit_predict(X)``.

    When the model yields at most one distinct label, the silhouette score is
    not computed and -1 is printed in its place.
    """
    assignments = model.fit_predict(X)
    distinct_labels = len(set(assignments))
    if distinct_labels <= 1:
        score = -1
    else:
        score = silhouette_score(X, assignments)
    print(f"{name} Silhouette Score: {score:.2f}")
    return assignments

# All models are fitted on the same snapshot of the scaled features. Writing
# each model's labels into X before fitting the next model on X would feed the
# earlier cluster labels into later models (and their silhouette scores) as
# extra features, so the labels are attached to X only after every fit is done.
cluster_cols = ['KMeans_Cluster', 'DBSCAN_Cluster', 'GMM_Cluster', 'Agglo_Cluster']
features = X.drop(columns=cluster_cols, errors='ignore')

# K-Means
print("\n--- K-Means Clustering ---")
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = clustering_pipeline(kmeans, "K-Means", features)

# DBSCAN
print("\n--- DBSCAN Clustering ---")
dbscan = DBSCAN(eps=0.5, min_samples=10)
dbscan_labels = clustering_pipeline(dbscan, "DBSCAN", features)

# Gaussian Mixture (routed through the same helper so its silhouette score is
# reported like the other models)
print("\n--- Gaussian Mixture Clustering ---")
gmm = GaussianMixture(n_components=3, random_state=42)
gmm_labels = clustering_pipeline(gmm, "Gaussian Mixture", features)

# Agglomerative Clustering
print("\n--- Agglomerative Clustering ---")
agglo = AgglomerativeClustering(n_clusters=3)
agglo_labels = clustering_pipeline(agglo, "Agglomerative", features)

# Attach the labels to X, in the same column order as before
X['KMeans_Cluster'] = kmeans_labels
X['DBSCAN_Cluster'] = dbscan_labels
X['GMM_Cluster'] = gmm_labels
X['Agglo_Cluster'] = agglo_labels

# Step 5: PCA for Visualization
label_columns = ['KMeans_Cluster', 'DBSCAN_Cluster', 'GMM_Cluster', 'Agglo_Cluster']

# Project onto two principal components, leaving the cluster label columns out
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X.drop(columns=label_columns))
X['PCA1'] = X_pca[:, 0]
X['PCA2'] = X_pca[:, 1]

# Visualization of Clusters: one panel per clustering method on a 2x2 grid
plt.figure(figsize=(12, 8))
for panel, label_column in enumerate(label_columns, start=1):
    plt.subplot(2, 2, panel)
    sns.scatterplot(
        data=X,
        x='PCA1',
        y='PCA2',
        hue=label_column,
        palette='coolwarm',
        legend='full',
    )
    plt.title(f"{label_column} Clustering")
plt.tight_layout()
plt.show()

# Step 6: Post-Clustering Analysis
# Put the target back next to the cluster labels; the index is reset so the
# values align with X's rows.
X['Attrition_Flag'] = data['Attrition_Flag'].reset_index(drop=True)

# Mean of Attrition_Flag within each cluster, per clustering method
for cluster_col in ('KMeans_Cluster', 'DBSCAN_Cluster', 'GMM_Cluster', 'Agglo_Cluster'):
    churn_by_cluster = X.groupby(cluster_col)['Attrition_Flag'].mean()
    print(f"\nChurn Rate by {cluster_col}:", churn_by_cluster, sep="\n")
