In [None]:
# Customer Churn Prediction Workflow

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load Data

df = pd.read_csv('../data/customers.csv')
display(df.head())  # `display` is supplied by IPython/Jupyter, not plain Python

# 2. Data Cleaning
# The sample has no missing values, but handle them defensively.
# Imputation has to run BEFORE label encoding: once LabelEncoder has seen the
# column, a missing gender has already been turned into a class of its own
# (or raised, depending on the scikit-learn version) and can no longer be
# filled in.

# A row without a label cannot be used for supervised training, and filling
# the target with its column mean would create fractional labels that the
# classifiers reject as a continuous target.
df = df.dropna(subset=['churn']).copy()

# Categorical column: fall back to the most frequent value (skipped when the
# column is entirely missing and therefore has no mode).
gender_mode = df['gender'].mode()
if not gender_mode.empty:
    df['gender'] = df['gender'].fillna(gender_mode.iloc[0])

# Numeric columns: fill with the column mean, leaving the target untouched.
numeric_cols = df.select_dtypes(include='number').columns.drop('churn', errors='ignore')
df = df.fillna(df[numeric_cols].mean())

# Encode gender as integer codes; `le` is kept so codes can be mapped back
# with le.inverse_transform / le.classes_.
le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])

# 3. Feature Engineering
features = ['age', 'gender', 'tenure', 'purchase_frequency', 'recency', 'avg_spend', 'engagement_score']
X = df[features]
y = df['churn']

# 4. Model Training & Evaluation
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/std of the held-out rows leak into the training features and make the
# test metrics optimistic. The split depends only on the number of rows and
# random_state, so the same rows land in train/test as when splitting the
# pre-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows only
X_test = scaler.transform(X_test_raw)        # apply the training statistics unchanged

# Full feature matrix scaled with the training statistics; kept so that any
# later code referring to X_scaled keeps working.
X_scaled = scaler.transform(X)

# Candidate classifiers, keyed by the display name used in the report below.
# The tree-based models draw random numbers while training, so they are
# seeded (same seed as the train/test split) to make the reported metrics
# reproducible from run to run.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'SVM': SVC(),
    'Random Forest': RandomForestClassifier(random_state=42),
}

# Fit every candidate on the training split, score it on the held-out split,
# and keep the accuracy per model name in `results`.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"{name} Accuracy: {acc:.2f}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print("-"*40)

# 5. Insights & Visualization
# Reuse the Random Forest that was already fitted and evaluated in the
# training loop instead of training a second, unseeded forest: the plotted
# importances then belong to the same model whose metrics were printed, and
# no training time is spent twice.
# NOTE: despite the name, this is the Random Forest regardless of its rank in
# `results`; it is used here because the plot needs feature_importances_.
best_model = models['Random Forest']
importances = best_model.feature_importances_

# One bar per input feature, in the same order as `features`.
plt.figure(figsize=(8,5))
sns.barplot(x=features, y=importances)
plt.title('Feature Importances for Churn Prediction')
plt.ylabel('Importance')
plt.xticks(rotation=45)
plt.show()

# Mean of the churn column, i.e. the share of churned customers when the
# column is coded 0/1 (assumed coding; confirm against the data).
churn_rate = df['churn'].mean()
print(f"Overall churn rate: {churn_rate:.2f}")
