# 3. Advanced Analysis
This notebook performs advanced analysis to validate churn drivers, identify high-risk customer segments, and support business recommendations.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Shared plotting defaults: white-grid seaborn theme and an 8x5 inch default figure.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (8, 5)})

# Read the cleaned dataset (path is relative to the notebook's working directory)
# and preview the first rows as the cell output.
DATA_PATH = "data/cleaned_data.csv"
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:

# Encode churn for numerical analysis
# Series.map returns NaN for any value missing from the mapping dict, so the labels are
# trimmed and lower-cased first: variants such as "Yes" or " no" then map correctly
# instead of silently becoming NaN (which the later .mean() calls would skip).
churn_labels = df['churn'].astype(str).str.strip().str.lower()
df['churn_flag'] = churn_labels.map({'no': 0, 'yes': 1})

# Fail fast on anything that is still unmapped (unexpected label or missing value)
# rather than letting those rows drop out of every churn-rate calculation below.
unmapped_labels = df.loc[df['churn_flag'].isna(), 'churn'].unique().tolist()
if unmapped_labels:
    raise ValueError(f"Unexpected values in 'churn' column: {unmapped_labels}")

df[['churn', 'churn_flag']].head()


In [None]:

# Churn rate by categorical segments
# Every object-dtype column except the raw 'churn' label is treated as a segment column.
cat_cols = df.select_dtypes(include='object').columns.drop('churn')

for col in cat_cols:
    # Mean of the 0/1 flag per group is the group's churn rate; highest rate first.
    churn_rate = (
        df.groupby(col)['churn_flag']
        .mean()
        .sort_values(ascending=False)
    )
    print(f"\nChurn rate by {col}", churn_rate, sep="\n")


In [None]:

# Numeric feature comparison between churned and retained customers
# Collect the numeric columns, leaving out 'churn_flag' so it is not averaged
# alongside the features it is being compared against.
numeric_columns = df.select_dtypes(include='number').columns
num_cols = numeric_columns.drop('churn_flag')

# One row per 'churn' value, one column per numeric feature (group means).
by_churn = df.groupby('churn')
by_churn[num_cols].mean()


In [None]:

# Correlation analysis including churn
# DataFrame.corr() computes pairwise correlations (Pearson by default) between the
# numeric features and the 0/1 churn flag.
corr = df[num_cols.tolist() + ['churn_flag']].corr()

# Pin the diverging palette to the full correlation range [-1, 1] and centre it on 0.
# Without this the colour scale stretches between the smallest observed value and the
# diagonal's 1.0, so the neutral colour would not correspond to "no correlation".
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    ax=ax,
)
ax.set_title("Correlation of Features with Churn")
plt.show()


In [None]:

# High-risk customer profile
# Select the rows whose churn flag equals 1 and summarise every column
# (numeric and non-numeric alike) for that subset.
is_churned = df['churn_flag'].eq(1)
high_risk_customers = df.loc[is_churned]
high_risk_customers.describe(include='all')


## Key Insights
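- Certain customer segments exhibit markedly higher churn rates than others (see the per-column churn-rate tables above).
- Numeric features show measurable differences in their mean values between churned and retained customers.
- Correlation analysis indicates which numeric features are most strongly associated with churn; note that correlation alone does not establish causation.
- The summary of churned customers provides a clear high-risk customer profile that can be used for targeted retention strategies.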