# Customer Churn Analysis - Exploratory Data Analysis
## Part 2: Understanding Churn Patterns

**Author:** Your Name  
**Date:** February 2026  
**Purpose:** Explore churn patterns and identify key drivers

---

## 1. Import Libraries and Load Data

In [None]:
# Data manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Statistical analysis
from scipy import stats

# Notebook-wide configuration: DataFrame display first, then plot appearance
pd.options.display.max_columns = None  # show every column, never elide
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

# Suppress all warnings from this point on to keep cell output uncluttered
import warnings
warnings.filterwarnings(action='ignore')

print("✓ Libraries imported successfully")

In [None]:
# Read the cleaned churn CSV into the notebook-wide frame `df`
df = pd.read_csv(filepath_or_buffer='../data/cleaned_churn_data.csv')

# Report (rows, columns) and the row count as a quick sanity check
print(f"Dataset Shape: {df.shape}")
print(f"Loaded {df.shape[0]:,} customer records")

# Final expression of the cell: displays the first five rows
df.head(5)

## 2. Overall Churn Analysis

In [None]:
# Headline churn figures for the whole dataset.
# Counts come from boolean masks on the 'Churn' column ('Yes' / 'No').
total_customers = df.shape[0]
churned = int((df['Churn'] == 'Yes').sum())
retained = int((df['Churn'] == 'No').sum())
churn_rate = churned / total_customers * 100  # percentage of all rows

# Emit the whole report in one call, one item per line
print(
    "=" * 60,
    "OVERALL CHURN METRICS",
    "=" * 60,
    f"Total Customers: {total_customers:,}",
    f"Churned Customers: {churned:,}",
    f"Retained Customers: {retained:,}",
    f"Churn Rate: {churn_rate:.2f}%",
    f"Retention Rate: {100-churn_rate:.2f}%",
    sep="\n",
)

In [None]:
# Visualize churn distribution: customer counts (left) and percentage split (right).
# Relies on `retained` and `churned` computed in the churn-metrics cell.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

labels = ['Retained', 'Churned']
counts = [retained, churned]
colors = ['#2ecc71', '#e74c3c']  # green = retained, red = churned

# Count plot
bars = axes[0].bar(labels, counts, color=colors)
axes[0].set_title('Customer Churn Distribution', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Number of Customers')
# Label each bar with its count. The padding is in points rather than data
# units, so the labels sit just above the bars whatever the dataset size
# (a fixed "+100" offset only looks right at one particular scale).
axes[0].bar_label(bars, labels=[str(v) for v in counts], padding=3, fontweight='bold')

# Pie chart
axes[1].pie(counts, labels=labels, autopct='%1.1f%%',
            startangle=90, colors=colors, textprops={'fontsize': 12, 'fontweight': 'bold'})
axes[1].set_title('Churn Rate Percentage', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('../images/overall_churn_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

## 3. Revenue Analysis

In [None]:
# Revenue metrics: split MonthlyCharges by churn outcome once, then reuse
churned_charges = df.loc[df['Churn'] == 'Yes', 'MonthlyCharges']
retained_charges = df.loc[df['Churn'] == 'No', 'MonthlyCharges']

churned_revenue = churned_charges.sum()
retained_revenue = retained_charges.sum()
total_revenue = churned_revenue + retained_revenue

# Annual projection: twelve months of the churned customers' monthly charges
annual_revenue_at_risk = churned_revenue * 12

# Per-customer averages for each group
avg_churned_revenue = churned_charges.mean()
avg_retained_revenue = retained_charges.mean()

# Report
print(
    "=" * 60,
    "REVENUE ANALYSIS",
    "=" * 60,
    "\nMonthly Revenue:",
    f"  Total: ${total_revenue:,.2f}",
    f"  From Retained Customers: ${retained_revenue:,.2f}",
    f"  From Churned Customers: ${churned_revenue:,.2f}",
    f"\nAnnual Revenue at Risk: ${annual_revenue_at_risk:,.2f}",
    "\nAverage Monthly Charges:",
    f"  Retained Customers: ${avg_retained_revenue:.2f}",
    f"  Churned Customers: ${avg_churned_revenue:.2f}",
    sep="\n",
)

## 4. Churn by Demographics

In [None]:
# Churn split per gender: each row is normalised to sum to 100 (percent)
gender_churn = pd.crosstab(
    index=df['gender'],
    columns=df['Churn'],
    normalize='index',
).mul(100)

print("Churn Rate by Gender:", gender_churn, sep="\n")

In [None]:
# Churn split per SeniorCitizen value: each row is normalised to sum to 100 (percent)
senior_churn = pd.crosstab(
    index=df['SeniorCitizen'],
    columns=df['Churn'],
    normalize='index',
).mul(100)

print("\nChurn Rate by Senior Citizen Status:", senior_churn, sep="\n")

In [None]:
# Demographic visualization: a 2x2 grid of grouped bar charts, one per attribute
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Customer counts per (attribute value, churn outcome), one frame per attribute
gender_data = df.groupby(['gender', 'Churn']).size().unstack()
senior_data = df.groupby(['SeniorCitizen', 'Churn']).size().unstack()
partner_data = df.groupby(['Partner', 'Churn']).size().unstack()
dep_data = df.groupby(['Dependents', 'Churn']).size().unstack()

# (counts, panel title, x-axis label), listed in row-major grid order
panels = [
    (gender_data, 'Churn by Gender', 'Gender'),
    (senior_data, 'Churn by Senior Citizen Status', 'Senior Citizen (0=No, 1=Yes)'),
    (partner_data, 'Churn by Partner Status', 'Has Partner'),
    (dep_data, 'Churn by Dependents Status', 'Has Dependents'),
]

# axes.flat walks the grid as [0,0], [0,1], [1,0], [1,1]
for panel_ax, (panel_counts, panel_title, panel_xlabel) in zip(axes.flat, panels):
    panel_counts.plot(kind='bar', ax=panel_ax, color=['#2ecc71', '#e74c3c'])
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.set_ylabel('Number of Customers')
    panel_ax.legend(['Retained', 'Churned'])
    panel_ax.tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.savefig('../images/demographic_churn_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

## 5. Churn by Contract Type (KEY DRIVER)

In [None]:
# Contract type analysis: per-contract totals, churned counts and churn rate.
# 'Total' counts non-null customerID values; 'Churned' counts rows where Churn == 'Yes'.
contract_analysis = df.groupby('Contract').agg(
    Total=('customerID', 'count'),
    Churned=('Churn', lambda s: (s == 'Yes').sum()),
)

contract_analysis['Churn_Rate'] = (contract_analysis['Churned'] / contract_analysis['Total'] * 100).round(2)
contract_analysis['Retained'] = contract_analysis['Total'] - contract_analysis['Churned']

print("="*60)
print("CHURN BY CONTRACT TYPE")
print("="*60)
print(contract_analysis)

# Derive the headline from the data instead of hard-coding the expected winner,
# so the message stays correct if the dataset changes. Rates that are NaN
# (a group with Total == 0) are ignored; nothing is printed if none remain.
valid_contract_rates = contract_analysis['Churn_Rate'].dropna()
if not valid_contract_rates.empty:
    riskiest_contract = valid_contract_rates.idxmax()
    print(f"\n⚠️ {riskiest_contract} contracts have the highest churn rate!")

In [None]:
# Contract type impact: stacked customer counts (left) beside churn rates (right).
# Reads `contract_analysis` from the contract-analysis cell.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
counts_ax, rates_ax = axes

# Left panel: retained/churned customers stacked per contract type
contract_data = df.groupby(['Contract', 'Churn']).size().unstack()
contract_data.plot(kind='bar', stacked=True, ax=counts_ax, color=['#2ecc71', '#e74c3c'])
counts_ax.set_title('Customer Distribution by Contract Type', fontsize=14, fontweight='bold')
counts_ax.set_xlabel('Contract Type', fontsize=12)
counts_ax.set_ylabel('Number of Customers', fontsize=12)
counts_ax.legend(['Retained', 'Churned'], loc='upper right')
counts_ax.tick_params(axis='x', rotation=45)

# Right panel: churn rate per contract type, highest first
contract_churn_rates = contract_analysis['Churn_Rate'].sort_values(ascending=False)
bar_positions = range(len(contract_churn_rates))
rates_ax.bar(bar_positions, contract_churn_rates.values,
             color=['#e74c3c', '#f39c12', '#2ecc71'])
rates_ax.set_xticks(bar_positions)
rates_ax.set_xticklabels(contract_churn_rates.index, rotation=45)
rates_ax.set_title('Churn Rate by Contract Type', fontsize=14, fontweight='bold')
rates_ax.set_ylabel('Churn Rate (%)', fontsize=12)
rates_ax.set_xlabel('Contract Type', fontsize=12)

# Write each rate one unit above its bar
for position, rate in zip(bar_positions, contract_churn_rates.values):
    rates_ax.text(position, rate + 1, f'{rate:.1f}%', ha='center', fontweight='bold')

plt.tight_layout()
plt.savefig('../images/contract_type_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Churn by Tenure (KEY DRIVER)

In [None]:
# Tenure analysis: per-tenure-group totals, churned counts and churn rate.
# 'Total' counts non-null customerID values; 'Churned' counts rows where Churn == 'Yes'.
# NOTE(review): if TenureGroup is loaded from CSV as plain strings, the groups
# will print in lexicographic rather than chronological order — confirm.
tenure_analysis = df.groupby('TenureGroup').agg(
    Total=('customerID', 'count'),
    Churned=('Churn', lambda s: (s == 'Yes').sum()),
)

tenure_analysis['Churn_Rate'] = (tenure_analysis['Churned'] / tenure_analysis['Total'] * 100).round(2)

print("="*60)
print("CHURN BY TENURE GROUP")
print("="*60)
print(tenure_analysis)

# Derive the headline from the data instead of hard-coding the expected group,
# so the message stays correct if the dataset or the grouping changes. Rates
# that are NaN (a group with Total == 0) are ignored; nothing is printed if
# none remain.
valid_tenure_rates = tenure_analysis['Churn_Rate'].dropna()
if not valid_tenure_rates.empty:
    riskiest_tenure_group = valid_tenure_rates.idxmax()
    print(f"\n⚠️ Customers in the '{riskiest_tenure_group}' tenure group are at highest risk!")