# Customer Churn Analysis - Exploratory Data Analysis

This notebook explores the customer churn dataset to identify patterns and insights that can help predict customer churn.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for all plots.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and deprecated the old names, so the bare 'seaborn' name is not guaranteed
# to resolve on newer releases. Use whichever name this installation ships so
# the notebook still runs top-to-bottom on both old and new Matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('Set2')

In [None]:
# Read the customer CSV (path is relative to the notebook's working
# directory), report the frame's dimensions, then preview the first rows
# as the cell's rendered output.
data_path = '../data/customer_data.csv'
df = pd.read_csv(data_path)
print(f"Dataset shape: {df.shape}")
df.head()

## 1. Data Overview

In [None]:
# Basic statistics
# A notebook cell only auto-renders its *final* expression, so a bare
# `df.describe()` in the middle of the cell is computed and then discarded.
# Passing it to display() (available without import inside an IPython/Jupyter
# kernel) renders the table directly under its printed header.
print("\nBasic Statistics:")
display(df.describe())

# Missing values
# Per-column count of null entries; as the last expression of the cell it is
# rendered automatically.
print("\nMissing Values:")
df.isnull().sum()

## 2. Customer Demographics Analysis

In [None]:
# Customer counts per gender, with bars split by the value of the churn column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='gender', hue='churn', ax=ax)
ax.set_title('Churn Distribution by Gender')
plt.show()

# Histogram of customer age in 30 bins, coloured by the value of the churn column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='age', hue='churn', bins=30, ax=ax)
ax.set_title('Age Distribution by Churn Status')
plt.show()

## 3. Service Usage Analysis

In [None]:
# Mean of the churn column within each contract type, drawn as a bar chart.
# NOTE(review): taking the mean assumes churn is stored numerically
# (e.g. 0/1) — confirm against the dataset.
contract_churn = df.groupby('contract_type')['churn'].mean()

fig, ax = plt.subplots(figsize=(10, 6))
contract_churn.plot(kind='bar', ax=ax)
ax.set_title('Churn Rate by Contract Type')
ax.set_ylabel('Churn Rate')
plt.show()

## 4. Financial Analysis

In [None]:
# Box plot of monthly_charges, one box per distinct value of the churn column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='churn', y='monthly_charges', ax=ax)
ax.set_title('Monthly Charges Distribution by Churn Status')
plt.show()

# Correlation analysis
# Pairwise correlations between the listed columns and churn, shown as an
# annotated heatmap.
# NOTE(review): .corr() assumes every selected column, including churn, is
# numeric — confirm against the dataset.
numeric_cols = ['age', 'tenure', 'monthly_charges', 'total_charges']
correlation = df[numeric_cols + ['churn']].corr()
plt.figure(figsize=(10, 8))
# 'coolwarm' is a diverging colormap: pin the colour limits to the full
# correlation range [-1, 1] so its neutral midpoint sits exactly at zero.
# With data-driven limits the diagonal of 1.0 fixes the top of the scale while
# the bottom floats, which shifts the midpoint and makes weak or zero
# correlations look signed.
sns.heatmap(correlation, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()

## 5. Key Findings

1. Contract Type Impact:
   - Month-to-month contracts have higher churn rates
   - Long-term contracts show better retention

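2. Service Usage:
   - Fiber optic customers show mixed retention
   - Phone service appears to have minimal impact
   - **TODO:** Section 3 currently plots only contract type; add internet-service and phone-service breakdowns so the two points above are backed by a visible result.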

3. Financial Patterns:
   - Higher monthly charges correlate with increased churn
   - Longer tenure correlates with lower churn

4. Demographics:
   - Age shows moderate correlation with churn
   - Gender has minimal impact on churn rates