# 2. Exploratory Data Analysis (EDA)
This notebook explores patterns, trends, and relationships in the cleaned customer churn data.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults shared by every figure in this notebook.
# set_theme is seaborn's preferred interface; sns.set is only an alias for it.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (8, 5)

# Relative path: resolved against the kernel's current working directory.
DATA_PATH = "data/cleaned_data.csv"
df = pd.read_csv(DATA_PATH)

# Fail fast with a clear message: the cells below all key on the target column.
assert "churn" in df.columns, "expected a 'churn' column in the cleaned data"

df.head()


In [None]:

# Dataset size as a (row count, column count) pair
len(df.index), len(df.columns)


In [None]:

# Churn distribution: how many customers fall into each churn class
churn_counts = df["churn"].value_counts()

plt.figure()
# barplot returns the Axes it drew on, so the labels can be set in one call.
sns.barplot(x=churn_counts.index, y=churn_counts.values).set(
    title="Customer Churn Distribution",
    xlabel="Churn",
    ylabel="Number of Customers",
)
plt.show()

# Show the raw counts underneath the chart.
churn_counts


In [None]:

# Share of customers in each churn class, expressed in percent
churn_counts.div(churn_counts.sum()).mul(100)


In [None]:

# Numeric features vs churn: one boxplot per numeric column, split by churn class
num_cols = df.select_dtypes(include="number").columns

# If churn is stored numerically (e.g. 0/1) it is itself part of num_cols;
# skip it here so we never draw a degenerate churn-vs-churn plot.
# num_cols is left untouched for any later cell that reuses it.
for col in num_cols.drop("churn", errors="ignore"):
    plt.figure()
    sns.boxplot(x="churn", y=col, data=df)
    plt.title(f"{col} vs Churn")
    plt.show()


In [None]:

# Categorical features vs churn: per-level customer counts, split by churn class
cat_cols = df.select_dtypes(include="object").columns
# churn is only among the object columns when it is stored as text; with a
# numeric (0/1) target a plain drop raises KeyError, so ignore a missing label.
cat_cols = cat_cols.drop("churn", errors="ignore")

for col in cat_cols:
    plt.figure()
    sns.countplot(x=col, hue="churn", data=df)
    plt.title(f"{col} vs Churn")
    # Category labels can be long; tilt them so they do not overlap.
    plt.xticks(rotation=45)
    plt.show()


In [None]:

# Correlation heatmap of the numeric features.
# The numeric columns are selected here rather than taken from another cell,
# so this cell runs on its own once df is loaded.
corr = df.select_dtypes(include="number").corr()

plt.figure(figsize=(8, 6))
# Pin the colour scale to the full correlation range [-1, 1] so the neutral
# colour of the diverging map sits at zero and colours are comparable between
# runs; fixed two-decimal annotations keep the cell labels uniform.
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", vmin=-1, vmax=1)
plt.title("Feature Correlation Heatmap")
plt.show()


In [None]:

# End-of-notebook marker; the key observations themselves go into the written report.
closing_message = "EDA completed. Key churn patterns identified."
print(closing_message)
