# Customer Churn End-to-End Analysis

### 1. Data Loading and Initial Exploration

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Read the raw churn export into the working DataFrame
csv_path = 'Customer Churn.csv'
df = pd.read_csv(csv_path)

# Preview the top rows as the cell's rich output
df.head()

In [None]:
# Structural overview: column names, dtypes and non-null counts
print("--- Data Info ---")
df.info()

# Per-column count of missing entries, before any cleaning
missing_per_column = df.isna().sum()
print("\n--- Initial Missing Values Check ---")
print(missing_per_column)

### 2. Data Cleaning and Preparation

In [None]:
# The 'TotalCharges' column is read as an object type because some entries are not
# valid numbers (blank strings, which likely correspond to new customers with 0 tenure).
# errors='coerce' turns every unparseable entry into NaN; those are then filled with 0.
#
# The result is assigned back to the column rather than calling
# df['TotalCharges'].fillna(0, inplace=True): that chained in-place form operates on an
# intermediate selection, is deprecated in pandas 2.x, and leaves df unchanged once
# Copy-on-Write is enabled.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Verify the change
print("--- Missing values after cleaning TotalCharges ---")
print(df.isnull().sum())

print("\n--- Data type of TotalCharges after cleaning ---")
print(df['TotalCharges'].dtype)

In [None]:
# The 'SeniorCitizen' column is numerical (0/1); map it to 'No'/'Yes' for clarity in plots.
# The dtype guard keeps this cell safe to re-run: pushing the already-converted
# 'No'/'Yes' strings through the same dict would turn every value into NaN.
if pd.api.types.is_numeric_dtype(df['SeniorCitizen']):
    df['SeniorCitizen'] = df['SeniorCitizen'].map({0: 'No', 1: 'Yes'})

# Display a value count to confirm the change
df['SeniorCitizen'].value_counts()

### 3. Exploratory Data Analysis (EDA) & Visualization

#### Overall Churn Rate

In [None]:
# Number of customers per Churn value, most frequent first
churn_counts = df['Churn'].value_counts()

# Draw the split as a pie chart on an explicitly created Axes
fig, ax = plt.subplots(figsize=(5, 5))
ax.pie(
    churn_counts,
    labels=churn_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=['#1f77b4', '#ff7f0e'],
)
ax.set_title('Percentage of Churned Customers')
ax.set_ylabel('')
plt.show()

**Insight**: About 26.5% of customers have churned, which is a substantial number to investigate.

#### Churn by Demographics (Senior Citizen)

In [None]:
# Within each SeniorCitizen group, the share of each Churn value, expressed in percent
total_counts = (
    df.groupby('SeniorCitizen')['Churn']
    .value_counts(normalize=True)
    .unstack()
    * 100
)

# Stacked bars: one bar per SeniorCitizen group, one segment per Churn value
fig, ax = plt.subplots(figsize=(6, 5))
total_counts.plot(kind='bar', stacked=True, ax=ax, color=['#1f77b4', '#ff7f0e'])

# Write each segment's percentage at its centre; zero-height segments get no label
for segment in ax.patches:
    seg_height = segment.get_height()
    if seg_height <= 0:
        continue
    seg_left, seg_bottom = segment.get_xy()
    ax.text(
        seg_left + segment.get_width() / 2,
        seg_bottom + seg_height / 2,
        f'{seg_height:.1f}%',
        ha='center',
        va='center',
        color='white',
        fontweight='bold',
    )

ax.set_title('Churn Rate by Senior Citizen Status')
ax.set_xlabel('Senior Citizen')
ax.set_ylabel('Percentage (%)')
plt.xticks(rotation=0)
ax.legend(title='Churn')
plt.show()

**Insight**: Senior citizens have a significantly higher churn rate (41.7%) compared to non-seniors (23.6%).

#### Churn by Contract Type

In [None]:
# Customer counts per contract type, split into side-by-side bars by Churn value
fig, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=df, x="Contract", hue="Churn", ax=ax)
ax.set_title("Churn Rate by Contract Type")
plt.show()

**Insight**: Customers on month-to-month contracts account for far more churn, both in absolute numbers and relative to their group size, than those on one- or two-year contracts. This highlights the importance of long-term contracts for customer retention.

#### Churn by Tenure

In [None]:
# Stacked histogram of tenure in 30 bins, coloured by Churn value
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(
    data=df,
    x='tenure',
    hue='Churn',
    multiple='stack',
    bins=30,
    kde=False,
    ax=ax,
)
ax.set(
    title='Churn Distribution by Customer Tenure',
    xlabel='Tenure (Months)',
    ylabel='Number of Customers',
)
plt.show()

**Insight**: Churn is heavily concentrated among new customers (low tenure). The longer a customer stays, the less likely they are to leave.

#### Churn by Payment Method

In [None]:
# Customer counts per payment method, split into side-by-side bars by Churn value
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x="PaymentMethod", hue="Churn", ax=ax)
ax.set_title("Churn Rate by Payment Method")

# Tilt the category labels and right-align them so each ends under its tick
plt.xticks(rotation=45, ha='right')
plt.show()

**Insight**: Customers who pay by electronic check are far more likely to churn than those using any other payment method. Promoting automatic payment methods could be a key retention strategy.

### 4. Data Normalization and Export for Power BI

In [None]:
# Columns to rescale; every other column is carried over unchanged
numerical_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

# Work on a separate copy so the original df stays intact
df_normalized = df.copy()

# Fit a MinMaxScaler on the selected columns and write the scaled values back
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_normalized[numerical_cols])
df_normalized[numerical_cols] = scaled_values

# Header line, then the first rows of the scaled frame as the cell output
print("--- First 5 rows of Normalized DataFrame ---")
df_normalized.head()

In [None]:
# Write the cleaned, scaled frame to CSV without the index column
output_filename = 'cleaned_normalized_churn_data.csv'
df_normalized.to_csv(path_or_buf=output_filename, index=False)

# Report where the file was written
print(f"Successfully saved the cleaned and normalized data to '{output_filename}'")