<h1 style="color:white; font-weight:bold;">Exploratory Data Analysis</h1>

In [15]:
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.colors as colors

import dash
from dash import dcc, html

In [4]:
# Load the cleaned churn dataset.
# Raw string: the Windows path contains backslash sequences such as "\C" and
# "\P" that are not valid escapes; in a normal literal they trigger a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
df = pd.read_csv(r"D:\College\Placements\Projects\Customer_Churn_Analysis\Dataset\Cleaned_Data.csv")

In [5]:
# Column groups, each held as a pandas Index of column names.

# Columns with a float dtype
float_columns = df.select_dtypes(include='float').columns

# Columns with the object dtype
object_columns = df.select_dtypes(include='object').columns

# Columns listed by name and treated as ordinal by the charts below
ordinal_names = ['CityTier', 'SatisfactionScore', 'Complain']
ordinal_columns = df.loc[:, ordinal_names].columns

<h3 style="color:white; font-weight:bold;">1. Overall Churn Distribution</h3>

In [14]:
# Number of rows per distinct value of the 'Churn' column
churn_counts = df['Churn'].value_counts()

# Slice labels are the distinct 'Churn' values themselves
labels = churn_counts.index
# Named `pie_colors` rather than `colors` so that the `plotly.colors as colors`
# module alias from the import cell is not overwritten by a list.
pie_colors = [px.colors.qualitative.Pastel1[2], px.colors.qualitative.Pastel1[0]]

fig = px.pie(values=churn_counts, names=labels,
             title='Overall Churn Distribution',
             color_discrete_sequence=pie_colors,
             hole=0.4)  # Donut hole size (0 for pie, <1 for donut)

# Print the percentage and the label on each slice
fig.update_traces(textinfo='percent+label')

# Centering the title
fig.update_layout(title_text='Overall Churn Distribution',
                  title_x=0.5,  # Center horizontally
                  title_y=0.9,  # Adjust vertical position
                  title_font_size=20)  # Adjust font size

# Display the plot, then export it as a standalone HTML file.
# Raw string: the path contains backslash sequences ("\C", "\o", ...) that are
# not valid escapes in a normal string literal; the resulting path is unchanged.
fig.show()
fig.write_html(r"D:\College\Placements\Projects\Customer_Churn_Analysis\Interactive Graphs\overall_churn_distribution.html")

<h3 style="color:white; font-weight:bold;">2. Churn Distribution - Nominal Columns</h3>

In [9]:
# Function to split string at capital letters
def split_on_capitals(text):
    """Insert a space before every capital letter, then title-case the result.

    The first character never gets a leading space. Because each capital is
    split individually, runs of capitals are separated letter by letter
    (e.g. ``"CustomerID"`` becomes ``"Customer I D"``).

    Args:
        text: The string to split, e.g. a CamelCase column name.

    Returns:
        The spaced, title-cased string; ``''`` when ``text`` is empty.
    """
    # Guard: indexing text[0] below would raise IndexError on an empty string.
    if not text:
        return ''
    spaced_tail = ''.join(
        ' ' + char if char.isupper() else char
        for char in text[1:]
    )
    return (text[0] + spaced_tail).title()

In [13]:
# One figure holding, for every object-dtype column, one bar trace per Churn
# value; a dropdown switches which column's traces are visible.
fig = go.Figure()

# For each column (same order as object_columns), the half-open range
# [start, stop) of its traces inside fig.data. Recording this explicitly keeps
# the dropdown correct even if some column produces fewer traces than there are
# distinct Churn values in the whole frame.
trace_ranges = []

for position, column in enumerate(object_columns):
    # Number of rows for each (category, Churn) combination
    counts = df.groupby([column, 'Churn']).size().reset_index(name='count')

    # Share of each Churn value within its category, as a two-decimal string
    category_totals = counts.groupby(column)['count'].transform('sum')
    counts['percentage'] = ((counts['count'] / category_totals) * 100).map('{:.2f}'.format)
    counts = counts.sort_values(by=[column, 'Churn'], ascending=[True, False])

    start = len(fig.data)
    for churn_value in counts['Churn'].unique():
        churn_rows = counts[counts['Churn'] == churn_value]
        # Equality with True (not truthiness) is kept on purpose: the dtype of
        # 'Churn' is not visible here, and truthiness would treat any non-empty
        # string as churned.
        is_churned = churn_value == True
        fig.add_trace(
            go.Bar(
                x=churn_rows[column],
                y=churn_rows['count'],
                name='Yes' if is_churned else 'No',
                text=churn_rows['percentage'] + '%',
                marker=dict(color='red' if is_churned else 'lightgreen', opacity=0.45),
                # hovertemplate takes precedence over hoverinfo, so only the
                # template is set.
                hovertemplate=(
                    f'<b>{column}</b>: %{{x}}<br>' +
                    'Number of Customers: %{y}<br>' +
                    'Percentage: %{text}'
                ),
                # Only the first column's traces are shown initially
                visible=(position == 0),
                showlegend=True
            )
        )
    trace_ranges.append((start, len(fig.data)))

# One dropdown button per column: show only that column's traces and update
# the figure title and x-axis title to match.
buttons = []
for column, (start, stop) in zip(object_columns, trace_ranges):
    visibility = [start <= trace_idx < stop for trace_idx in range(len(fig.data))]
    column_label = split_on_capitals(column)
    buttons.append(dict(
        method='update',
        label=column_label,
        args=[{'visible': visibility},
              {'title': f'{column_label} by Churn Status',
               'xaxis': {'title': column_label}}]
    ))

# Add dropdown menu to the layout
first_label = split_on_capitals(object_columns[0])
fig.update_layout(
    updatemenus=[dict(
        buttons=buttons,
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        active=0,
        x=0.80,
        xanchor='left',
        y=1.22,
        yanchor='top'
    )],
    # Same wording as the titles the dropdown buttons set
    title=f'{first_label} by Churn Status',
    xaxis_title=first_label,
    yaxis_title='Number of Customers',
    barmode='overlay',
    legend=dict(
        title='Churn'
    )
)

# Display the plot, then export it as a standalone HTML file.
# Raw string: the path contains backslash sequences ("\C", "\c", ...) that are
# not valid escapes in a normal string literal; the resulting path is unchanged.
fig.show()
fig.write_html(r"D:\College\Placements\Projects\Customer_Churn_Analysis\Interactive Graphs\churn_distribution_nominal_columns.html")


<h3 style="color:white; font-weight:bold;">3. Churn Distribution - Ordinal Columns</h3>

In [12]:
# One figure holding, for every ordinal column, one bar trace per Churn value
# (bar height = percentage within the category); a dropdown switches which
# column's traces are visible.
fig = go.Figure()

# For each column (same order as ordinal_columns), the half-open range
# [start, stop) of its traces inside fig.data. Recording this explicitly keeps
# the dropdown correct even if some column produces fewer traces than there are
# distinct Churn values in the whole frame.
trace_ranges = []

for position, column in enumerate(ordinal_columns):
    # Number of rows for each (category, Churn) combination
    counts = df.groupby([column, 'Churn']).size().reset_index(name='count')

    # Share of each Churn value within its category, as a two-decimal string
    category_totals = counts.groupby(column)['count'].transform('sum')
    counts['percentage'] = ((counts['count'] / category_totals) * 100).map('{:.2f}'.format)
    counts = counts.sort_values(by=[column, 'Churn'], ascending=[True, False])

    start = len(fig.data)
    for churn_value in counts['Churn'].unique():
        churn_rows = counts[counts['Churn'] == churn_value]
        fig.add_trace(
            go.Bar(
                x=churn_rows[column],
                # Bar height is the two-decimal percentage parsed back to float
                y=churn_rows['percentage'].astype(float),
                name='Yes' if churn_value else 'No',  # Show 'Yes' or 'No' in the legend
                text=churn_rows['percentage'] + '%',
                # Raw counts travel as customdata so the hover box can show them
                customdata=churn_rows[['count']].to_numpy(),
                # hovertemplate takes precedence over hoverinfo, so only the
                # template is set.
                hovertemplate=(
                    f'<b>{column}</b>: %{{x}}<br>' +
                    'Percentage: %{y:.2f}%<br>' +
                    'Count: %{customdata[0]}<br>'
                ),
                marker=dict(color='red' if churn_value else 'lightgreen', opacity=0.5),
                # Only the first column's traces are shown initially
                visible=(position == 0),
                showlegend=True
            )
        )
    trace_ranges.append((start, len(fig.data)))

# One dropdown button per column: show only that column's traces and update
# the figure title and axis titles to match.
buttons = []
for column, (start, stop) in zip(ordinal_columns, trace_ranges):
    visibility = [start <= trace_idx < stop for trace_idx in range(len(fig.data))]
    column_label = split_on_capitals(column)
    buttons.append(dict(
        method='update',
        label=column_label,
        args=[{'visible': visibility},
              {'title': f'{column_label} by Churn Status',
               'xaxis': {'title': column_label},
               # Button args are not given the Python-side 'yaxis_title'
               # shorthand expansion, so the nested form is used instead.
               'yaxis': {'title': 'Percentage'}}]
    ))

# Add dropdown menu to the layout
first_label = split_on_capitals(ordinal_columns[0])
fig.update_layout(
    updatemenus=[dict(
        buttons=buttons,
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        active=0,
        x=0.80,
        xanchor='left',
        y=1.22,
        yanchor='top'
    )],
    title=f'{first_label} by Churn Status',
    xaxis_title=first_label,
    yaxis_title='Percentage',
    barmode='group',
    legend=dict(
        title='Churn'
    )
)

# Display the plot, then export it as a standalone HTML file.
# Raw string: the path contains backslash sequences ("\C", "\c", ...) that are
# not valid escapes in a normal string literal; the resulting path is unchanged.
fig.show()
fig.write_html(r"D:\College\Placements\Projects\Customer_Churn_Analysis\Interactive Graphs\churn_distribution_ordinal_columns.html")