In [None]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt #for visualization
import matplotlib

In [None]:
# Load the churn records; the path is resolved relative to the working directory.
CHURN_CSV_PATH = 'Churn.csv'
data_df = pd.read_csv(CHURN_CSV_PATH)

## Overview of Churn Data

In [None]:
# Structural summary of the loaded frame: dimensions, column names, null counts.
n_rows, n_cols = data_df.shape
feature_names = data_df.columns.to_list()
missing_per_column = data_df.isnull().sum()

print(f'Number of rows: {n_rows}')
print(f'Number of columns: {n_cols}')
print(f'\nFeatures :\n{feature_names}')
# NOTE: isnull() only flags real NaN/None entries; blank strings are not counted.
print(f'\nMissing data:\n{missing_per_column}')

##  Overall Churn 

In [None]:
# One row per Churn label; the unnamed size column (0) is renamed to 'Count'.
gb_df = (
    data_df.groupby('Churn')
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: 'Count'})
)
total_customers = gb_df['Count'].sum()

# Figure defaults; rcParams are global, so they also apply to later plots.
plt.rcParams['figure.figsize'] = [5, 5]
plt.rcParams['figure.dpi'] = 100


def _wedge_label(pct):
    """Format a wedge as 'percent% ( absolute count )' from its percentage."""
    return f'{pct:.2f}% ( {pct*total_customers/100:.0f} )'


plt.pie(
    gb_df['Count'],
    labels=gb_df['Churn'],
    wedgeprops={'edgecolor': 'black'},
    autopct=_wedge_label,
)

plt.title('Overall Company Churn')
plt.show()

## Exploring Categorical Features 

In [None]:
#Define Bar Chart Function
def bar(feature, df = data_df):
    """Plot churn counts per category of ``feature`` and print category shares.

    Draws a grouped bar chart with one group per distinct value of
    ``df[feature]`` and one bar per ``Churn`` label, then prints the share of
    rows (in percent, rounded to two decimals) that each category holds.

    Parameters
    ----------
    feature : str
        Name of the categorical column to break churn down by.
    df : pandas.DataFrame, optional
        Frame containing ``feature`` and a ``Churn`` column. The default is
        the ``data_df`` object that exists when this function is defined.

    Notes
    -----
    The printed categories follow ``value_counts()`` order (most frequent
    first), which can differ from the sorted order of the bars.
    """
    # Rows = categories of `feature`, columns = Churn labels, values = row counts.
    churn_counts = df.groupby([feature, 'Churn']).size().unstack()
    # Clear the index name; pandas would otherwise use it as the x-axis label.
    churn_counts.index.name = None

    # Count each category once and reuse it for both the labels and the shares.
    # (The original bound the label text to a local called `str`, shadowing
    # the builtin for the rest of the function.)
    category_counts = df[feature].value_counts()
    non_null_rows = df[feature].count()
    categories = f'{category_counts.index.to_list()}'
    percentages = f'{round(category_counts/non_null_rows*100,2).to_list()}'

    churn_counts.plot(kind = 'bar', ylabel = 'Count', title = f'Churn by {feature}')
    plt.show()
    print(f'Percentage values of {categories} are {percentages} respectively.')

# Quick check that the helper renders a chart and prints the shares.
bar('gender')

### Churn Based On Demographics

In [None]:
# Recode the SeniorCitizen 0/1 flag as 'No'/'Yes' so its bars are labelled like
# the other demographic features. replace() builds a new column in one step
# instead of writing strings into the existing (presumably integer) column
# through .loc, which newer pandas versions warn about or reject. Values that
# are already 'No'/'Yes' are left untouched, so re-running this cell is safe.
data_df['SeniorCitizen'] = data_df['SeniorCitizen'].replace({0: 'No', 1: 'Yes'})

#Gender, Senior Citizen, Partner and Dependent feature Plots
for demographic_feature in ('gender', 'SeniorCitizen', 'Partner', 'Dependents'):
    bar(demographic_feature)


### Churn Based On Subscribed Services

In [None]:
# One churn bar chart per subscribed-service column, in the listed order.
SERVICE_FEATURES = [
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
]
for service_feature in SERVICE_FEATURES:
    bar(service_feature)

### Churn Based On Contract, Billing and Payment Method

In [None]:
# Churn bar charts for the contract, billing and payment columns.
for billing_feature in ('Contract', 'PaperlessBilling', 'PaymentMethod'):
    bar(billing_feature)

## Exploring Numerical Features

In [None]:
# Display the dtype of every column (bare last expression, so the notebook renders it).
data_df.dtypes

In [None]:
# Try a direct float cast of TotalCharges. A failure is printed instead of
# raised, so the message is shown and the notebook keeps running.
try:
    charges_as_float = data_df['TotalCharges'].astype(float)
except ValueError as cast_error:
    print(cast_error)
else:
    data_df['TotalCharges'] = charges_as_float

In [None]:
# Parse TotalCharges as numbers (unparseable entries become NaN), then fill
# those gaps with the median of the values that did parse.
total_charges_numeric = pd.to_numeric(data_df['TotalCharges'], errors='coerce')
data_df['TotalCharges'] = total_charges_numeric
data_df['TotalCharges'] = total_charges_numeric.fillna(total_charges_numeric.median())

In [None]:
def hist(feature, df = data_df, bins = 10):
    """Plot side-by-side histograms of ``feature`` for the two churn groups.

    Parameters
    ----------
    feature : str
        Name of the numerical column to plot.
    df : pandas.DataFrame, optional
        Frame containing ``feature`` and a ``Churn`` column. The default is
        the ``data_df`` object that exists when this function is defined.
    bins : int or sequence, optional
        Passed straight to ``plt.hist``; defaults to 10, the value that was
        previously hard-coded.

    Notes
    -----
    Rows are split on ``Churn == 'No'``. Every other row (including any with
    a missing or unexpected ``Churn`` value) is plotted under the 'Yes' label.
    """
    stayed = df['Churn'] == 'No'
    groups = [df.loc[stayed, feature], df.loc[~stayed, feature]]
    plt.hist(groups, bins = bins, edgecolor = 'black', label = ['No', 'Yes'])

    plt.title(f'Churn by {feature}')
    plt.xlabel(f'{feature}')
    plt.ylabel('Count')

    plt.legend()
    plt.show()


In [None]:
# Churn histograms for each numerical column.
for numeric_feature in ('tenure', 'MonthlyCharges', 'TotalCharges'):
    hist(numeric_feature)