In [3]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [4]:
# Load the churn dataset from a CSV located in the notebook's working directory.
# NOTE(review): the file name suggests an already-cleaned export — confirm which
# upstream step produces it.
df = pd.read_csv('telko-churn-bersih.csv')

In [5]:
# Preview the first rows (head() defaults to five) to sanity-check the load.
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,TotalServices,Protection
0,Female,No,Yes,No,1,No,No,DSL,No,Yes,...,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,Yes
1,Male,No,No,No,34,Yes,No,DSL,Yes,No,...,No,No,One year,No,Mailed check,56.95,1889.5,No,3,Yes
2,Male,No,No,No,2,Yes,No,DSL,Yes,Yes,...,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,3,Yes
3,Male,No,No,No,45,No,No,DSL,Yes,No,...,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,3,Yes
4,Female,No,No,No,2,Yes,No,Fiber optic,No,No,...,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,No


In [6]:
# List every column with its dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7021 entries, 0 to 7020
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7021 non-null   object 
 1   SeniorCitizen     7021 non-null   object 
 2   Partner           7021 non-null   object 
 3   Dependents        7021 non-null   object 
 4   tenure            7021 non-null   int64  
 5   PhoneService      7021 non-null   object 
 6   MultipleLines     7021 non-null   object 
 7   InternetService   7021 non-null   object 
 8   OnlineSecurity    7021 non-null   object 
 9   OnlineBackup      7021 non-null   object 
 10  DeviceProtection  7021 non-null   object 
 11  TechSupport       7021 non-null   object 
 12  StreamingTV       7021 non-null   object 
 13  StreamingMovies   7021 non-null   object 
 14  Contract          7021 non-null   object 
 15  PaperlessBilling  7021 non-null   object 
 16  PaymentMethod     7021 non-null   object 


In [7]:
def hist_plot(p:str):
    """Show an overlaid, percent-normalised histogram of column ``p`` split by Churn.

    Each distinct value of ``df[p]`` is converted to a string and drawn as its
    own category on the x-axis, with one semi-transparent trace per Churn value.

    Parameters
    ----------
    p : str
        Name of the column in the module-level ``df`` to plot.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    values = df[p]
    # Keep the rows in their original order: Plotly Express pairs array-like
    # arguments by position, so sorting only the x values would attach each
    # value to a different row's Churn label.
    fig = px.histogram(x=values.astype(str), color=df['Churn'],
                   histnorm='percent', opacity=0.5, nbins=100)
    fig.update_layout(barmode='overlay', width=600, height=400, legend_title='Churn?')
    # The values are strings on a category axis, so the ascending order is
    # imposed on the axis itself rather than by reordering the data.
    sorted_categories = values.drop_duplicates().sort_values().astype(str).tolist()
    fig.update_xaxes(title=f'{p}', categoryorder='array', categoryarray=sorted_categories)
    fig.show()

def bar_plot(kolom):
    """Display a grouped bar chart of ``kolom`` from ``df``, coloured by Churn.

    The chart is built with ``px.histogram`` using ``histnorm='percent'`` and a
    fixed 600x400 size; the x-axis title is blanked and the chart title is
    derived from the column name.

    Parameters
    ----------
    kolom : str
        Name of the column in the module-level ``df`` to plot.
    """
    chart = px.histogram(
        df,
        x=kolom,
        color='Churn',
        histnorm='percent',
        width=600,
        height=400,
    )

    layout_options = {
        'barmode': 'group',
        'legend_title': 'Churn?',
        'title': f'{kolom.title()} Bar Chart',
    }
    chart.update_layout(**layout_options)

    # The chart title already names the column, so the axis title is left empty.
    chart.update_xaxes(title='')
    chart.show()

def pie_subplots(kolom):
    """Show three donut charts of ``kolom``: all rows, Churn == 'No', Churn == 'Yes'.

    The overall distribution fills the left column (spanning both rows); the
    two per-churn distributions are stacked in the right column.

    Parameters
    ----------
    kolom : str
        Name of the column in the module-level ``df`` whose value counts are
        plotted.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Compute each distribution once instead of calling value_counts() twice per pie.
    counts_all = df[kolom].value_counts()
    counts_no = df.loc[df['Churn'] == 'No', kolom].value_counts()
    counts_yes = df.loc[df['Churn'] == 'Yes', kolom].value_counts()

    fig = make_subplots(
        rows=2, cols=2,
        # The bottom-left cell is covered by the rowspan of the first pie, so it
        # is marked None; an empty {} spec would create an unused xy subplot
        # in that cell.
        specs=[[{"type": "domain", "rowspan": 2}, {"type": "domain"}],
               [None, {"type": "domain"}]],
        # Titles are matched to the non-None cells in row-major order.
        subplot_titles=("Total Client", 'Churn No', 'Churn Yes')
    )

    placements = (
        (counts_all, 1, 1),
        (counts_no, 1, 2),
        (counts_yes, 2, 2),
    )
    for counts, row, col in placements:
        fig.add_trace(go.Pie(labels=counts.index, values=counts.values),
                      row=row, col=col)

    fig.update_layout(height=600,width=900)
    # NOTE(review): pull offsets are listed for three slices only — confirm this
    # is intended for columns with more categories.
    fig.update_traces(textposition='inside',textinfo='percent+label',hole=0.3, pull=[0,0.01,0.01])

    fig.show()

In [8]:
# Draw one churn-split histogram per numeric (float64 / int64) column.
all_col_num = df.select_dtypes(include=['float64','int64'])
for num_col in all_col_num.columns:
    hist_plot(num_col)

In [9]:
# Draw one churn-split bar chart per object-typed column; Churn itself is
# excluded because it is the colour dimension of every chart.
all_col_cat = df.select_dtypes(include=['object']).drop(columns='Churn')
for cat_col in all_col_cat.columns:
    bar_plot(cat_col)

In [10]:
# Pairwise correlation of the numeric columns, rendered as an annotated heatmap.
df_corr = df.corr(numeric_only=True)
px.imshow(
    df_corr,
    text_auto=True,
    color_continuous_scale=px.colors.sequential.Emrld,
    aspect='auto',
    width=600,
    height=400,
)

In [11]:
# List the column names that can be passed to the plotting helpers.
df.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn', 'TotalServices',
       'Protection'],
      dtype='object')

In [12]:
# Compare the PaymentMethod mix across all rows and within each Churn group.
pie_subplots('PaymentMethod')