In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as offline_py

In [None]:
# Load the four input tables from CSV files in the current working directory.
# The plotting functions below read a `churn_bin` column from `churn` and a
# `userId` column from `pages`/`gender`; other columns are not visible here.
churn = pd.read_csv('churn.csv')
pages = pd.read_csv('pages.csv')
data = pd.read_csv('data.csv')
gender = pd.read_csv('gender.csv')

In [None]:
def churn_dist(df):
    """
    Plot the number of churned versus non-churned rows as a two-bar chart.

    The figure is rendered inline and is also written to ``churn_dist.html``
    in the current working directory.

    Args:
        df: pandas DataFrame with a ``churn_bin`` column. The column is summed
            to obtain the churned count, so it is presumably a 0/1 flag with
            one row per user (TODO confirm against churn.csv).

    Returns:
        None.
    """
    # Count from the frame that was passed in; the previous version ignored
    # `df` and silently read the module-level `churn` instead.
    churned = df.churn_bin.sum()
    not_churned = df.shape[0] - churned

    bars = go.Bar(
        x = ['Churned', 'Not Churned'],
        y = [churned, not_churned],
        opacity = 0.5
    )

    layout = go.Layout(
        title = {'text': "Churn Distribution",
                 'y': 0.9,
                 'x': 0.5,
                 'xanchor': 'center',
                 'yanchor': 'top'},

        xaxis = {"title": "Customer",
                 "automargin": True},

        yaxis = {"title": "Count",
                 "automargin": True})

    figure = go.Figure(data = [bars], layout = layout)

    offline_py.iplot(figure)
    # write_html returns None, so there is nothing meaningful to return.
    figure.write_html("churn_dist.html")

In [None]:
def page_dist(pages):
    """
    Plot each page type's share of all page events as a bar chart.

    Args:
        pages: pandas DataFrame whose first column is ``userId`` and whose
            remaining columns are numeric per-page counts. The first five
            characters of every non-``userId`` column name are dropped for
            display (presumably a ``page_`` prefix - TODO confirm).

    Returns:
        None. The figure is rendered inline.
    """
    # Shorten the page column names for the x axis; the caller's frame is
    # not modified because rename returns a new DataFrame.
    display_names = {
        col: col[5:].replace(' ', '_').lower()
        for col in pages.columns
        if col != "userId"
    }
    pages = pages.rename(columns = display_names)

    # Percentage of all events per page type, computed once for both axes.
    totals = pages.iloc[:, 1:].sum()
    share = (totals / totals.sum() * 100).sort_values()
    # Drop the single largest page type; presumably it would otherwise dwarf
    # every other bar (TODO confirm intent).
    share = share[:-1].round(2)

    # The y axis is labelled "Percentage", so plot the percentages as they
    # are; the previous version multiplied them by 10.
    bars = go.Bar(
        x = share.index,
        y = share,
        opacity = 0.5
    )

    layout = go.Layout(
        title = {'text': "Pages Distribution",
                 'y': 0.9,
                 'x': 0.5,
                 'xanchor': 'center',
                 'yanchor': 'top'},

        xaxis = {"title": "Page Type",
                 "automargin": True},

        yaxis = {"title": "Percentage",
                 "automargin": True}
    )

    figure = go.Figure(data = [bars], layout = layout)

    offline_py.iplot(figure)

In [None]:
def page_box(pages):
    """
    Show one box plot per page column, all on a shared axis.

    Args:
        pages: pandas DataFrame of per-page values. Columns at positions 0
            and 3 are left out of the plot; position 0 is presumably
            ``userId`` and position 3 presumably a dominant page type
            (TODO confirm).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Positions 1-2 and 4 onward; melt() then yields the 'variable'/'value'
    # long format that px.box consumes.
    kept_positions = np.r_[1:3, 4:len(pages.columns)]
    long_form = pages.iloc[:, kept_positions].melt()

    box_fig = px.box(long_form, x = 'variable', y = 'value')

    x_axis_opts = {
        "title_text": "Page Type",
        "title_font": {"size": 12},
        "title_standoff": 25,
    }
    y_axis_opts = {
        "title_text": "Page Usage",
        "title_standoff": 25,
    }
    box_fig.update_xaxes(**x_axis_opts)
    box_fig.update_yaxes(**y_axis_opts)

    # Applied last, so these axis titles are the ones that end up displayed.
    layout_opts = {
        "title": "Page Usage",
        "title_x": 0.5,
        "xaxis_title": "Page Type",
        "yaxis_title": "Page Range Usage",
    }
    box_fig.update_layout(**layout_opts)
    box_fig.show()

In [None]:
def page_boxes(pages, churn, n_cols=3):
    """
    Draw a grid of box plots, one per page column, split by churn flag.

    Args:
        pages: pandas DataFrame whose first column is skipped (presumably
            ``userId``) and whose remaining columns are each plotted in
            their own subplot.
        churn: pandas DataFrame with a ``churn_bin`` column, right-merged
            onto ``pages`` using the columns the two frames share.
        n_cols: number of subplot columns in the grid (default 3, which with
            17 page columns gives the 6x3 grid used previously).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    page_cols = pages.columns[1:]
    df = pd.merge(pages, churn, how = "right")

    # Size the grid from the data instead of hard-coding 17 traces, so a
    # frame with fewer page columns no longer raises IndexError and one with
    # more no longer drops columns silently. Ceiling division, at least 1 row.
    n_rows = max(1, -(-len(page_cols) // n_cols))

    fig = make_subplots(
        rows = n_rows, cols = n_cols,
        subplot_titles = list(page_cols))

    # Fill the grid row by row, left to right.
    for position, page_col in enumerate(page_cols):
        grid_row, grid_col = divmod(position, n_cols)
        fig.add_trace(
            go.Box(x = df.churn_bin, y = df[page_col], name = page_col),
            row = grid_row + 1, col = grid_col + 1)

    fig.update_layout(height = 1000, width = 900,
                      title_text = "Page Events and Churn", title_x = 0.5)

    fig.show()

In [None]:
def make_hist(data, churn, column):
    """
    Plot a histogram of one column, coloured by churn status.

    Args:
        data: pandas DataFrame containing ``column``; right-merged with
            ``churn`` on the columns the two frames share.
        churn: pandas DataFrame with a ``churn_bin`` column whose values 0
            and 1 are relabelled 'Not Churn' and 'Churn'.
        column: name of the column to plot on the x axis. Underscores are
            replaced by spaces and the result title-cased for the figure
            title and x-axis label.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    df = pd.merge(data, churn, how = "right").rename(columns = {"churn_bin": "churn"})

    # Cast to object before writing string labels: assigning strings into a
    # numeric column is an incompatible-dtype setitem, which pandas has
    # deprecated (PDEP-6).
    df["churn"] = df["churn"].astype(object)
    df.loc[df['churn'] == 0, 'churn'] = 'Not Churn'
    df.loc[df['churn'] == 1, 'churn'] = 'Churn'

    fig = px.histogram(df, x = column, color = "churn",
                       nbins = 50, marginal = "box", opacity = 0.5, hover_data = df.columns)

    pretty_name = column.replace('_', ' ').title()
    fig.update_layout(
        title = pretty_name,
        title_x = 0.5,
        xaxis_title = pretty_name,
        yaxis_title = "Count"
        )
    fig.show()

In [None]:
def gender_bar(gender, churn):
    """
    Plot user counts per gender, grouped by churn status.

    Args:
        gender: pandas DataFrame with a ``userId`` column plus indicator
            columns; the code looks for ``gender_F`` and ``gender_M`` holding
            the value 1 for the user's gender. The frame is not modified.
        churn: pandas DataFrame with ``userId`` and ``churn_bin`` columns;
            values 0 and 1 are relabelled 'Not Churn' and 'Churn'.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Reshape the indicator columns to one (userId, gender) row per flag set
    # to 1. melt() works on a copy, so the caller's frame keeps its userId
    # column and the function can be called repeatedly; the previous
    # set_index(..., inplace=True) broke any second call. It also replaces
    # the positional `.drop(0, 1)`, which pandas 2.x no longer accepts.
    flags = gender.melt(id_vars = 'userId', var_name = 'gender', value_name = 'flag')
    gender_long = flags.loc[flags['flag'] == 1, ['userId', 'gender']]

    df = pd.merge(gender_long, churn, how = "right", on = 'userId')
    df = df.rename(columns = {"churn_bin": "churn"})

    # Cast to object before writing string labels into the numeric flag
    # column (incompatible-dtype setitem is deprecated in pandas, PDEP-6).
    df["churn"] = df["churn"].astype(object)
    df.loc[df['churn'] == 0, 'churn'] = 'Not Churn'
    df.loc[df['churn'] == 1, 'churn'] = 'Churn'

    # Write the readable labels into the 'gender' column itself; the previous
    # version created stray 'Female' and 'gender_M' columns and left the
    # plotted column unchanged.
    df.loc[df['gender'] == 'gender_F', 'gender'] = 'Female'
    df.loc[df['gender'] == 'gender_M', 'gender'] = 'Male'

    # px.histogram counts rows per category, matching the "Count" axis label;
    # px.bar draws one mark per row and does not aggregate.
    fig = px.histogram(df, x = 'gender', color = 'churn', barmode = 'group')
    fig.update_layout(
        barmode = 'group',
        title = "Gender And Churn",
        title_x = 0.5,
        xaxis_title = "Gender",
        yaxis_title = "Count")

    fig.show()

In [None]:
# Churned vs. not-churned counts; also writes churn_dist.html.
churn_dist(df=churn)

In [None]:
# Bar chart of each page type's share of events.
page_dist(pages=pages)

In [None]:
# Box plots of the page columns on a shared axis.
page_box(pages=pages)

In [None]:
# One box-plot subplot per page column, split by churn flag.
page_boxes(pages=pages, churn=churn)

In [None]:
# Histogram of the count_user_logs column, coloured by churn status.
make_hist(data=data, churn=churn, column="count_user_logs")

In [None]:
# Histogram of the max_session column, coloured by churn status.
make_hist(data=data, churn=churn, column="max_session")

In [None]:
# Histogram of the avg_length column, coloured by churn status.
make_hist(data=data, churn=churn, column="avg_length")

In [None]:
# Bar chart of gender against churn status.
gender_bar(gender=gender, churn=churn)