In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from IPython.display import display, HTML
from pyvis import network as net

In [2]:
def create_funnel_graph(df, chart_title='Funnel graph', source='source', target='target', value='user', pct_all='pct_all', pct_source='pct_source'):
    """Build a Plotly Sankey figure from an edge-list DataFrame.

    Every row of ``df`` is drawn as one link from the label in the ``source``
    column to the label in the ``target`` column. Each distinct label becomes
    exactly one node.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per link. Must contain the columns named by ``source``,
        ``target``, ``value``, ``pct_all`` and ``pct_source``.
    chart_title : str
        Title placed on the figure.
    source, target : str
        Names of the columns holding the start / end node label of each link.
    value : str
        Name of the column used as link value. Per node, the larger of the
        outgoing and incoming sums of this column is shown in the hover text,
        divided by 1000 and suffixed with "k".
    pct_all : str
        Name of the column that is summed per node (larger of outgoing /
        incoming sum) and rendered as a percentage in the node hover text.
    pct_source : str
        Name of the column attached to each link and rendered as a percentage
        in the link hover text.

    Returns
    -------
    plotly.graph_objs.Figure
        The Sankey figure; nothing is displayed or written by this function.
    """
    # One node per distinct label, ordered by first appearance (all sources
    # first, then targets). dict.fromkeys de-duplicates while keeping order.
    all_nodes = list(dict.fromkeys(df[source].tolist() + df[target].tolist()))
    node_position = {label: position for position, label in enumerate(all_nodes)}

    source_indices = [node_position[label] for label in df[source]]
    target_indices = [node_position[label] for label in df[target]]

    # Cycle through the palette in node order so the same input always gives
    # the same colours; a link takes the colour of the node it starts from.
    palette = px.colors.qualitative.Plotly
    node_colors = [palette[position % len(palette)] for position in range(len(all_nodes))]
    edge_colors = [node_colors[position] for position in source_indices]

    # Aggregate once per column instead of re-filtering the frame per node.
    by_source = df.groupby(source, sort=False)
    by_target = df.groupby(target, sort=False)

    def _per_node(aggregated):
        """Align a per-label aggregate with ``all_nodes``; absent labels get 0."""
        return aggregated.reindex(all_nodes, fill_value=0).to_numpy()

    outgoing_count = _per_node(by_source.size())
    incoming_count = _per_node(by_target.size())
    # A node can appear as a source, as a target, or both; the larger of the
    # two sums is what the hover text reports.
    pct_all_list = np.maximum(_per_node(by_source[pct_all].sum()),
                              _per_node(by_target[pct_all].sum()))
    user_count = np.maximum(_per_node(by_source[value].sum()),
                            _per_node(by_target[value].sum())) / 1000
    # Row layout is fixed by the node hovertemplate below:
    # [pct_all, outgoing links, incoming links, users in thousands].
    node_data = [list(row) for row in zip(pct_all_list.tolist(),
                                          outgoing_count.tolist(),
                                          incoming_count.tolist(),
                                          user_count.tolist())]

    fig = go.Figure(data=[
        go.Sankey(
            node=dict(pad=20,
                    thickness=20,
                    line=dict(color='black', width=1.0),
                    label=all_nodes,
                    customdata=node_data,
                    hovertemplate='%{customdata[0]:.1%} of all user (%{customdata[3]:.2f}k) <br>Outgoing links: %{customdata[1]} <br>Incoming links: %{customdata[2]} <extra></extra>',
                    color=node_colors),
            link=dict(source=source_indices,
                    target=target_indices,
                    value=df[value],
                    customdata=df[pct_source],
                    hovertemplate='%{customdata:.1%} from <b>%{source.label}</b> to <b>%{target.label}</b><extra></extra>',
                    color=edge_colors)
        )
    ])

    fig.update_layout(title_text=chart_title,
                    height=1000,
                    font=dict(size=10, color='white'),
                    plot_bgcolor='rgba(0,0,0,0)',
                    paper_bgcolor='grey')

    return fig



In [5]:
# Load the day-0 edge list; the code below relies on a 'source' column and a
# numeric 'user' column.
df = pd.read_csv('data/scs_day0.csv')

# Share of each row's users among all rows that have the same source.
df['pct_source'] = df['user'] / df.groupby('source')['user'].transform('sum')

# Share of each row's users relative to the total leaving '1. login'; that
# total is the same for every row, so it is computed once.
df['pct_all'] = df['user'] / df.loc[df['source'] == '1. login', 'user'].sum()

# Order by source, largest share first within each source.
df = df.sort_values(by=['source', 'pct_source'], ascending=[True, False])

In [6]:
# Build and display the Sankey figure for the frame currently held in `df`.
# NOTE(review): a commented-out line here used to read 'data/scs_stage_1.csv'
# into `df` instead -- confirm whether that alternative input is still needed.
fig = create_funnel_graph(
    df,
    chart_title='SCS ON Day0 funnel graph',
)
fig.show()

In [111]:
# Save the figure as HTML. The output folder is created first so the cell
# does not fail when ./result_graph/ is missing (e.g. on a fresh checkout).
output_path = Path("./result_graph/scs_on_day0_funnel.html")
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(str(output_path))