<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/interactive_cancellations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Interactive Visualization

In [None]:
# Upgrade Matplotlib
!pip install matplotlib --upgrade
!pip install plotly --upgrade

In [None]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from ipywidgets import interact
#setting plot size
#plt.rcParams["figure.figsize"] = (7.0,4.0)

## Loading Data

In [None]:
# Loading data from local drive
# `uploaded1` is indexed by file name in the next cell, and the value found
# there is wrapped in io.BytesIO before being handed to pandas.
from google.colab import files
uploaded1 = files.upload()

In [None]:
# Storing loaded data from excel to a pandas dataframe
import io

# Name the workbook is expected to be uploaded under. If that key is missing
# (for example the file was renamed before or during upload) but exactly one
# file was uploaded, that file is used instead of failing with a bare KeyError.
EXPECTED_FILE = '2022_11_01_op.xlsx'
if EXPECTED_FILE in uploaded1:
  file_name = EXPECTED_FILE
elif len(uploaded1) == 1:
  file_name = next(iter(uploaded1))
else:
  raise KeyError(f"'{EXPECTED_FILE}' not found among uploaded files: {list(uploaded1)}")
df = pd.read_excel(io.BytesIO(uploaded1[file_name]))

In [None]:
# Normalising every column label to lower case
df = df.rename(columns=str.lower)

In [None]:
# Checking the dataframe info
# (run after the column labels have been lower-cased, so the names listed here
# are the ones used in the rest of the notebook)
df.info()

In [None]:
# Previewing the first rows of the dataframe
df.head()

In [None]:
# Grouping column names by dtype for the interactive visualization
date_col = list(df.select_dtypes(include=['datetime64[ns]']).columns)
dim_col = list(df.select_dtypes(include=['object']).columns)
num_col = list(df.select_dtypes(include=['float64', 'int64']).columns)
# Object-typed columns whose name mentions 'event' / 'category'
eve_col = [name for name in dim_col if 'event' in name]
cat_col = [name for name in dim_col if 'category' in name]

In [None]:
# Bucketing lifetime_days into three equal-width bins.
# `labels` has to be passed by keyword: the third positional parameter of
# pd.cut is `right`, so a positional list is taken as a truthy `right` flag and
# the bins come back as intervals instead of 'early'/'medium'/'late'.
df = df.assign(maturity_category = pd.cut(df['lifetime_days'], bins=3, labels=['early','medium','late']))

In [None]:
# 'Yes' where contract_commitment equals 1, 'No' for every other value
df = df.assign(has_contract_comm = np.where(df['contract_commitment'].eq(1), 'Yes', 'No'))

In [None]:
# Coarse product grouping derived from product_type.
# NOTE: the following cell assigns `product` again with a finer mapping, so
# this result is overwritten when the notebook is run top to bottom.
# na=False keeps every condition strictly boolean: for missing product_type
# values str.contains would otherwise return NaN, and np.select refuses
# non-boolean conditions. Missing values therefore fall through to 'Other'.
cond_list = [df['product_type'].str.contains(pattern, na=False)
             for pattern in ('Løpende', 'Bedrift', 'ice', 'Fri')]
choice_list = ['Løpende','Bedrift_XX','iceSM_XX','Fri']
# np.select takes the first condition that matches, in list order
df = df.assign(product = np.select(cond_list, choice_list,'Other'))

In [None]:
# Fine-grained product grouping derived from product_type.
# np.select takes the first condition that matches, so the specific plans are
# listed before the generic 'Bedrift' / 'iceSMB' patterns that would also match.
patterns = ['Bedrift 1 GB','iceSMB 8 GB','iceSMB Løpende','iceSMB 1 GB','Bedrift','iceSMB','Fri']
# na=False keeps every condition strictly boolean: for missing product_type
# values str.contains would otherwise return NaN, and np.select refuses
# non-boolean conditions. Missing values therefore fall through to 'Other'.
cond_list = [df['product_type'].str.contains(pattern, na=False) for pattern in patterns]
choice_list = ['Bedrift 1 GB','iceSMB 8 GB','iceSMB Løpende','iceSMB 1 GB','Bedrift_XX','iceSMB_XX','Fri']
df = df.assign(product = np.select(cond_list, choice_list,'Other'))

In [None]:
# Spot-checking the derived product label against the raw product_type (first 50 rows)
#df['maturity_category']
df[['product_type','product']].head(50)

# Sankey of 2 variables


## Preparing data for 2 variables

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_1(column1, column2, data=None):
  ''' Prepares Sankey node labels and links for two columns.

      column1, column2 : names of the source and target columns; every
                         (column1, column2) combination becomes one link.
      data             : optional DataFrame holding both columns plus 'name';
                         defaults to the notebook-level `df`.

      Returns (unique_source_target, links_dict):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target and value is the count of
                               non-null 'name' entries for that pair. '''
  frame = df if data is None else data
  # One row per (source, target) pair
  links = (frame.groupby(by=[column1, column2])
                .agg(value=('name', 'count'))
                .reset_index()
                .rename(columns={column1: 'source', column2: 'target'}))
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'))

#prepare_sankey_1('product','operator_out')

In [None]:
# Listing name of columns available for Sankey: choose 2 for prepare_sankey_1
# NOTE: dim_col was built before the derived columns (e.g. 'product',
# 'has_contract_comm') were assigned, so those are not listed here.
print(dim_col,end='')

In [None]:
# Running function for sankey data
# Links go from 'product' to 'operator_out'; each link value is the number of
# non-null 'name' entries for that pair.
(un_source_target, links_d) = prepare_sankey_1('product','operator_out')

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey visualization

In [None]:
# Building the Sankey figure for the 2-variable case
fig_1 = go.Figure(data=[
    go.Sankey(
        # Node appearance and labels
        node={
            'pad': 15,
            'thickness': 20,
            'line': {'color': 'black', 'width': 0.5},
            'label': un_source_target,
            'color': ['black'],
            # Alternative per-node palette:
            #color=['#00ff00','#000000','#000000','#000000','#64b23b','#a2a2a2','#cc071e',
            #       '#79d64a','#01acfb','#990ae3','#15150c','#3c3766']
        },
        # Links between node positions, sized by `value`
        link={
            'source': links_d['source'],
            'target': links_d['target'],
            'value': links_d['value'],
        },
    )
])

In [None]:
# Sizing, titling and rendering the Sankey diagram
fig_1.update_layout(
    title_text='Churn and Product Type - October 2022',
    title_x=0.5,
    font_size=10,
    width=600,
    height=400,
)
fig_1.show()

# Sankey of 2 variables and 2 filters



## Preparing data for 2 variables and two filters

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_2(column1, column2, filter='event_category',filter_num = 0, data=None):
  ''' Prepares Sankey node labels and links for two columns, using only the
      rows where the filter column equals one of its unique values.

      column1, column2 : names of the source and target columns.
      filter           : name of the column to filter on.
      filter_num       : position of the wanted value among the filter
                         column's unique values (order of first appearance);
                         an out-of-range position raises IndexError.
      data             : optional DataFrame holding these columns plus 'name';
                         defaults to the notebook-level `df`.

      Returns (unique_source_target, links_dict, filt_1):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target and value is the count of
                               non-null 'name' entries for that pair.
        filt_1               - the filter value that was applied. '''
  frame = df if data is None else data
  # Value to keep, picked by position among the filter column's unique values
  filt_1 = frame[filter].unique().tolist()[filter_num]
  # One row per (source, target) pair within the filtered rows
  links = (frame[frame[filter] == filt_1]
           .groupby(by=[column1, column2])
           .agg(value=('name', 'count'))
           .reset_index()
           .rename(columns={column1: 'source', column2: 'target'}))
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'), filt_1)

#prepare_sankey_2('contract_category','operator_out','event_category',0)

In [None]:
# Listing name of columns available for Sankey: choose 2 plus the column to filter on
print(dim_col,end='')

In [None]:
# Running function for sankey data
# The filter column is 'operator_out' and filter_num=0 selects its first unique value.
# NOTE: the third result is bound to `filter`, which shadows Python's builtin
# filter() from here on; the fig_2 title reads this name.
(un_source_target, links_d, filter) = prepare_sankey_2('product_type','operator_out','operator_out',0)

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey visualization

In [None]:
# Generating Sankey Diagram
# The trace no longer passes `ids`: that attribute is meant for unique string
# identifiers per datum, whereas the link counts are repeated integers.
fig_2 = go.Figure(
    data=[go.Sankey(
        # Setting the data for the node
        node=dict(
            pad=15, thickness=20, line=dict(color='black',width=0.5), label=un_source_target,
            # Alternative palette:
            #color=['#00ff00','#64b23b','#a2a2a2','#cc071e',
            #       '#79d64a','#01acfb','#990ae3','#15150c','#3c3766']
            # One colour per node, in un_source_target order
            color=['#00ff00','#000000','#000000','#000000','#1B4F72','#CD6155','#909497']
            ),
        # Setting the data for the link
        link = dict(
            source=links_d['source'],
            target=links_d['target'],
            value=links_d['value'],
        ),
    )]
)

In [None]:
# Sizing, titling and rendering the Sankey diagram; the title starts with the
# filter value returned by prepare_sankey_2
fig_2.update_layout(
    title_text=f'{filter} - October 2022',
    title_x=0.5,
    font_size=10,
    width=600,
    height=400,
)
fig_2.show()

# Sankey of 3 variables, no filter

## Preparing data for 3 variables, no filter

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_3(column1, column2, column3, value, data=None):
  ''' Prepares Sankey node labels and links for three chained columns
      (column1 -> column2 -> column3), without filtering.

      column1, column2, column3 : names of the columns forming the two stages.
      value                     : name of the column whose non-null entries
                                  are counted for each pair.
      data                      : optional DataFrame holding these columns;
                                  defaults to the notebook-level `df`.

      Returns (unique_source_target, links_dict):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target. '''
  frame = df if data is None else data
  val_selected = value
  # First stage: column1 -> column2
  df_temp1 = (frame.groupby(by=[column1, column2])
                   .agg(value=(val_selected, 'count'))
                   .reset_index()
                   .rename(columns={column1: 'source', column2: 'target'}))
  # Second stage: column2 -> column3
  # (a stray early `return df_temp1` used to stop the function before this point)
  df_temp2 = (frame.groupby(by=[column2, column3])
                   .agg(value=(val_selected, 'count'))
                   .reset_index()
                   .rename(columns={column2: 'source', column3: 'target'}))
  # Stacking both stages into one links table
  links = pd.concat([df_temp1, df_temp2], axis=0).reset_index(drop=True)
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'))

# Example call, kept commented out like the examples of the other prepare_sankey_* cells
#prepare_sankey_3('loyalty_stage','binding','product_type','percent')

In [None]:
# Listing name of columns available for Sankey: Choose 3
# (prepare_sankey_3 additionally takes the name of the column to count)
print(dim_col,end='')

In [None]:
# Running function for sankey data
# prepare_sankey_3 takes three stage columns plus the column to count; the
# previous three-argument call raised TypeError (missing `value`).
(un_source_target, links_d) = prepare_sankey_3('loyalty_stage','binding','product_type','percent')

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey Visualization

In [None]:
# Generating Sankey Diagram
# The trace no longer passes `ids`: that attribute is meant for unique string
# identifiers per datum, whereas the link counts are repeated integers.
fig_3 = go.Figure(
    data=[go.Sankey(
        # Setting the data for the node
        node=dict(
            pad=15, thickness=20, line=dict(color='black',width=0.5), label=un_source_target,
            # Alternative palette:
            #color=['#00ff00','#000000','#000000','#000000','#000000','#000000','#64b23b',
            #       '#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c','#3c3766']
            # One colour per node, in un_source_target order
            color=['#00ff00','#000000','#000000','#000000','#1B4F72','#CD6155','#909497']
                  ),
        # Setting the data for the link
        link = dict(
            source=links_d['source'],
            target=links_d['target'],
            value=links_d['value'],
        ),
    )]
)

In [None]:
# Sizing, titling and rendering the Sankey diagram
fig_3.update_layout(
    title_text='October 2022',
    title_x=0.5,
    font_size=10,
    width=600,
    height=400,
)
fig_3.show()

# Sankey of 3 variables with dynamic filter



## Preparing data for 3 variables with dynamic filter

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_4(column1, column2, column3, filter_num = 0, data=None):
  ''' Prepares Sankey node labels and links for three chained columns
      (column1 -> column2 -> column3), using only the rows where column1
      equals one of its unique values.

      column1, column2, column3 : names of the columns forming the two stages.
      filter_num                : position of the wanted value among column1's
                                  unique values (order of first appearance);
                                  an out-of-range position raises IndexError.
      data                      : optional DataFrame holding these columns
                                  plus 'name'; defaults to the notebook-level
                                  `df`.

      Returns (unique_source_target, links_dict, filt_1):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target and value is the count of
                               non-null 'name' entries for that pair.
        filt_1               - the column1 value that was kept. '''
  frame = df if data is None else data
  # Value of column1 to keep, picked by position among its unique values
  filt_1 = frame[column1].unique().tolist()[filter_num]
  # Filtering once and reusing the subset for both stages
  subset = frame[frame[column1] == filt_1]
  stage_links = []
  for src, tgt in ((column1, column2), (column2, column3)):
    stage_links.append(subset.groupby(by=[src, tgt])
                             .agg(value=('name', 'count'))
                             .reset_index()
                             .rename(columns={src: 'source', tgt: 'target'}))
  # Stacking both stages into one links table
  links = pd.concat(stage_links, axis=0).reset_index(drop=True)
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'), filt_1)

#prepare_sankey_4('loyalty_stage_category','operator_out_commercial_name','operator_out',0)

In [None]:
# Listing name of columns available for Sankey: Choose 3
# (the first chosen column is also the one prepare_sankey_4 filters on)
print(dim_col,end='')

In [None]:
# Running function for sankey data
# filter_num=4 selects the fifth unique value of 'loyalty_stage_category'
# (IndexError if the column has fewer than five distinct values).
# NOTE: the third result is bound to `filter`, which shadows Python's builtin
# filter(); the fig_4 title reads this name.
(un_source_target, links_d, filter) = prepare_sankey_4('loyalty_stage_category','operator_out_commercial_name','operator_out',4)

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey Visualization

In [None]:
# Building the Sankey figure for the filtered 3-variable case
fig_4 = go.Figure(data=[
    go.Sankey(
        # Node appearance and labels
        node={
            'pad': 15,
            'thickness': 20,
            'line': {'color': 'black', 'width': 0.5},
            'label': un_source_target,
            # Alternative palettes (only the first colour differs):
            #color=['#ff0000','#818589','#228c22','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c']),
            #color=['#a6a6a6','#ff0000','#818589','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c']),
            #color=['#ffff00','#818589','#228c22','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c']),
            #color=['#ffc000','#818589','#228c22','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c']),
            'color': ['#00b050','#818589','#228c22','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c'],
        },
        # Links between node positions, sized by `value`
        link={
            'source': links_d['source'],
            'target': links_d['target'],
            'value': links_d['value'],
        },
    )
])

In [None]:
# Sizing, titling and rendering the Sankey diagram; the title starts with the
# filter value returned by prepare_sankey_4
fig_4.update_layout(
    title_text=f'{filter} stage & Product Type - October 2022',
    title_x=0.5,
    font_size=10,
    width=600,
    height=400,
)
fig_4.show()

# Sankey of 4 variables, no filter

## Preparing data for 4 variables, no filter

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_5(column1, column2, column3, column4, data=None):
  ''' Prepares Sankey node labels and links for four chained columns
      (column1 -> column2 -> column3 -> column4), without filtering.

      column1 .. column4 : names of the columns forming the three stages.
      data               : optional DataFrame holding these columns plus
                           'name'; defaults to the notebook-level `df`.

      Returns (unique_source_target, links_dict):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target and value is the count of
                               non-null 'name' entries for that pair. '''
  frame = df if data is None else data
  # Consecutive column pairs, one per Sankey stage
  stages = ((column1, column2), (column2, column3), (column3, column4))
  stage_links = []
  for src, tgt in stages:
    stage_links.append(frame.groupby(by=[src, tgt])
                            .agg(value=('name', 'count'))
                            .reset_index()
                            .rename(columns={src: 'source', tgt: 'target'}))
  # Stacking the three stages into one links table
  links = pd.concat(stage_links, axis=0).reset_index(drop=True)
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'))

#prepare_sankey_5('product','event_category','loyalty_stage_category','operator_out')

In [None]:
# Listing name of columns available for Sankey: choose 4 for prepare_sankey_5
print(dim_col,end='')

In [None]:
# Running function for sankey data
# Stages: loyalty_stage_category -> event_category -> product_type -> operator_out
(un_source_target, links_d) = prepare_sankey_5('loyalty_stage_category','event_category','product_type','operator_out')

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey Visualization

In [None]:
# Generating Sankey Diagram
# The trace no longer passes `ids`: that attribute is meant for unique string
# identifiers per datum, whereas the link counts are repeated integers.
fig_5 = go.Figure(
    data=[go.Sankey(
        # Setting the data for the node (no explicit colours set here)
        node=dict(
            pad=15, thickness=20, line=dict(color='black',width=0.5), label=un_source_target,
            # Optional palettes:
            #color=['#00ff00','#000000','#000000','#000000','#000000','#000000','#64b23b',
            #       '#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c','#3c3766']
            #color=['#00ff00','#000000','#000000','#000000','#1B4F72','#CD6155','#909497']
                  ),
        # Setting the data for the link
        link = dict(
            source=links_d['source'],
            target=links_d['target'],
            value=links_d['value'],
        ),
    )]
)

In [None]:
# Sizing, titling and rendering the Sankey diagram (wider canvas for four stages)
fig_5.update_layout(
    title_text='Churn October 2022 - 4 dimensions example',
    title_x=0.5,
    font_size=10,
    width=900,
    height=400,
)
fig_5.show()

# Sankey No Aggregation

## Preparing data for 3 variables, no aggregation

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_6(column1, column2, column3, value, data=None, filter_col='binding', filter_val='No'):
  ''' Prepares Sankey node labels and links for three chained columns
      (column1 -> column2 -> column3) without aggregating: every kept row
      contributes one link per stage, weighted by its own `value` entry.

      column1, column2, column3 : names of the columns forming the two stages.
      value                     : name of the column used as link value.
      data                      : optional DataFrame holding these columns;
                                  defaults to the notebook-level `df`.
      filter_col, filter_val    : only rows where filter_col equals filter_val
                                  are kept (defaults reproduce the original
                                  hard-coded binding == 'No' filter).

      Returns (unique_source_target, links_dict):
        unique_source_target - list of node labels, source labels first.
        links_dict           - {'source': [...], 'target': [...], 'value': [...]}
                               where source/target are positions in
                               unique_source_target. '''
  frame = df if data is None else data
  # Filtering once and reusing the subset for both stages
  subset = frame[frame[filter_col] == filter_val]
  # First stage: column1 -> column2
  df_temp1 = subset[[column1, column2, value]].rename(
      columns={column1: 'source', column2: 'target', value: 'value'})
  # Second stage: column2 -> column3
  df_temp2 = subset[[column2, column3, value]].rename(
      columns={column2: 'source', column3: 'target', value: 'value'})
  # Stacking both stages into one links table
  links = pd.concat([df_temp1, df_temp2], axis=0).reset_index(drop=True)
  # Column-major flattening lists all source labels before the target labels,
  # independent of how the values happen to be laid out in memory
  unique_source_target = list(pd.unique(links[['source', 'target']].to_numpy().ravel('F')))
  # Node label -> node position
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Replacing labels by node positions in the links
  links = links.assign(source=links['source'].map(mapping_dict),
                       target=links['target'].map(mapping_dict))
  return (unique_source_target, links.to_dict(orient='list'))

#prepare_sankey_6('loyalty_stage','binding','product_type','percent')

In [None]:
# Listing name of columns available for Sankey: Choose 3
# (prepare_sankey_6 additionally takes the name of the column used as link value)
print(dim_col,end='')

In [None]:
# Running function for sankey data
# Stages: loyalty_stage -> binding -> product_type, with link values read from
# 'total_count'; prepare_sankey_6 keeps only the rows where binding == 'No'.
(un_source_target, links_d) = prepare_sankey_6('loyalty_stage','binding','product_type','total_count')

In [None]:
# Inspecting the node labels; a label's position in this list is the node
# index that the links' source/target entries refer to
un_source_target

## Sankey Visualization

In [None]:
# Generating Sankey Diagram
# The trace no longer passes `ids`: that attribute is meant for unique string
# identifiers per datum, whereas the link values are repeated numbers.
fig_6 = go.Figure(
    data=[go.Sankey(
        # Setting the data for the node (no explicit colours set here)
        node=dict(
            pad=15, thickness=20, line=dict(color='black',width=0.5), label=un_source_target,
            # Optional palettes:
            #color=['#00ff00','#000000','#000000','#000000','#000000','#000000','#64b23b',
            #       '#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c','#3c3766']
            #color=['#00ff00','#000000','#000000','#000000','#1B4F72','#CD6155','#909497']
            ),
        # Setting the data for the link
        link = dict(
            source=links_d['source'],
            target=links_d['target'],
            value=links_d['value'],
        ),
    )]
)

In [None]:
# Sizing, titling and rendering the Sankey diagram
fig_6.update_layout(
    title_text='Loyalty & No Contract Commitment - October 2022',
    title_x=0.5,
    font_size=10,
    width=600,
    height=400,
)
fig_6.show()