<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/interactive_cancellations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Interactive Visualization

In [None]:
# Upgrade Matplotlib
!pip install matplotlib --upgrade
!pip install plotly --upgrade

In [1]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from ipywidgets import interact
#setting plot size
#plt.rcParams["figure.figsize"] = (7.0,4.0)

## Loading Data

In [None]:
# Loading data from local drive.
# Colab-only step: files.upload() is called with no arguments and its result
# is kept in `uploaded1`, which the next cell indexes by file name.
from google.colab import files
uploaded1 = files.upload()

In [None]:
# Storing loaded data from excel to a pandas dataframe.
# The lookup key is hard-coded, so the uploaded file must be named exactly
# '2022_11_01_op.xlsx'.
# NOTE(review): assumes uploaded1 maps file name -> raw file bytes — confirm.
import io
df = pd.read_excel(io.BytesIO(uploaded1['2022_11_01_op.xlsx']))

In [None]:
# Changing column names to lower case (applied to df in place)
df.rename(columns=lambda column_name: column_name.lower(), inplace=True)

In [None]:
# Checking the dataframe info (column names, dtypes and non-null counts)
df.info()

In [None]:
# Previewing the first rows of the dataframe
df.head()

In [None]:
# Grouping column names by dtype (and by keyword) for interactive visualization
date_col = df.select_dtypes(include='datetime64[ns]').columns.tolist()
dim_col = df.select_dtypes(include='object').columns.tolist()
# Subsets of the object-typed columns, picked by a substring of the column name
eve_col = [name for name in dim_col if 'event' in name]
cat_col = [name for name in dim_col if 'category' in name]
num_col = df.select_dtypes(include=['float64', 'int64']).columns.tolist()

In [None]:
# Bucketing lifetime_days into 3 equal-width bins labelled early / medium / late.
# `labels` has to be passed by keyword: the third positional parameter of
# pd.cut is `right`, so a positional list is only read as a truthy `right`
# flag and the bins keep their default interval labels.
df = df.assign(maturity_category = pd.cut(df['lifetime_days'], bins=3, labels=['early','medium','late']))

In [None]:
# Displaying the derived maturity_category column
df['maturity_category']

# Sankey of 3 variables

## Preparing data for 3 variables

In [None]:
# Columns used as the three Sankey levels, from left to right
col_1, col_2, col_3 = (
    'loyalty_stage_category',
    'event_category',
    'operator_out',
)

In [None]:
# First link table: non-null 'name' count per (col_1, col_2) pair,
# with the two grouping columns relabelled as source / target
df3_temp1 = (
    df.groupby(by=[col_1, col_2])
      .agg(value=('name', 'count'))
      .reset_index()
      .rename(columns={col_1: 'source', col_2: 'target'})
)
df3_temp1

In [None]:
# Second link table: non-null 'name' count per (col_2, col_3) pair,
# with the two grouping columns relabelled as source / target
df3_temp2 = (
    df.groupby(by=[col_2, col_3])
      .agg(value=('name', 'count'))
      .reset_index()
      .rename(columns={col_2: 'source', col_3: 'target'})
)
df3_temp2

In [None]:
# Stacking both link tables into a single one with a fresh 0..n-1 index
links_3 = pd.concat([df3_temp1, df3_temp2], ignore_index=True)

In [None]:
# Distinct labels found in the source and target columns; these become the nodes
unique_source_target_3 = pd.unique(links_3[['source','target']].values.ravel('K')).tolist()

In [None]:
# Lookup from node label to its position in unique_source_target_3
mapping_dict = dict(zip(unique_source_target_3, range(len(unique_source_target_3))))

In [None]:
# Replacing source / target labels with their node positions (links_3 is left untouched)
links_3_2 = links_3.assign(
    source=links_3['source'].map(mapping_dict),
    target=links_3['target'].map(mapping_dict),
)

In [None]:
# Generating a dictionary of lists: one list per column of links_3_2
# (read below through its 'source', 'target' and 'value' keys)
links_3_dict = links_3_2.to_dict(orient='list')
#links_3_dict

## Sankey Visualization

In [None]:
# Building the Sankey figure from the node labels and the indexed links
fig_3 = go.Figure(
    data=[
        go.Sankey(
            # Node appearance, labels and one colour per node
            node={
                'pad': 15,
                'thickness': 20,
                'line': {'color': 'black', 'width': 0.5},
                'label': unique_source_target_3,
                'color': [
                    '#a6a6a6', '#ffff00', '#00b050', '#ff0000', '#ffc000', '#0080FF', '#b4b404',
                    '#64b23b', '#a2a2a2', '#cc071e', '#79d64a', '#01acfb', '#990ae3', '#15150c',
                ],
            },
            # Link endpoints (node positions) and their weights
            link={
                'source': links_3_dict['source'],
                'target': links_3_dict['target'],
                'value': links_3_dict['value'],
            },
        )
    ]
)

In [None]:
# Titling and rendering the Sankey diagram
fig_3.update_layout(
    font_size=10,
    title_text='Loyalty, Churn and Operators Diagram - October 2022',
)
fig_3.show()

# Sankey of 3 variables with dynamic filter



## Preparing data for 3 variables with dynamic filter

In [None]:
# Function for preparing data for Sankey Diagram
def prepare_sankey_4(column1, column2, column3, filter_num = 0, data = None):
  ''' Prepares sankey data by providing columns and filter.

      Keeps only the rows whose `column1` equals the `filter_num`-th distinct
      value of that column (in order of first appearance), then counts the
      non-null 'name' entries per (column1, column2) pair and per
      (column2, column3) pair to form the links.

      Args:
        column1: column whose selected value is the first-level node.
        column2: column providing the second-level nodes.
        column3: column providing the third-level nodes.
        filter_num: position of the `column1` value to keep; negative
          positions count from the end.
        data: dataframe to analyse; it must contain a 'name' column.
          Defaults to the notebook-level `df`.

      Returns:
        Tuple (unique_source_target, links_dict, filt_1): the node labels,
        a dict with 'source', 'target' and 'value' lists in which source and
        target are positions in the node label list, and the value that was
        used as filter.

      Raises:
        IndexError: if `filter_num` does not address a distinct value of `column1`. '''
  frame = df if data is None else data
  choices = frame[column1].unique().tolist()
  if not -len(choices) <= filter_num < len(choices):
    raise IndexError(f'filter_num={filter_num} is out of range: '
                     f'{column1!r} has {len(choices)} distinct values')
  filt_1 = choices[filter_num]
  # Filtering once; both link tables are built from the same subset
  subset = frame[frame[column1] == filt_1]

  def _count_links(source_col, target_col):
    # Counts rows per (source_col, target_col) pair as source / target / value
    counts = subset.groupby(by=[source_col, target_col]).agg(value = ('name','count')).reset_index()
    return counts.rename(columns={source_col:'source', target_col:'target'})

  # Concatenating the 2 link tables
  links = pd.concat([_count_links(column1, column2), _count_links(column2, column3)],
                    axis=0).reset_index(drop=True)
  # Node labels in a layout-independent order: every source first, then unseen targets
  nodes_in_order = pd.concat([links['source'], links['target']], ignore_index=True)
  unique_source_target = list(pd.unique(nodes_in_order))
  # Mapping each label to its position in the node list
  mapping_dict = {label: position for position, label in enumerate(unique_source_target)}
  # Generating links mapped with indexes, as a dictionary of lists
  links_dict = links.assign(source = links['source'].map(mapping_dict),
                            target = links['target'].map(mapping_dict)).to_dict(orient='list')
  return (unique_source_target, links_dict, filt_1)

#prepare_sankey_4('loyalty_stage_category', 'event_category', 'operator_out', 0)

In [None]:
# Listing name of columns available for Sankey: Choose 3
# (dim_col holds the object-typed column names; end='' suppresses the trailing newline)
print(dim_col,end='')

In [None]:
# Building the Sankey inputs for the loyalty stage at position 4.
# Note: the third result is bound to `filter`, which shadows Python's builtin
# of the same name for the rest of the notebook session.
(un_source_target, links_d, filter) = prepare_sankey_4(
    column1='loyalty_stage_category',
    column2='event_category',
    column3='operator_out',
    filter_num=4,
)

## Sankey Visualization

In [None]:
# Generating Sankey Diagram for the filtered data
fig_4 = go.Figure(
    data=[go.Sankey(
        # Setting the data for the node
        node=dict(
            pad=15, thickness=20, line=dict(color='black',width=0.5), label=un_source_target,
            # One colour per node, in the same order as the labels.
            # NOTE(review): earlier variants of this list started with '#ff0000', '#a6a6a6',
            # '#ffff00' or '#ffc000' instead of '#00b050' — presumably one per filter value; confirm.
            color=['#00b050','#ff0000','#818589','#64b23b','#a2a2a2','#cc071e','#79d64a','#01acfb','#990ae3','#15150c']),
        # Setting the data for the link
        link = dict(
            source=links_d['source'],
            target=links_d['target'],
            value=links_d['value'],
        ),
        # `ids` is deliberately not set: Plotly documents it as an array of
        # string identifiers, whereas the link counts are numeric and may repeat.
    )]
)

In [None]:
# Sizing, titling (with the filtered value) and rendering the Sankey diagram
fig_4.update_layout(
    title_text=f'{filter} fase, Churn and Operators Diagram - October 2022',
    font_size=10,
    width=600,
    height=400,
)
fig_4.show()