<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/interactive_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Interactive Visualization

In [None]:
# Upgrade Matplotlib
!pip install matplotlib --upgrade

In [1]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact
# Setting the default figure size (width, height) used by every plot below
plt.rcParams["figure.figsize"] = (7.0,4.0)

## Loading Data

In [13]:
# Loading data from local drive
# files.upload() is specific to Google Colab; the returned object is indexed
# by the uploaded file's name when the workbook is read in the next cell.
from google.colab import files
uploaded1 = files.upload()

Saving Script_202210261607.xlsx to Script_202210261607.xlsx


In [14]:
# Storing loaded data from excel to a pandas dataframe
import io

# Prefer the workbook this notebook was written against; otherwise fall back to
# the first uploaded file, so a differently named upload does not raise KeyError.
expected_workbook = 'Script_202210261607.xlsx'
workbook_name = expected_workbook if expected_workbook in uploaded1 else next(iter(uploaded1), None)
if workbook_name is None:
    raise ValueError('No file was uploaded; re-run the upload cell and select an Excel workbook.')
df1 = pd.read_excel(io.BytesIO(uploaded1[workbook_name]))

In [15]:
# Changing column names to lower case
df1.rename(columns=lambda label: label.lower(), inplace=True)

In [None]:
# Creating time categories
# Each entry is (new column, source date column, extractor). A feature is only
# derived when its source column exists in df1, so the cell does not raise
# KeyError on extracts that lack order/activation/cancellation dates. When all
# source columns are present the result is the same as assigning them directly.
time_feature_specs = [
    ('year', 'order_date', lambda s: s.dt.year),
    ('quarter', 'order_date', lambda s: s.dt.quarter),
    ('month', 'order_date', lambda s: s.dt.month),
    ('month_name', 'order_date', lambda s: s.dt.month_name()),
    ('weeknum_order', 'order_date', lambda s: s.dt.isocalendar().week),
    ('weeknum_activation', 'activation_date', lambda s: s.dt.isocalendar().week),
    ('weeknum_cancellation', 'cancellation_date', lambda s: s.dt.isocalendar().week),
    ('day', 'order_date', lambda s: s.dt.day_name()),
]

# Report what is being skipped instead of failing silently.
missing_sources = sorted({src for _, src, _ in time_feature_specs} - set(df1.columns))
if missing_sources:
    print(f'Skipping time categories that need missing columns: {missing_sources}')

df1 = df1.assign(**{
    new_col: extract(df1[src])
    for new_col, src, extract in time_feature_specs
    if src in df1.columns
})

In [16]:
# Checking the dataframe info (column names, non-null counts and dtypes)
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23129 entries, 0 to 23128
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   period                            23129 non-null  datetime64[ns]
 1   event_type                        23129 non-null  object        
 2   loyalty_category                  23129 non-null  object        
 3   has_contract_commitment_category  23129 non-null  object        
 4   event_date                        23129 non-null  datetime64[ns]
 5   first_priceplan_id                23129 non-null  object        
 6   product_type                      23129 non-null  object        
 7   terminal_brand                    21958 non-null  object        
 8   market_segment                    23129 non-null  object        
 9   first_sales_channel_name          19400 non-null  object        
 10  contract_remaining_days           13227 non-nu

In [17]:
# Previewing the first five rows of the dataframe
df1.head(5)

Unnamed: 0,period,event_type,loyalty_category,has_contract_commitment_category,event_date,first_priceplan_id,product_type,terminal_brand,market_segment,first_sales_channel_name,contract_remaining_days,mb_total,voice_total,main_fee,discount,subscribers,running_total
0,2022-09-30,Churn,Loyalty,No,2022-09-01,INB-29203876-0,Bedrift 6 GB,APPLE,Soho (0-9),Outbound,,0.0,0.0,349.0,-174.5,1,1
1,2022-09-30,Churn,Loyalty,No,2022-09-01,TNB-26456244-0,Bedrift 1 GB,HUAWEI TECHNOLOGIES CO LTD,Soho (0-9),,,,,129.0,-38.7,1,2
2,2022-09-30,Churn,Loyalty,No,2022-09-01,INB-29203834-0,Bedrift 6 GB,1+,Soho (0-9),Outbound,,,,349.0,-174.5,1,3
3,2022-09-30,Churn,Loyalty,No,2022-09-01,TNB-26456160-0,Bedrift 1 GB,KAMMUNICA,Soho (0-9),,,,,129.0,-47.4,1,4
4,2022-09-30,Churn,Loyalty,No,2022-09-01,INB-27652924-0,Bedrift 12 GB,APPLE,Soho (0-9),Outbound,,445.0,10.966667,449.0,0.0,1,5


In [21]:
# Grouping column names by role so the interactive widgets can offer them as options
date_col = list(df1.select_dtypes(include='datetime64[ns]').columns)      # datetime columns
dim_col = list(df1.select_dtypes(include='object').columns)               # object (text) columns
eve_col = [name for name in dim_col if 'event' in name]                   # dimensions named like events
cat_col = [name for name in dim_col if 'category' in name]                # dimensions named like categories
num_col = list(df1.select_dtypes(include=['float64', 'int64']).columns)   # float64 / int64 columns

In [42]:
# Inspecting which category columns were detected
cat_col

['loyalty_category', 'has_contract_commitment_category']

## Visualization Products

In [None]:
# Exploring numeric columns by Category
@interact(Period=date_col, Dimension=df1[dim_col[0]].unique(), Category=cat_col, Measure=num_col)
def visualize_category(Period, Dimension, Category, Measure):
  """Plot one numeric measure over time, split by a category column.

  Args:
    Period: name of the datetime column used for the x-axis.
    Dimension: value of the first dimension column (dim_col[0]) to keep.
    Category: name of the category column used to colour the lines (hue).
    Measure: name of the numeric column plotted on the y-axis.
  """
  subset = df1[df1[dim_col[0]] == Dimension]
  sns.lineplot(x=Period, y=Measure, data=subset, hue=Category)
  # Label the filter with the column it was actually applied to (dim_col[0])
  # instead of a fixed name that may not match the loaded data.
  plt.title(f'{dim_col[0]}: {Dimension}; Measure: {Measure}')
  # Place the legend outside the axes so it does not cover the lines.
  plt.legend(title=Category, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  plt.show()

interactive(children=(Dropdown(description='Period', options=('period',), value='period'), Dropdown(descriptio…

In [None]:
# Exploring numeric columns
# Only the arguments the function accepts are passed to interact.
@interact(Period=date_col, Dimension=df1[dim_col[0]].unique(), Category=cat_col)
def visualize_statistics(Period, Dimension, Category):
  """Plot two numeric columns over time for rows flagged 'Has_Usage'.

  Args:
    Period: name of the datetime column used for the x-axis.
    Dimension: value of the first dimension column (dim_col[0]) to keep.
    Category: name of the category column that must equal 'Has_Usage'
      (the first option is cat_col[0]).
  """
  # The plotted measures are picked by position, so they depend on the column
  # order of df1 -- NOTE(review): confirm these are the intended columns.
  first_measure, second_measure = num_col[2], num_col[5]

  df = df1[(df1[dim_col[0]] == Dimension) & (df1[Category] == 'Has_Usage')]
  if df.empty:
    # Nothing to draw: say so instead of rendering an empty chart.
    print(f"No rows where {dim_col[0]} == {Dimension!r} and {Category} == 'Has_Usage'.")
    return

  # Labelling each line directly keeps the legend entries attached to the lines
  # rather than to the shaded confidence bands seaborn also draws.
  sns.lineplot(x=Period, y=first_measure, data=df, color='red', label=first_measure)
  sns.lineplot(x=Period, y=second_measure, data=df, color='black', label=second_measure)
  plt.title(f'{dim_col[0]}: {Dimension}; {Category}: Has_Usage')
  plt.ylabel('MB', fontsize=12)
  plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  plt.show()

In [None]:
# Running total of numeric columns by Category
# NOTE: this reuses the name visualize_category, so it replaces the earlier
# definition when both cells are run.
@interact(Period=date_col, Dimension=df1[dim_col[0]].unique(), Category=cat_col, Measure=num_col)
def visualize_category(Period, Dimension, Category, Measure):
  """Plot one numeric measure over time, split by a category column.

  TODO: no running total is computed here yet; the selected measure is
  plotted as-is.

  Args:
    Period: name of the datetime column used for the x-axis.
    Dimension: value of the first dimension column (dim_col[0]) to keep.
    Category: name of the category column used to colour the lines (hue).
    Measure: name of the numeric column plotted on the y-axis.
  """
  subset = df1[df1[dim_col[0]] == Dimension]
  sns.lineplot(x=Period, y=Measure, data=subset, hue=Category)
  # Label the filter with the column it was actually applied to (dim_col[0])
  # instead of a fixed name that may not match the loaded data.
  plt.title(f'{dim_col[0]}: {Dimension}; Measure: {Measure}')
  # Place the legend outside the axes so it does not cover the lines.
  plt.legend(title=Category, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
  plt.show()

## Visualization Dimensions

In [45]:
# Exploring dimension columns by Category
@interact(Period=['2022-09-30'], Event=df1[eve_col[0]].unique(), Category=df1[cat_col[0]].unique(), Dimension=dim_col, Percentage=[True, False])
def explore_value_counts(Period, Event, Category, Dimension, Percentage):
  """Count the values of one dimension column within a filtered slice of df1.

  Args:
    Period: value matched against the first datetime column (date_col[0]).
    Event: value matched against the first event column (eve_col[0]).
    Category: value matched against the first category column (cat_col[0]).
    Dimension: name of the column whose values are counted.
    Percentage: when True, return relative frequencies instead of counts.

  Returns:
    The value counts of `Dimension` (missing values included) for the rows
    matching all three filters.
  """
  in_period = df1[date_col[0]] == Period
  is_event = df1[eve_col[0]] == Event
  in_category = df1[cat_col[0]] == Category
  selected = df1[in_period & is_event & in_category].copy().reset_index(drop=True)
  return selected.value_counts(subset=[Dimension], normalize=Percentage, dropna=False)

interactive(children=(Dropdown(description='Period', options=('2022-09-30',), value='2022-09-30'), Dropdown(de…