<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/customer_loyalty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Customer loyalty

## Importing libraries

In [None]:
# Updating libraries version
!pip install matplotlib --upgrade
!pip install plotly --upgrade

In [None]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from ipywidgets import interact

## Loading Data

In [None]:
# Loading data from local drive
# google.colab is only importable inside a Colab runtime. The upload result is
# indexed by file name in the next cell to obtain each file's raw bytes.
from google.colab import files
uploaded1 = files.upload()

In [None]:
# Parsing the uploaded pipe-delimited CSV into a pandas dataframe
import io
raw_csv = uploaded1['Script_202208241720.csv']  # key must match the uploaded file name
df1 = pd.read_csv(io.BytesIO(raw_csv), sep='|', engine='python')

In [None]:
# Cleaning loyalty values: collapse the Passiv sub-segments into a single 'Passiv' level
pass_dict = {'Passiv_A': 'Passiv', 'Passiv_B': 'Passiv'}
# Assign the result back to the column. Calling replace(inplace=True) on df1['LOYALTY']
# is chained assignment: it warns on pandas 2.x and leaves df1 untouched under Copy-on-Write.
df1['LOYALTY'] = df1['LOYALTY'].replace(to_replace=pass_dict)

In [None]:
# Inspecting columns, dtypes and non-null counts of the loaded data
df1.info()

In [None]:
# Slicing the working dataframes by column position: numeric (df1_a) and categorical (df1_c).
# Each slice keeps the column at position 30 first; later cells expect it to be LOYALTY.
loyalty_pos = 30
df1_a = df1.iloc[:, [loyalty_pos, *range(2, 21)]].copy()   # positions 2-20
#df1_b = df1.iloc[:,[38,18,19,20,21,22,23,24,25]].copy()
df1_c = df1.iloc[:, [loyalty_pos, *range(21, 30)]].copy()  # positions 21-29

In [None]:
# Checking the columns, dtypes and non-null counts of the numeric slice
df1_a.info()

## Exploring Data

In [None]:
# Exploring list of columns
@interact(Column_name = df1_a.columns, Category=df1_a['LOYALTY'].unique(), Percentage = [False, True])
def explore_columns(Column_name, Category, Percentage):
  ''' Frequency table of one column for the rows of one LOYALTY level.
      Percentage=True shows shares instead of counts; NaN is tallied as its own value. '''
  in_category = df1_a['LOYALTY'] == Category
  selected = df1_a.loc[in_category, Column_name]
  frequencies = selected.value_counts(normalize=Percentage, dropna=False)
  return pd.DataFrame(frequencies)

In [None]:
# Interacting with column bins
@interact(Column_name = df1_a.columns[1:], Category=df1_a['LOYALTY'].unique(), Percentage = [False, True])
def explore_bins(Column_name, Category, Percentage):
  ''' Distribution of one column over 4 equal-width bins for one LOYALTY level.
      Percentage=True returns shares per bin, Percentage=False returns row counts. '''
  values = df1_a.loc[df1_a['LOYALTY']==Category, Column_name]
  # Honour the widget: normalize was hard-coded to True, so the toggle had no effect
  return pd.cut(values, bins=4).value_counts(normalize=Percentage).reset_index()

In [None]:
# Exploring loyalty subcategories
# normalize=True so the 'Percentage' column really holds shares (it held raw counts before)
df1.value_counts(subset='LOYALTY', normalize=True, dropna=False).reset_index(name='Percentage')

## Correlation Test

In [None]:
# Correlation matrix
@interact(Loyalty = sorted(df1_a['LOYALTY'].unique()))
def plot_box_plot_raw(Loyalty):
  ''' Heatmap of the pairwise correlations between the numeric columns of one LOYALTY level '''
  # Keep numeric/boolean columns only: LOYALTY holds text labels, and DataFrame.corr()
  # raises on non-numeric columns from pandas 2.0 on (older versions dropped them silently).
  df_num = df1_a[df1_a['LOYALTY']==Loyalty].select_dtypes(include=['number', 'bool'])
  fig_1, ax1 = plt.subplots(figsize=(15,15))
  sns.heatmap(df_num.corr(), annot=True, vmin=-1.0, vmax=1.0, square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax1)
  plt.show()

## Aggregating Data

In [None]:
# Computing averages for numeric columns
def append_averages():
  ''' Stack, for every df1_a column after the first, its mean per LOYALTY level.
      Returns one long table with columns LOYALTY, CATEGORY (source column name), AVG. '''
  def compute_averages(Column_name):
    ''' Mean of a single column per LOYALTY level, rounded to 2 decimals '''
    means = df1_a.groupby(by=['LOYALTY'])[Column_name].mean()
    table = means.round(2).rename('AVG').reset_index()
    table.insert(1, 'CATEGORY', Column_name)
    return table
  tables = []
  for col in df1_a.columns[1:]:
    tables.append(compute_averages(Column_name=col))
  return pd.concat(tables, ignore_index=True)

# Stacked averages table; written to the Excel export at the end of the notebook
df1_a_m = append_averages()

In [None]:
# Computing quantiles for numeric columns
def append_quantiles():
  ''' Stack, for every df1_a column after the first, its mean per LOYALTY level and quartile.
      Returns one long table with columns LOYALTY, CATEGORY (source column name), QUANTILE, AVG. '''
  def compute_quantiles(Column_name):
    ''' Mean of one column per (LOYALTY, quartile).
        Quartiles q1..q4 are cut over all rows of df1_a, not within each LOYALTY level. '''
    df = df1_a[['LOYALTY', Column_name]].copy()
    # Cut on the rank (ties broken by order of appearance) rather than on the raw values,
    # so repeated values cannot produce duplicate bin edges in qcut
    df = df.assign(rank = df[Column_name].rank(method='first'))
    df = df.assign(QUANTILE = pd.qcut(df['rank'], q=4, labels=['q1','q2','q3','q4']))
    # observed=False keeps every LOYALTY x quartile pair (AVG is NaN where no row falls);
    # stating it pins the output across pandas versions, whose default for categorical keys changes
    df_g = df.groupby(by=['LOYALTY','QUANTILE'], observed=False).agg(AVG = (Column_name, 'mean')).reset_index()
    df_g = df_g.assign(AVG = df_g['AVG'].round(2))
    df_g.insert(1, 'CATEGORY', Column_name)
    return df_g
  df_ls = [compute_quantiles(Column_name=i) for i in df1_a.columns[1:]]
  df_ap = pd.concat(df_ls, ignore_index=True)
  return df_ap

# Stacked quantile table; written to the Excel export at the end of the notebook
df1_a_q = append_quantiles()

In [None]:
# Computing bins for numeric columns
def append_bins():
  ''' Stack, for every df1_a column after the first and every LOYALTY level, the share of
      rows in each of 4 equal-width bins, after removing IQR outliers.
      Returns one long table with columns LOYALTY, CATEGORY (source column name), BINS, PERCENTAGE. '''
  def compute_bins(Loyalty, Column_name):
    ''' Bin one column for one LOYALTY level '''
    df = df1_a.loc[df1_a['LOYALTY']==Loyalty, ['LOYALTY', Column_name]]
    Q1 = df[Column_name].quantile(0.25)
    Q3 = df[Column_name].quantile(0.75)
    IQR = Q3 - Q1
    # Strict bounds: when Q1 == Q3 (IQR of 0) no row survives the trim
    df = df[(df[Column_name]>(Q1-1.5*IQR)) & (df[Column_name]<(Q3+1.5*IQR))].reset_index(drop=True)
    try:
      shares = pd.cut(df[Column_name], bins=4).value_counts(normalize=True)
    except ValueError:
      # pd.cut cannot bin an empty column; report one placeholder bin holding everything
      return pd.DataFrame({'LOYALTY':[Loyalty],'CATEGORY':[Column_name], 'BINS':['(0,0]'], 'PERCENTAGE':[1]})
    # Name the index and the values explicitly: the default labels produced by
    # value_counts().reset_index() changed in pandas 2.0, which broke the old rename
    df_b = shares.rename_axis('BINS').reset_index(name='PERCENTAGE')
    df_b.insert(0, 'LOYALTY', Loyalty)
    df_b.insert(1, 'CATEGORY', Column_name)
    df_b = df_b.assign(PERCENTAGE = df_b['PERCENTAGE'].round(2))
    return df_b
  df_ls = [compute_bins(Loyalty=j, Column_name=i) for i in df1_a.columns[1:] for j in df1_a['LOYALTY'].unique()]
  df_ap = pd.concat(df_ls, ignore_index=True)
  return df_ap

# Stacked bins table; written to the Excel export at the end of the notebook
df1_a_i = append_bins()

In [None]:
# Computing percentages for categorical columns
def append_categorical():
  ''' Stack, for every df1_c column after the first and every LOYALTY level, the 3 most
      frequent values with their share of rows.
      Returns one long table with columns LOYALTY, CATEGORY (source column name), TOP_3, PERCENTAGE. '''
  def category_value_counts(Category, Column_name):
    ''' Top 3 values of one column within one LOYALTY level (NaN counts as a value) '''
    level_rows = df1_c.loc[df1_c['LOYALTY'] == Category]
    shares = level_rows.value_counts(subset=['LOYALTY', Column_name], normalize=True, dropna=False)
    table = shares.reset_index(name='PERCENTAGE')
    table.insert(1, 'CATEGORY', Column_name)
    table = table.rename(columns={Column_name: 'TOP_3'})
    table['PERCENTAGE'] = table['PERCENTAGE'].round(2)
    return table.head(3) #=> Top 3 values
  tables = []
  for col in df1_c.columns[1:]:
    for level in df1_c['LOYALTY'].unique():
      tables.append(category_value_counts(Category=level, Column_name=col))
  return pd.concat(tables, ignore_index=True)

# Stacked top-3 table; used by the categorical plot and written to the Excel export
df1_c_c = append_categorical()

### Computing Booleans (Optional)

In [None]:
# Computing percentages for boolean columns
#def append_booleans():
#  ''' Append boolean counts for the following categories: "A", "E", "P" '''
#  def boolean_value_counts(Category, Column_name):
#    ''' Boolean value counts for the selected column '''
#    df = df1_b[df1_b['LOYALTY']==Category].value_counts(subset=['LOYALTY', Column_name], normalize=True, dropna=False).reset_index(name='PERCENTAGE')
#    df.insert(1, 'CATEGORY', Column_name)
#    df.rename(columns={Column_name:'BOOLEAN'}, inplace=True)
#    df = df.assign(PERCENTAGE = round(df['PERCENTAGE'], 2))
#    return df
#  df_ls = [boolean_value_counts(Category=j, Column_name=i) for i in df1_b.columns[1:] for j in df1_b['LOYALTY'].unique()]
#  df_ap = pd.concat(df_ls, ignore_index=True)
#  return df_ap

#df1_b_b = append_booleans()

## Visualizing Data

In [None]:
# Function for plotting barplot next to boxenplot
def plot_paired_charts(df_bar, df_box, Column_name):
  ''' Draw the value per LOYALTY level as labelled bars next to its distribution as a boxenplot.

      df_bar: frame with LOYALTY and Column_name; the callers in this notebook pass one
              pre-aggregated (mean) row per LOYALTY level
      df_box: row-level frame with LOYALTY and Column_name
      Column_name: column drawn on the shared y axis, also shown in the titles '''
  fig_x, axes = plt.subplots(1, 2, sharey=True, figsize=(9.5, 6.5))
  fig_x.suptitle(f'LOYALTY: {Column_name}')
  axes[0].set_title(f'Mean: {Column_name}')
  axes[1].set_title(f'Variability: {Column_name}')
  # ci=None is the value that switches error bars off; ci=False is read as an interval size
  sns.barplot(x='LOYALTY', y=Column_name, data=df_bar, hue='LOYALTY', ci=None, ax=axes[0])
  sns.boxenplot(x='LOYALTY', y=Column_name, data=df_box, ax=axes[1])
  for container in axes[0].containers:
    axes[0].bar_label(container, fontsize=14)
  # hue repeats the x axis, so a legend adds nothing; remove it through the public API
  for ax in axes:
    legend = ax.get_legend()
    if legend is not None:
      legend.remove()
  plt.show()

In [None]:
# Plotting the variability of each numeric column
@interact(Column_name = df1_a.columns[1:])
def plot_box_plot_with_outliers(Column_name):
  ''' Paired mean / variability charts of one column per LOYALTY level, outliers included '''
  rows = df1_a[['LOYALTY', Column_name]].sort_values(by='LOYALTY')
  means = rows.groupby(by='LOYALTY')[Column_name].mean().round(3).reset_index()
  plot_paired_charts(means, rows, Column_name)

In [None]:
# Plotting the variability of each numeric column without outliers
@interact(Column_name = df1_a.columns[1:])
def plot_box_plot(Column_name):
  ''' Paired mean / variability charts of one column per LOYALTY level, after dropping the
      rows outside 1.5 * IQR of their own LOYALTY level '''
  def trim_outliers(df, Column_name, Category):
    ''' Rows of one LOYALTY level whose value lies strictly inside the IQR fences '''
    group = df.loc[df['LOYALTY']==Category, ['LOYALTY', Column_name]]
    q1, q3 = [group[Column_name].quantile(q) for q in (0.25, 0.75)]
    spread = q3 - q1
    lower, upper = q1 - 1.5*spread, q3 + 1.5*spread
    inside = (group[Column_name] > lower) & (group[Column_name] < upper)
    return group[inside].reset_index(drop=True)

  levels = sorted(df1_a['LOYALTY'].unique())
  trimmed = pd.concat([trim_outliers(df1_a, Column_name, level) for level in levels], ignore_index=True)
  means = trimmed.groupby(by='LOYALTY')[Column_name].mean().reset_index()
  means[Column_name] = means[Column_name].round(1)

  plot_paired_charts(means, trimmed, Column_name)

In [None]:
# Plotting categorical values
@interact(Column_name = df1_c_c['CATEGORY'].unique())
def plot_plot_categories(Column_name):
  ''' One stacked bar per LOYALTY level showing the shares of its top 3 values for the
      selected categorical column (rows of df1_c_c whose CATEGORY equals Column_name) '''
  loyalty_levels = ['Aktiv', 'Engasjert', 'Passiv']
  df = df1_c_c[df1_c_c['CATEGORY']==Column_name]
  fig_x, axes = plt.subplots(1, len(loyalty_levels), sharey=True, figsize=(10.5, 6.5))
  fig_x.suptitle(f'LOYALTY: {Column_name}')
  # One panel per level, replacing three copy-pasted blocks
  for ax, level in zip(axes, loyalty_levels):
    ax.set_title(f'{level}: Top 3')
    df_level = df[df['LOYALTY']==level]
    if df_level.empty:
      continue  # no rows for this level: leave the panel blank rather than plot an empty pivot
    df_level.pivot(columns='TOP_3', index='LOYALTY', values='PERCENTAGE').plot(kind='bar', stacked=True, ax=ax)
    for container in ax.containers:
      ax.bar_label(container, fontsize=14)
    ax.legend(bbox_to_anchor=(1.0, 0.5))
  fig_x.subplots_adjust(wspace=1)
  # plt.show() as in the other plotting cells of this notebook (was fig_x.show())
  plt.show()

In [None]:
# Listing the installed packages and versions (verbose) of the current runtime
!pip list -v

In [None]:
@interact(Column_name = df1_a.columns[1:])
def calculate_quantiles(Column_name): 
  ts = df1_a[['LOYALTY', Column_name]].copy()
  ts = ts[ts['LOYALTY']=='Passiv']
  Q1 = ts[Column_name].quantile(0.25)
  Q3 = ts[Column_name].quantile(0.75)
  IQR = Q3 - Q1
  ts = ts[(ts[Column_name]>(Q1-1.5*IQR)) & (ts[Column_name]<(Q3+1.5*IQR))].reset_index(drop=True)
  df_b = pd.cut(ts[Column_name], bins=4).value_counts(normalize=True).reset_index()
  return df_b
  #return print(Q1, Q3, IQR)
  #return ts
  # [ts[Column_name].quantile(i) for i in np.linspace(0,1,4)]

In [None]:
# Share of each COUNTCLICK value among 'Aktiv' rows with a non-negative COUNTCLICK
aktiv_clicks = (df1_a['LOYALTY']=='Aktiv') & (df1_a['COUNTCLICK']>=0.0)
df1_a[aktiv_clicks].reset_index().value_counts(subset='COUNTCLICK', normalize=True)
#.groupby(by='COUNTCLICK').agg(CLICKS = ('LOYALTY', 'count')).reset_index()

## Exporting results to local drive

In [None]:
# Exporting main excel file
export_name = 'Script_20220823_kjro.xlsx'
with pd.ExcelWriter(export_name, engine='openpyxl') as writer:
  # One sheet per result table, named after the table's third column
  for table in (df1_a_m, df1_a_q, df1_a_i, df1_c_c):  # df1_b_b (booleans) is currently disabled
    table.to_excel(writer, sheet_name=f'{table.columns[2]}', index=False)
files.download(export_name)

In [None]:
# Exporting list of excel sheets
#with pd.ExcelWriter('Script_202208221130.xlsx', engine='openpyxl') as writer:
#  for i in range(len(df1_a_ls)-1):
#    df1_a_ls[i].to_excel(writer, sheet_name=f'{df1_a_ls[i].iloc[:,1][0]}', index=True)
#files.download('Script_202208221130.xlsx')