<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/customer_loyalty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Customer loyalty

## Importing libraries

In [None]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact

## Loading Data

In [None]:
# Loading data from local drive
# Colab-only helper: this import is unavailable outside Google Colab.
# `uploaded1` is indexed by file name in the next cell to obtain the file's raw bytes.
from google.colab import files
uploaded1 = files.upload()

In [None]:
# Storing loaded data from csv to a pandas dataframe
import io

# Expected name of the uploaded export. The uploaded file may carry a different
# name (e.g. it was renamed before or during upload), so fall back to the only
# uploaded file when the expected key is absent instead of failing with a bare
# KeyError that does not say which names are available.
csv_name = 'Script_202208231129.csv'
if csv_name not in uploaded1:
  if len(uploaded1) != 1:
    raise KeyError(f"Expected '{csv_name}' among the uploaded files, got: {list(uploaded1)}")
  csv_name = next(iter(uploaded1))

# The export is pipe-delimited.
df1 = pd.read_csv(io.BytesIO(uploaded1[csv_name]), sep='|', engine='python')

In [None]:
# Cleaning loyalty values: fold both passive sub-segments into one 'Passiv' label
pass_dict = {'Passiv_A': 'Passiv', 'Passiv_B': 'Passiv'}
# Assign the result back rather than calling replace(inplace=True) on the
# column selection: the in-place call acts on an intermediate object, emits a
# chained-assignment warning in pandas 2.x and leaves df1 unchanged when
# Copy-on-Write is enabled.
df1['LOYALTY'] = df1['LOYALTY'].replace(to_replace=pass_dict)

In [None]:
# Slicing 3 dataframes: numeric, boolean and categorical values
# Every slice starts with column position 38 (presumably LOYALTY, since later
# cells filter these frames on 'LOYALTY' and skip columns[0] - verify against
# the export layout), followed by one contiguous run of positions.
df1_a = df1.iloc[:, [38, *range(2, 18)]].copy()    # positions 2..17
df1_b = df1.iloc[:, [38, *range(18, 26)]].copy()   # positions 18..25
df1_c = df1.iloc[:, [38, *range(26, 38)]].copy()   # positions 26..37

In [None]:
# Checking the numeric dataframe: info() prints a summary of df1_a
# (column dtypes and non-null counts), not only its shape.
df1_a.info()

## Exploring Data

In [None]:
# Exploring list of columns
@interact(Column_name = df1_a.columns, Category=df1_a['LOYALTY'].unique(), Percentage = [False, True])
def explore_columns(Column_name, Category, Percentage):
  ''' Value counts of one df1_a column within a single LOYALTY value.

  Column_name: column of df1_a to tabulate.
  Category: LOYALTY value used to filter the rows.
  Percentage: True returns proportions, False returns raw counts.
  Missing values are counted as their own entry.
  '''
  in_category = df1_a['LOYALTY'] == Category
  counts = df1_a.loc[in_category, Column_name].value_counts(normalize=Percentage, dropna=False)
  return pd.DataFrame(counts)

In [None]:
# Interacting with column bins
@interact(Column_name = df1_a.columns[1:], Category=df1_a['LOYALTY'].unique(), Percentage = [False, True])
def explore_bins(Column_name, Category, Percentage):
  ''' Bin one df1_a column into 4 intervals for a single LOYALTY value.

  Column_name: column of df1_a to bin (columns[0] is excluded from the widget).
  Category: LOYALTY value used to filter the rows.
  Percentage: True returns the share of rows per bin, False the raw counts.
  Returns a dataframe with one row per bin.
  '''
  df = df1_a[df1_a['LOYALTY']==Category].copy()
  # Honour the Percentage toggle; it used to be ignored (normalize was always True).
  return pd.cut(df[Column_name], bins=4).value_counts(normalize=Percentage).reset_index()

In [None]:
# Plotting disproportionate bins
def plot_hist(df, var='MARKET_EMPLOYEE', upper=280):
  ''' Histogram of one column after dropping values at or above a cut-off.

  df: dataframe holding the column.
  var: column to plot; defaults to 'MARKET_EMPLOYEE'.
  upper: exclusive upper bound, rows with df[var] >= upper are left out;
         defaults to 280.
  Returns the object produced by sns.histplot.
  '''
  df_h = df[df[var] < upper]
  return sns.histplot(x=var, data=df_h)
plot_hist(df1_a)
plt.show()

In [None]:
# Customizing disproportionate bins
def explore_bins(Column_name, Category, Percentage, Cut):
  ''' Bin one df1_a column into 8 intervals after removing values >= Cut.

  NOTE: this definition replaces the interactive explore_bins defined in an
  earlier cell, because both share the same name.

  Column_name: column of df1_a to bin.
  Category: LOYALTY value used to filter the rows.
  Percentage: True returns the share of rows per bin, False the raw counts.
  Cut: exclusive upper bound applied to Column_name before binning.
  Returns a dataframe with one row per bin.
  '''
  df = df1_a[df1_a['LOYALTY']==Category].copy()
  df = df[df[Column_name] < Cut]
  # Honour the Percentage argument; it used to be ignored (normalize was always True).
  return pd.cut(df[Column_name], bins=8).value_counts(normalize=Percentage).reset_index()

explore_bins('SUBSCRIBER_AGE', 'Engasjert', True, 91)

In [None]:
# Exploring subsets: share of rows per LOYALTY value (missing values included)
loyalty_share = df1.value_counts(subset='LOYALTY', normalize=True, dropna=False)
loyalty_share.reset_index(name='Percentage')

## Correlation Test

In [None]:
# Correlation matrix for the 'Aktiv' loyalty segment
fig, ax1 = plt.subplots(figsize=(15,15))
# Keep only numeric/boolean columns before correlating: DataFrame.corr() raises
# on string columns such as LOYALTY in pandas >= 2.0, while older versions
# dropped them silently. Selecting the dtypes explicitly works on both.
corr_aktiv = df1[df1['LOYALTY']=='Aktiv'].select_dtypes(include=['number', 'bool']).corr()
sns.heatmap(corr_aktiv, annot=True, vmin=-1.0, vmax=1.0, square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax1)
plt.show()

## Aggregating Data

In [None]:
# Computing averages for numeric columns
def append_averages():
  ''' Stack the per-LOYALTY mean of every df1_a column except the first.

  Returns one long dataframe with columns LOYALTY, CATEGORY (the source
  column name) and AVG (mean rounded to 2 decimals).
  '''
  def column_average(Column_name):
    ''' Mean of Column_name for each LOYALTY value '''
    means = df1_a.groupby(by=['LOYALTY'])[Column_name].mean().round(2)
    out = means.rename('AVG').reset_index()
    out.insert(1, 'CATEGORY', Column_name)
    return out

  frames = []
  for col in df1_a.columns[1:]:
    frames.append(column_average(Column_name=col))
  return pd.concat(frames, ignore_index=True)

df1_a_m = append_averages()

In [None]:
# Computing quantiles for numeric columns
def append_quantiles():
  ''' Stack per-LOYALTY, per-quartile means of every df1_a column except the first.

  Quartiles (q1..q4) are assigned over all rows of df1_a, not within each
  LOYALTY value; the mean is then taken per (LOYALTY, QUANTILE) pair.
  Returns one long dataframe with columns LOYALTY, CATEGORY (the source
  column name), QUANTILE and AVG (mean rounded to 2 decimals).
  '''
  def compute_quantiles(Column_name):
    ''' Quartile means of Column_name for each LOYALTY value '''
    df = df1_a[['LOYALTY', Column_name]].copy()
    # Rank with method='first' so ties get distinct ranks and qcut always
    # receives unique bin edges.
    df = df.assign(rank = df[Column_name].rank(method='first'))
    df = df.assign(QUANTILE = pd.qcut(df['rank'], q=4, labels=['q1','q2','q3','q4']))
    # QUANTILE is categorical. Pin observed=False so every LOYALTY x quartile
    # combination keeps a row (NaN when empty): this is the behaviour the
    # implicit default gave, but that default is deprecated and emits a
    # FutureWarning in recent pandas.
    df_g = df.groupby(by=['LOYALTY','QUANTILE'], observed=False).agg(AVG = (Column_name, 'mean')).reset_index()
    df_g = df_g.assign(AVG = round(df_g['AVG'],2))
    df_g.insert(1, 'CATEGORY', Column_name)
    return df_g
  df_ls = [compute_quantiles(Column_name=i) for i in df1_a.columns[1:]]
  df_ap = pd.concat(df_ls, ignore_index=True)
  return df_ap

df1_a_q = append_quantiles()

In [None]:
# Computing bins for numeric columns
def append_bins():
  ''' Stack 4-bin distributions of every df1_a column except the first, per LOYALTY value.

  Returns one long dataframe with columns LOYALTY, CATEGORY (the source
  column name), BINS (the interval) and PERCENTAGE (share of rows in the
  bin, rounded to 2 decimals).
  '''
  def compute_bins(Loyalty, Column_name):
    ''' Share of rows per bin of Column_name within one LOYALTY value '''
    df = df1_a[df1_a['LOYALTY']==Loyalty]
    shares = pd.cut(df[Column_name], bins=4).value_counts(normalize=True)
    # Name the two output columns explicitly. Renaming the defaults produced by
    # reset_index() is version dependent: pandas >= 2.0 no longer emits
    # 'index'/<column name>, which left the intervals under PERCENTAGE and made
    # the rounding below fail.
    df_b = shares.rename_axis('BINS').reset_index(name='PERCENTAGE')
    df_b.insert(0, 'LOYALTY', Loyalty)
    df_b.insert(1, 'CATEGORY', Column_name)
    df_b = df_b.assign(PERCENTAGE = df_b['PERCENTAGE'].round(2))
    return df_b
  # Skip a missing LOYALTY value: `== NaN` matches no rows and pd.cut cannot
  # bin an empty selection.
  loyalties = df1_a['LOYALTY'].dropna().unique()
  df_ls = [compute_bins(Loyalty=j, Column_name=i) for i in df1_a.columns[1:] for j in loyalties]
  df_ap = pd.concat(df_ls, ignore_index=True)
  return df_ap

df1_a_i = append_bins()

In [None]:
# Computing percentages for boolean columns
def append_booleans():
  ''' Stack value shares of every df1_b column except the first, per LOYALTY value.

  Returns one long dataframe with columns LOYALTY, CATEGORY (the source
  column name), BOOLEAN (the observed value, missing values included) and
  PERCENTAGE (share within the LOYALTY value, rounded to 2 decimals).
  '''
  def boolean_value_counts(Category, Column_name):
    ''' Value shares of Column_name within one LOYALTY value '''
    rows = df1_b[df1_b['LOYALTY']==Category]
    shares = rows.value_counts(subset=['LOYALTY', Column_name], normalize=True, dropna=False)
    out = shares.reset_index(name='PERCENTAGE')
    out.insert(1, 'CATEGORY', Column_name)
    out = out.rename(columns={Column_name:'BOOLEAN'})
    out = out.assign(PERCENTAGE = out['PERCENTAGE'].round(2))
    return out

  frames = []
  for col in df1_b.columns[1:]:
    for loyalty in df1_b['LOYALTY'].unique():
      frames.append(boolean_value_counts(Category=loyalty, Column_name=col))
  return pd.concat(frames, ignore_index=True)

df1_b_b = append_booleans()

In [None]:
# Computing percentages for categorical columns
def append_categorical():
  ''' Stack the 3 most frequent values of every df1_c column except the first, per LOYALTY value.

  Returns one long dataframe with columns LOYALTY, CATEGORY (the source
  column name), TOP_3 (the observed value, missing values included) and
  PERCENTAGE (share within the LOYALTY value, rounded to 2 decimals).
  '''
  def category_value_counts(Category, Column_name):
    ''' Three largest value shares of Column_name within one LOYALTY value '''
    rows = df1_c[df1_c['LOYALTY']==Category]
    shares = rows.value_counts(subset=['LOYALTY', Column_name], normalize=True, dropna=False)
    out = shares.reset_index(name='PERCENTAGE')
    out.insert(1, 'CATEGORY', Column_name)
    out = out.rename(columns={Column_name:'TOP_3'})
    out = out.assign(PERCENTAGE = out['PERCENTAGE'].round(2))
    # value_counts sorts by frequency, so the first three rows are the top 3
    return out.head(3)

  frames = []
  for col in df1_c.columns[1:]:
    for loyalty in df1_c['LOYALTY'].unique():
      frames.append(category_value_counts(Category=loyalty, Column_name=col))
  return pd.concat(frames, ignore_index=True)

df1_c_c = append_categorical()

## Exporting results to local drive

In [None]:
# Exporting main excel file
# One sheet per result table; each sheet is named after the table's third
# column, in the same order as the tables were computed.
out_path = 'Script_20220823_kjro.xlsx'
with pd.ExcelWriter(out_path, engine='openpyxl') as writer:
  for frame in (df1_a_m, df1_a_q, df1_a_i, df1_b_b, df1_c_c):
    frame.to_excel(writer, sheet_name=f'{frame.columns[2]}', index=False)
files.download(out_path)

In [None]:
# Exporting list of excel sheets
# NOTE(review): disabled cell. It refers to `df1_a_ls`, which is not defined in
# any visible cell of this notebook, so it cannot be re-enabled as is.
#with pd.ExcelWriter('Script_202208221130.xlsx', engine='openpyxl') as writer:
#  for i in range(len(df1_a_ls)-1):
#    df1_a_ls[i].to_excel(writer, sheet_name=f'{df1_a_ls[i].iloc[:,1][0]}', index=True)
#files.download('Script_202208221130.xlsx')