<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/customer_loyalty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Customer loyalty

## Importing libraries

In [None]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact

## Loading Data

In [None]:
# Loading data from local drive
from google.colab import files
# The result is kept in `uploaded1`; the read_csv cell below indexes it by file name
# and wraps the value in io.BytesIO.
uploaded1 = files.upload()

In [None]:
# Storing loaded data from excel to a pandas dataframe
#import io
#df1 = pd.read_excel(io.BytesIO(uploaded1['Script_202208170953.xlsx']))

In [None]:
# Parsing the uploaded pipe-delimited csv into the main pandas dataframe
import io
df1 = pd.read_csv(
    io.BytesIO(uploaded1['Script_202208221640.csv']),
    sep='|',
    engine='python',
)

In [None]:
# Cleaning loyalty values: both passive sub-labels are merged into a single 'Passiv' label
pass_dict = {'Passiv_A': 'Passiv', 'Passiv_B': 'Passiv'}
# Assign the result back to the column instead of calling replace(inplace=True) on the
# df1['LOYALTY'] selection: the in-place form operates on an intermediate object, which
# pandas warns about and which leaves df1 unchanged when Copy-on-Write is enabled.
df1['LOYALTY'] = df1['LOYALTY'].replace(to_replace=pass_dict)

In [None]:
# Slicing into 3 dataframes to separate numeric, boolean and categorical values.
# Position 38 leads every slice; presumably it is the LOYALTY column, since the cells
# below read df1_a['LOYALTY'] and treat df1_a.columns[1:] as the feature columns.
df1_a = df1.iloc[:, [38, *range(5, 18)]].copy()      # positions 5..17
# Position 25 is listed only once here: selecting it twice produced two columns with
# the same label. NOTE(review): position 19 is skipped — confirm that is intentional.
df1_b = df1.iloc[:, [38, 18, 20, 21, 22, 23, 24, 25]].copy()
df1_c = df1.iloc[:, [38, *range(26, 38)]].copy()     # positions 26..37

In [None]:
# Inspecting the last row of the dataframe (its index label and every column value)
df1.tail(1)

## Exploring Data

In [None]:
# Interactive value counts of one column inside a single loyalty segment
@interact(Column_name = df1_a.columns, Category=df1_a['LOYALTY'].unique(), Percentage = [False, True])
def explore_columns(Column_name, Category, Percentage):
  """Count the values of `Column_name` among rows whose LOYALTY equals `Category`.

  Column_name: label of a df1_a column to tabulate.
  Category: LOYALTY value used to filter df1_a.
  Percentage: forwarded as `normalize`; True yields relative frequencies, False raw counts.

  Missing values are counted too (dropna=False). Returns a one-column DataFrame.
  """
  segment_mask = df1_a['LOYALTY'] == Category
  column_values = df1_a[segment_mask][Column_name]
  counts = column_values.value_counts(normalize=Percentage, dropna=False)
  return pd.DataFrame(counts)

In [None]:
# Share of rows per LOYALTY value, missing values included as their own group.
# normalize=True returns fractions, so 'Percentage' holds values between 0 and 1.
(
    df1
    .value_counts(subset='LOYALTY', normalize=True, dropna=False)
    .reset_index(name='Percentage')
)

## Correlation Test

In [None]:
# Correlation matrix of the numeric columns for the 'Aktiv' loyalty segment
fig, ax1 = plt.subplots(figsize=(15,15))
# numeric_only=True: df1 still carries text columns (LOYALTY among them) and, from
# pandas 2.0 on, DataFrame.corr raises on non-numeric data unless they are excluded.
aktiv_corr = df1[df1['LOYALTY']=='Aktiv'].corr(numeric_only=True)
sns.heatmap(aktiv_corr, annot=True, vmin=-1.0, vmax=1.0, square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax1)
# A title lets the figure stand alone when the notebook is skimmed
ax1.set_title("Correlation matrix - LOYALTY 'Aktiv'")
plt.show()

## Aggregating Data

In [None]:
# Creating list of aggregated fields
# custom_list_a is not assigned anywhere else in this notebook, so on a fresh kernel
# this cell stopped with a NameError. NOTE(review): the df1_a feature columns are
# assumed here because the other cells iterate over df1_a.columns[1:] — confirm that
# this is the intended list.
custom_list_a = list(df1_a.columns[1:])
# One value-count table per listed column, missing values included
df1_list = [pd.DataFrame(df1[col].value_counts(dropna=False)) for col in custom_list_a]

In [None]:
# Keeping only the rows of the numeric slice whose LOYALTY is 'Aktiv', as an independent copy
df1_x = df1_a.loc[df1_a['LOYALTY'] == 'Aktiv'].copy()

In [None]:
@interact(Category = custom_list_a)
def value_counts_pair(Category):
  """Relative frequency of every (LOYALTY, `Category`) value pair in df1_x.

  Category: label of the df1_x column paired with LOYALTY.

  Missing values are kept as their own group (dropna=False). Returns a DataFrame whose
  'Percentage' column holds the normalized counts (fractions, not multiplied by 100).
  """
  pair_columns = df1_x[['LOYALTY', Category]]
  pair_shares = pair_columns.value_counts(normalize=True, dropna=False)
  return pair_shares.reset_index(name='Percentage')

In [None]:
# Creating list of aggregated fields by Loyalty
#df1_list = [(df1_x[['LOYALTY', i]].value_counts(normalize=True, dropna=False).reset_index(name='Percentage')) for i in custom_list_a]

In [None]:
#df1_x['VOICE_DURATION_AVG'].quantile(q=[0.25,0.5,0.75,1]).to_dict()

In [None]:
#pd.qcut(df1_x['VOICE_DURATION_AVG'], q=4, labels=['q1','q2','q3','q4'])

In [None]:
# Computing quartile averages for different columns
@interact(Column=df1_a.columns[1:], Category=['Engasjert'])
def compute_quantiles(Column, Category):
  """Average of `Column` per quartile inside one loyalty segment.

  Column: label of the df1_a column to analyse.
  Category: LOYALTY value used to filter df1_a before ranking.

  Returns a DataFrame with the columns LOYALTY, Category, quantile and avg. Note that
  the 'Category' output column holds the analysed column name, not the LOYALTY value.
  """
  df = df1_a[df1_a['LOYALTY']==Category][['LOYALTY', Column]].copy()
  # Bins are cut on ranks rather than raw values: method='first' breaks ties by order
  # of appearance, so repeated values cannot collapse two quartile edges into one.
  df = df.assign(rank = df[Column].rank(method='first'))
  df = df.assign(quantile = pd.qcut(df['rank'], q=4, labels=['q1','q2','q3','q4']))
  # 'quantile' is categorical. observed=False is spelled out to keep all four labels in
  # the result and to avoid depending on the groupby default, which pandas has
  # deprecated and is switching to observed=True.
  df_g = df.groupby(by=['LOYALTY','quantile'], observed=False).agg(avg = (Column, 'mean')).reset_index()
  df_g.insert(1, 'Category', Column)
  return df_g
# Category values seen in this notebook: 'Aktiv', 'Engasjert', 'Passiv'
# (edit the options list in the decorator to analyse another segment)

In [None]:
# Mean of one numeric column per loyalty segment
@interact(Column_name=df1_a.columns[1:])
def compute_averages(Column_name):
  """Mean of `Column_name` for every LOYALTY value in df1_a, rounded to one decimal.

  Column_name: label of the df1_a column to average.

  Returns a DataFrame with the columns LOYALTY, Category and avg, where 'Category'
  repeats the averaged column name on every row.
  """
  segment_means = (
    df1_a[['LOYALTY', Column_name]]
    .groupby(by=['LOYALTY'])
    .agg(avg = (Column_name, 'mean'))
    .reset_index()
  )
  segment_means['avg'] = segment_means['avg'].round(1)
  segment_means.insert(1, 'Category', Column_name)
  return segment_means
# LOYALTY values seen in this notebook: 'Aktiv', 'Engasjert', 'Passiv'

In [None]:
# One averages table per feature column of df1_a, then all tables stacked into one frame
df1_a_ls = [
    compute_averages(Column_name=feature)
    for feature in df1_a.columns[1:]
]
df1_a_ap = pd.concat(objs=df1_a_ls, ignore_index=True)

In [None]:
# Displaying the stacked averages: one row per LOYALTY value for each averaged column
df1_a_ap

In [None]:
# Checking individual columns: first value of the second column of the fourth averages
# table, i.e. the column name that compute_averages stored under 'Category'
df1_a_ls[3].iloc[:,1][0]

## Exporting results to local drive

In [None]:
# Exporting main excel file
# The file name is held in one variable so the workbook that is written is also the
# one that is downloaded; previously the download asked for a different file name
# than the one just written.
main_export_file = 'Script_202208170859.xlsx'
with pd.ExcelWriter(main_export_file, engine='openpyxl') as writer:
  df1_a.to_excel(writer, sheet_name='Bedriftsliste', index=False)
files.download(main_export_file)

In [None]:
# Exporting list of excel sheets: one sheet per averages table in df1_a_ls
sheets_export_file = 'Script_202208221130.xlsx'
with pd.ExcelWriter(sheets_export_file, engine='openpyxl') as writer:
  # Iterate over the tables themselves so every one is written; the previous
  # range(len(df1_a_ls)-1) loop stopped one short and left out the last table.
  for averages in df1_a_ls:
    # Each sheet is named after the first value of the table's second column, which
    # compute_averages fills with the averaged column name.
    averages.to_excel(writer, sheet_name=f'{averages.iloc[:,1][0]}', index=True)
files.download(sheets_export_file)