# Preppin' Data 2023 Week 6

#### Load data

In [106]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('DSB Customer Survey.csv')

In [3]:
df.head()

Unnamed: 0,Customer ID,Mobile App - Ease of Use,Mobile App - Ease of Access,Mobile App - Navigation,Mobile App - Likelihood to Recommend,Mobile App - Overall Rating,Online Interface - Ease of Use,Online Interface - Ease of Access,Online Interface - Navigation,Online Interface - Likelihood to Recommend,Online Interface - Overall Rating
0,535084,2,1,5,4,1,4,4,5,2,3
1,250892,3,5,4,4,2,5,5,2,4,3
2,544191,5,3,4,4,1,3,3,2,3,1
3,949343,2,5,4,3,1,1,4,3,5,1
4,915305,3,1,2,1,1,4,2,4,3,2


#### Reshape to have Mobile and Online Interface in one column

In [31]:
df_m = df[['Customer ID','Mobile App - Ease of Use','Mobile App - Ease of Access',\
           'Mobile App - Navigation','Mobile App - Likelihood to Recommend',\
           'Mobile App - Overall Rating']]

In [32]:
df_m = df_m.melt(id_vars=['Customer ID'], var_name='Mobile App', value_name='Mobile App Value')

In [33]:
df_m['Mobile App'] = df_m['Mobile App'].str.replace('Mobile App - ', '')

In [34]:
df_m

Unnamed: 0,Customer ID,Mobile App,Mobile App Value
0,535084,Ease of Use,2
1,250892,Ease of Use,3
2,544191,Ease of Use,5
3,949343,Ease of Use,2
4,915305,Ease of Use,3
...,...,...,...
3835,374015,Overall Rating,2
3836,144922,Overall Rating,4
3837,421323,Overall Rating,3
3838,707580,Overall Rating,5


#### Online Interface

In [36]:
df_o = df[['Customer ID','Online Interface - Ease of Use','Online Interface - Ease of Access',\
           'Online Interface - Navigation','Online Interface - Likelihood to Recommend',\
           'Online Interface - Overall Rating']]

In [37]:
df_o = df_o.melt(id_vars=['Customer ID'], var_name='Online Interface', value_name='Online Interface Value')

In [38]:
df_o['Online Interface'] = df_o['Online Interface'].str.replace('Online Interface - ', '')

In [39]:
df_o

Unnamed: 0,Customer ID,Online Interface,Online Interface Value
0,535084,Ease of Use,4
1,250892,Ease of Use,5
2,544191,Ease of Use,3
3,949343,Ease of Use,1
4,915305,Ease of Use,4
...,...,...,...
3835,374015,Overall Rating,1
3836,144922,Overall Rating,3
3837,421323,Overall Rating,2
3838,707580,Overall Rating,1


#### Merge back together

In [52]:
df1 = pd.merge(df_m, df_o, how='left', left_on=['Customer ID', 'Mobile App'],\
               right_on=['Customer ID','Online Interface'])

In [53]:
df1

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value
0,535084,Ease of Use,2,Ease of Use,4
1,250892,Ease of Use,3,Ease of Use,5
2,544191,Ease of Use,5,Ease of Use,3
3,949343,Ease of Use,2,Ease of Use,1
4,915305,Ease of Use,3,Ease of Use,4
...,...,...,...,...,...
3835,374015,Overall Rating,2,Overall Rating,1
3836,144922,Overall Rating,4,Overall Rating,3
3837,421323,Overall Rating,3,Overall Rating,2
3838,707580,Overall Rating,5,Overall Rating,1


In [55]:
# test one value
df1.loc[df1['Customer ID'] == 535084]

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value
0,535084,Ease of Use,2,Ease of Use,4
768,535084,Ease of Access,1,Ease of Access,4
1536,535084,Navigation,5,Navigation,5
2304,535084,Likelihood to Recommend,4,Likelihood to Recommend,2
3072,535084,Overall Rating,1,Overall Rating,3


#### Drop Overall Rating rows

In [63]:
#find index position
or_names = df1[df1['Mobile App'] == 'Overall Rating'].index

In [64]:
# drop based on index position
df1.drop(or_names, inplace = True)

In [65]:
df1

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value
0,535084,Ease of Use,2,Ease of Use,4
1,250892,Ease of Use,3,Ease of Use,5
2,544191,Ease of Use,5,Ease of Use,3
3,949343,Ease of Use,2,Ease of Use,1
4,915305,Ease of Use,3,Ease of Use,4
...,...,...,...,...,...
3067,374015,Likelihood to Recommend,5,Likelihood to Recommend,5
3068,144922,Likelihood to Recommend,2,Likelihood to Recommend,4
3069,421323,Likelihood to Recommend,1,Likelihood to Recommend,1
3070,707580,Likelihood to Recommend,4,Likelihood to Recommend,4


#### Calculate the Average Ratings for each platform for each customer

In [95]:
df1['Mobile App Average'] = df1.groupby('Customer ID')['Mobile App Value'].transform('mean')

In [98]:
df1['Online Interface Average'] = df1.groupby('Customer ID')['Online Interface Value'].transform('mean')

In [99]:
# test one value
df1.loc[df1['Customer ID'] == 535084]

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value,Mobile App Average,Online Interface Average
0,535084,Ease of Use,2,Ease of Use,4,3.0,3.75
768,535084,Ease of Access,1,Ease of Access,4,3.0,3.75
1536,535084,Navigation,5,Navigation,5,3.0,3.75
2304,535084,Likelihood to Recommend,4,Likelihood to Recommend,2,3.0,3.75


#### Calculate the difference in Average Rating between Mobile App and Online Interface for each customer

In [100]:
df1['Difference'] = df1['Mobile App Average'] - df1['Online Interface Average']

In [101]:
df1.head()

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value,Mobile App Average,Online Interface Average,Difference
0,535084,Ease of Use,2,Ease of Use,4,3.0,3.75,-0.75
1,250892,Ease of Use,3,Ease of Use,5,4.0,4.0,0.0
2,544191,Ease of Use,5,Ease of Use,3,4.0,2.75,1.25
3,949343,Ease of Use,2,Ease of Use,1,3.5,3.25,0.25
4,915305,Ease of Use,3,Ease of Use,4,1.75,3.25,-1.5


#### Categorise customers as being:
Mobile App Superfans if the difference is greater than or equal to 2 in the Mobile App's favour
Mobile App Fans if difference >= 1
Online Interface Fan
Online Interface Superfan
Neutral if difference is between 0 and 1

In [115]:
# set the conditions
conditions = [
    (df1['Difference'] >= 2),
    (df1['Difference'] >= 1),
    (df1['Difference'] <= -2),
    (df1['Difference'] <= -1),
    (df1['Difference'] > -1) & (df1['Difference'] < 1)
]

#specify what values to return
values = ['Mobile App Superfan','Mobile App Fan','Online Interface Superfan','Online Interface Fan','Neutral']

# apply to column
df1['Preference'] = np.select(conditions, values)

In [116]:
df1.head()

Unnamed: 0,Customer ID,Mobile App,Mobile App Value,Online Interface,Online Interface Value,Mobile App Average,Online Interface Average,Difference,Preference
0,535084,Ease of Use,2,Ease of Use,4,3.0,3.75,-0.75,Neutral
1,250892,Ease of Use,3,Ease of Use,5,4.0,4.0,0.0,Neutral
2,544191,Ease of Use,5,Ease of Use,3,4.0,2.75,1.25,Mobile App Fan
3,949343,Ease of Use,2,Ease of Use,1,3.5,3.25,0.25,Neutral
4,915305,Ease of Use,3,Ease of Use,4,1.75,3.25,-1.5,Online Interface Fan


#### Calculate the Percent of Total customers in each category, rounded to 1 decimal place

In [168]:
df2 = df1[['Customer ID','Preference']]

In [162]:
df2.head()

Unnamed: 0,Customer ID,Preference
0,535084,Neutral
1,250892,Neutral
2,544191,Mobile App Fan
3,949343,Neutral
4,915305,Online Interface Fan


In [169]:
df2['Total'] = df2.groupby('Preference')['Customer ID'].transform('count')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Total'] = df2.groupby('Preference')['Customer ID'].transform('count')


In [170]:
df2 = df2.drop('Customer ID', axis=1)

In [171]:
df2 = df2.drop_duplicates()

In [172]:
df2

Unnamed: 0,Preference,Total
0,Neutral,1956
2,Mobile App Fan,504
4,Online Interface Fan,452
21,Online Interface Superfan,80
73,Mobile App Superfan,80


In [194]:
df2['% of Total'] = round(df2['Total']/df2['Total'].sum()*100, 1)

In [195]:
df2

Unnamed: 0,Preference,Total,Table Total,% of Total
0,Neutral,1956,1956,63.7
2,Mobile App Fan,504,504,16.4
4,Online Interface Fan,452,452,14.7
21,Online Interface Superfan,80,80,2.6
73,Mobile App Superfan,80,80,2.6


#### Output

In [200]:
df2[['Preference','% of Total']].to_csv('2023W06_output.csv', index=False)