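This notebook takes the output of the best model (the CNN recommender) and the output of the popularity-based recommendation engine and reshapes each into a wide table (one row per user for the CNN recommendations, one row per cluster for the popular recommendations, with one column per recommendation rank) so that they are ready for dashboard creation.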

In [1]:
# Loading basic needed libraries
import pandas as pd
import numpy as np
import gc

# Loading libraries for S3 bucket connection
import boto3
import io
from io import StringIO,BytesIO, TextIOWrapper
import gzip

# S3 handles: a low-level client and a resource object.
# NOTE(review): the visible cells pass s3:// URLs straight to pandas and never
# reference these two names — confirm whether they are still needed.
client = boto3.client(service_name='s3')
resource = boto3.resource(service_name='s3')

In [2]:
# Load the recommendations produced by the best CNN model from S3
# (long format: one row per recommended category, as the preview shows).
CNN_recs = pd.read_csv(
    's3://myaws-capstone-bucket/data/modeling/output/CNN_param1.csv'
)
# Preview the first rows to check the columns that were read
CNN_recs.head()

Unnamed: 0,userID,Category_Rank,catID,category,category_id,user_id
0,1,0,789,2232732108613223108_sport.trainer,2232732108613223108,128968633
1,1,1,669,2232732093077520756_construction.tools.light,2232732093077520756,128968633
2,1,2,726,2232732101063475749_appliances.environment.vacuum,2232732101063475749,128968633
3,1,5,715,2232732099754852875_appliances.personal.massager,2232732099754852875,128968633
4,1,3,606,2232732079706079299_sport.bicycle,2232732079706079299,128968633


In [14]:
# Reshape the long CNN recommendations into the wide dashboard format:
# one row per user_id and one column per Category_Rank, each cell holding the
# recommended category.
pivot_cNN = (
    CNN_recs
    .set_index(['user_id', 'Category_Rank'])['category']
    .unstack()
    .reset_index()
    .rename_axis(None, axis=1)
)
# Label the rank columns by position rather than with a hard-coded list of
# exactly ten names, so the cell keeps working when the model output holds a
# different number of ranks. With ten ranks the labels are identical to the
# previous hard-coded ones (recommendation#1 ... recommendation#10).
# NOTE(review): relies on unstack() emitting the rank columns in ascending
# rank order, as the displayed output suggests — confirm.
pivot_cNN.columns = ['user_id'] + [
    'recommendation#{}'.format(position)
    for position in range(1, pivot_cNN.shape[1])
]
pivot_cNN.head()

Unnamed: 0,user_id,recommendation#1,recommendation#2,recommendation#3,recommendation#4,recommendation#5,recommendation#6,recommendation#7,recommendation#8,recommendation#9,recommendation#10
0,128968633,2232732108613223108_sport.trainer,2232732093077520756_construction.tools.light,2232732101063475749_appliances.environment.vacuum,2232732079706079299_sport.bicycle,2053013557452210699_electronics.clocks,2232732099754852875_appliances.personal.massager,2232732082063278200_electronics.clocks,2053013554658804075_electronics.audio.headphone,2232732082390433922_electronics.audio.subwoofer,2232732093345956218_construction.tools.light
1,145611266,2053013552259662037_computers.components.power...,2232732093077520756_construction.tools.light,2232732079706079299_sport.bicycle,2053013557477376525_furniture.bathroom.bath,2053013566142809077_construction.tools.generator,2232732091391410500_appliances.kitchen.blender,2232732101063475749_appliances.environment.vacuum,2232732105635267203_kids.swing,2053013554658804075_electronics.audio.headphone,2232732105912091273_appliances.kitchen.mixer
2,221480173,2232732101407408685_apparel.shoes.slipons,2232732093077520756_construction.tools.light,2232732099754852875_appliances.personal.massager,2053013554658804075_electronics.audio.headphone,2232732079706079299_sport.bicycle,2232732103101907535_electronics.clocks,2232732101063475749_appliances.environment.vacuum,2232732092297380188_appliances.kitchen.washer,2232732089587859740_appliances.personal.hair_c...,2053013551882174655_construction.tools.welding
3,237973968,2232732091307524418_appliances.kitchen.refrige...,2232732093077520756_construction.tools.light,2232732101063475749_appliances.environment.vacuum,2232732091718566220_appliances.kitchen.refrige...,2232732092297380188_appliances.kitchen.washer,2232732079706079299_sport.bicycle,2232732099754852875_appliances.personal.massager,2232732091391410500_appliances.kitchen.blender,2053013557452210699_electronics.clocks,2053013554658804075_electronics.audio.headphone
4,259560538,2053013556856619499_accessories.bag,2053013566142809077_construction.tools.generator,2232732093077520756_construction.tools.light,2232732091391410500_appliances.kitchen.blender,2232732092297380188_appliances.kitchen.washer,2232732115617710964_apparel.shoes.keds,2232732093857661318_furniture.bedroom.blanket,2232732105912091273_appliances.kitchen.mixer,2232732102414041665_computers.components.cooler,2053013557477376525_furniture.bathroom.bath


In [15]:
# Sanity check: number of distinct values in each column of the pivoted table
# (column-wise count, missing values not counted — the pandas defaults).
pivot_cNN.nunique(axis=0, dropna=True)

user_id              535748
recommendation#1        809
recommendation#2        616
recommendation#3        611
recommendation#4        606
recommendation#5        617
recommendation#6        631
recommendation#7        643
recommendation#8        643
recommendation#9        645
recommendation#10       641
dtype: int64

In [16]:
# Write the wide CNN recommendation table to S3 as CSV, without the row index
pivot_cNN.to_csv(
    's3://myaws-capstone-bucket/data/cnn_recs_final.csv',
    index=False,
)

In [19]:
# Load the popularity-model recommendations from S3 and, in the same chain,
# drop rows that repeat the same (cluster, category, purchase_rank)
# combination so that only the top popular recs for each cluster are kept.
pop_recs = (
    pd.read_csv('s3://myaws-capstone-bucket/data/modeling/output/popular_recs.csv')
    .drop_duplicates(subset=['cluster', 'category', 'purchase_rank'])
)
# Preview the de-duplicated rows
pop_recs.head()

Unnamed: 0,user_id,cluster,category,purchase_rank
0,512823699,2,2053013555631882655_electronics.smartphone,1
1,512823699,2,2232732093077520756_construction.tools.light,2
2,512823699,2,2232732079706079299_sport.bicycle,3
3,512823699,2,2053013554658804075_electronics.audio.headphone,4
4,512823699,2,2232732103831716449_apparel.shoes,5


In [22]:
# Reshape the popular recommendations into the wide dashboard format:
# one row per cluster and one column per purchase_rank, each cell holding the
# recommended category.
pivot_pop_recs = (
    pop_recs
    .set_index(['cluster', 'purchase_rank'])['category']
    .unstack()
    .reset_index()
    .rename_axis(None, axis=1)
)
# Label the rank columns by position rather than with a hard-coded list of
# exactly ten names, so the cell keeps working when the popularity output
# holds a different number of ranks. With ten ranks the labels are identical
# to the previous hard-coded ones (pop_rec#1 ... pop_rec#10).
# NOTE(review): relies on unstack() emitting the rank columns in ascending
# rank order, as the displayed output suggests — confirm.
pivot_pop_recs.columns = ['cluster'] + [
    'pop_rec#{}'.format(position)
    for position in range(1, pivot_pop_recs.shape[1])
]
pivot_pop_recs.head(10)

Unnamed: 0,cluster,pop_rec#1,pop_rec#2,pop_rec#3,pop_rec#4,pop_rec#5,pop_rec#6,pop_rec#7,pop_rec#8,pop_rec#9,pop_rec#10
0,0,2232732093077520756_construction.tools.light,2053013555631882655_electronics.smartphone,2053013554658804075_electronics.audio.headphone,2232732099754852875_appliances.personal.massager,2232732079706079299_sport.bicycle,2232732092297380188_appliances.kitchen.washer,2232732103101907535_electronics.clocks,2232732101063475749_appliances.environment.vacuum,2232732091718566220_appliances.kitchen.refrige...,2053013563835941749_appliances.kitchen.refrige...
1,1,2053013555631882655_electronics.smartphone,2232732093077520756_construction.tools.light,2053013563810775923_appliances.kitchen.washer,2053013558920217191_computers.notebook,2053013554658804075_electronics.audio.headphone,2053013554415534427_electronics.video.tv,2053013565983425517_appliances.environment.vacuum,2053013563911439225_appliances.kitchen.refrige...,2053013561579406073_electronics.clocks,2053013553341792533_electronics.clocks
2,2,2053013555631882655_electronics.smartphone,2232732093077520756_construction.tools.light,2232732079706079299_sport.bicycle,2053013554658804075_electronics.audio.headphone,2232732103831716449_apparel.shoes,2232732101063475749_appliances.environment.vacuum,2053013565983425517_appliances.environment.vacuum,2232732091391410500_appliances.kitchen.blender,2232732098446229999_apparel.shoes.sandals,2232732102103663163_furniture.bedroom.blanket
3,3,2232732093077520756_construction.tools.light,2053013555631882655_electronics.smartphone,2232732079706079299_sport.bicycle,2053013554658804075_electronics.audio.headphone,2232732099754852875_appliances.personal.massager,2232732103101907535_electronics.clocks,2232732092297380188_appliances.kitchen.washer,2053013554415534427_electronics.video.tv,2232732101063475749_appliances.environment.vacuum,2053013554725912943_appliances.kitchen.coffee_...
4,4,2232732093077520756_construction.tools.light,2053013555631882655_electronics.smartphone,2232732079706079299_sport.bicycle,2053013554658804075_electronics.audio.headphone,2232732101063475749_appliances.environment.vacuum,2232732099754852875_appliances.personal.massager,2053013554415534427_electronics.video.tv,2232732092297380188_appliances.kitchen.washer,2053013563810775923_appliances.kitchen.washer,2232732103101907535_electronics.clocks
5,5,2232732093077520756_construction.tools.light,2232732079706079299_sport.bicycle,2232732101063475749_appliances.environment.vacuum,2053013555631882655_electronics.smartphone,2232732103831716449_apparel.shoes,2232732089587859740_appliances.personal.hair_c...,2232732091391410500_appliances.kitchen.blender,2232732086928670945_electronics.camera.photo,2053013554658804075_electronics.audio.headphone,2232732098446229999_apparel.shoes.sandals


In [23]:
# Write the wide popular-recommendation table to S3 as CSV, without the row index
pivot_pop_recs.to_csv(
    's3://myaws-capstone-bucket/data/pop_recs_final.csv',
    index=False,
)