# Recommender System:

- The last thing to do is to use our saved models to recommend items to users:



### For the requested user:
- Calculate the score for every item.
- Sort the items based on the score and output the top results.


### Check which users exist on the test set

In [1]:
!pip install ipython-autotime



In [2]:
#### To measure all running time
# https://github.com/cpcloud/ipython-autotime

%load_ext autotime

In [3]:
import pandas as pd

import pickle
import pandas as pd
import numpy as np

import os

#Keras
from keras.models import load_model
from keras import backend as K

# Tensorflow
import tensorflow as tf

from sklearn.metrics import mean_squared_error

Using TensorFlow backend.



time: 2.46 s


### Set and Check GPUs

In [4]:
def set_check_gpu():
    cfg = K.tf.ConfigProto()
    cfg.gpu_options.per_process_gpu_memory_fraction =1 # allow all of the GPU memory to be allocated
    # for 8 GPUs
    cfg.gpu_options.visible_device_list = "0,1,2,3,4,5,6,7" # "0,1"
    # for 1 GPU
#     cfg.gpu_options.visible_device_list = "0"
    #cfg.gpu_options.allow_growth = True  # # Don't pre-allocate memory; dynamically allocate the memory used on the GPU as-needed
    #cfg.log_device_placement = True  # to log device placement (on which device the operation ran)
    sess = K.tf.Session(config=cfg)
    K.set_session(sess)  # set this TensorFlow session as the default session for Keras

    print("* TF version: ", [tf.__version__, tf.test.is_gpu_available()])
    print("* List of GPU(s): ", tf.config.experimental.list_physical_devices() )
    print("* Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) 
  
    
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
    # set for 8 GPUs
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7";
    # set for 1 GPU
#     os.environ["CUDA_VISIBLE_DEVICES"] = "0";

    # Tf debugging option
    tf.debugging.set_log_device_placement(True)

    gpus = tf.config.experimental.list_physical_devices('GPU')

    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

#     print(tf.config.list_logical_devices('GPU'))
    print(tf.config.experimental.list_physical_devices('GPU'))
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

time: 8.57 ms


In [5]:
# set_check_gpu()

time: 3.52 ms


In [6]:
from sklearn.model_selection import train_test_split

review_data = pd.read_csv('../data/amazon_reviews_us_Shoes_v1_00_help_voted_And_cut_lognTail.csv')
review_data.rename(columns={ 'star_rating': 'score','customer_id': 'user_id', 'user': 'user_name'}, inplace=True)

items = review_data.product_id.unique()
item_map = {i:val for i,val in enumerate(items)}
inverse_item_map = {val:i for i,val in enumerate(items)}
review_data["old_item_id"] = review_data["product_id"] # copying for join with metadata
review_data["item_id"] = review_data["product_id"].map(inverse_item_map)
items = review_data.item_id.unique()
print ("We have %d unique items in metadata "%items.shape[0])

users = review_data.user_id.unique()
user_map = {i:val for i,val in enumerate(users)}
inverse_user_map = {val:i for i,val in enumerate(users)}
review_data["old_user_id"] = review_data["user_id"] 
review_data["user_id"] = review_data["user_id"].map(inverse_user_map)

items_reviewed = review_data.product_id.unique()
review_data["old_item_id"] = review_data["product_id"] # copying for join with metadata
review_data["item_id"] = review_data["product_id"].map(inverse_item_map)

items_reviewed = review_data.item_id.unique()
users = review_data.user_id.unique()
helpful_votes = review_data.helpful_votes.unique()


ratings_train, ratings_test = train_test_split( review_data, test_size=0.1, random_state=0)

We have 97758 unique items in metadata 
time: 1.22 s


In [7]:
ratings_test.user_id.value_counts().sort_values(ascending=False).head(10)

15136    5
15745    4
2378     4
3374     4
47739    4
5981     4
2943     4
36       4
29962    4
1114     4
Name: user_id, dtype: int64

time: 22.9 ms


### ** Create a  recommendation example dataset of 100 users from the test set and all items for each and predict recommendations for them

In [8]:
items = review_data.product_id.unique()
item_map = {i:val for i,val in enumerate(items)}
inverse_item_map = {val:i for i,val in enumerate(items)}
review_data["old_item_id"] = review_data["product_id"] # copying for join with metadata
review_data["item_id"] = review_data["product_id"].map(inverse_item_map)
items = review_data.item_id.unique()
print ("We have %d unique items in metadata "%items.shape[0])

We have 97758 unique items in metadata 
time: 272 ms


In [9]:
# all_info['description'] = all_info['description'].fillna(all_info['title'].fillna('no_data'))
# all_info['title'] = all_info['title'].fillna(all_info['description'].fillna('no_data').apply(str).str[:20])
# all_info['image'] = all_info['image'].fillna('no_data')
# all_info['price'] = pd.to_numeric(all_info['price'],errors="coerce")
# all_info['price'] = all_info['price'].fillna(all_info['price'].median()) 

time: 672 µs


In [10]:
review_data.head(n=2)

Unnamed: 0,user_id,product_id,score,product_title,helpful_votes,old_item_id,item_id,old_user_id
0,0,B00FLH88CO,4,Norwegian Fleece Lined Winter Slippers,9,B00FLH88CO,0,50073594
1,0,B00B3HW97Q,5,2 Pairs Foster Grant Womans Versa Polarized Su...,1,B00B3HW97Q,1,50073594


time: 12.4 ms


In [11]:
type(review_data['product_id'].unique())

numpy.ndarray

time: 18.8 ms


In [12]:
# creating metadata mappings 
titles = review_data['product_title'].unique()
titles_map = {i:val for i,val in enumerate(titles)}
inverse_titles_map = {val:i for i,val in enumerate(titles)}

# price = review_data['price'].unique()
# price_map = {i:val for i,val in enumerate(price)}
# inverse_price_map = {val:i for i,val in enumerate(price)}

# print ("We have %d prices" %price.shape)
print ("We have %d titles" %titles.shape)


# all_info['price_id'] = all_info['price'].map(inverse_price_map)
review_data['title_id'] = review_data['product_title'].map(inverse_titles_map)

# creating dict from 
item2prices = {}
# for val in review_data[['item_id','price_id']].dropna().drop_duplicates().iterrows():
#     item2prices[val[1]["item_id"]] = val[1]["price_id"]

item2titles = {}
# for val in all_info[['item_id','title_id']].dropna().drop_duplicates().iterrows():
#     item2titles[val[1]["item_id"]] = val[1]["title_id"]
        


# populating the rating dataset with item metadata info
# ratings_train["price_id"] = ratings_train["item_id"].map(lambda x : item2prices[x])
# ratings_train["title_id"] = ratings_train["item_id"].map(lambda x : item2titles[x])


# populating the test dataset with item metadata info
# ratings_test["price_id"] = ratings_test["item_id"].map(lambda x : item2prices[x])
# ratings_test["title_id"] = ratings_test["item_id"].map(lambda x : item2titles[x])


We have 51912 titles
time: 193 ms


In [13]:
# ratings_test = pd.read_parquet('./data/ratings_test.parquet')
# ratings_train = pd.read_parquet('./data/ratings_train.parquet')

time: 585 µs


In [14]:
review_data.columns

Index(['user_id', 'product_id', 'score', 'product_title', 'helpful_votes',
       'old_item_id', 'item_id', 'old_user_id', 'title_id'],
      dtype='object')

time: 12.6 ms


### Select products
#### - use ALL product now. 

In [15]:
items = review_data.item_id.unique()
df_items = pd.DataFrame(data=items.flatten(),columns=['item_id'])
df_items = pd.merge(df_items,review_data,how='left',left_on=('item_id'),right_on=('item_id'))

### use all products 
# df_items= df_items.sample(100)

df_items['key'] = 1
print ("We have %d unique items  "%df_items['item_id'].shape[0])
# df_items= df_items[['item_id', 'description', 'category', 'title', 'title_id', 'price', 'price_id', 'brand', 'key']]
df_items= df_items[['item_id',  'product_id', 'score', 'product_title', 'helpful_votes', 'old_item_id', 'old_user_id', 'title_id', 'key']]
print(df_items.shape)
df_items.head(2)

We have 155509 unique items  
(155509, 9)


Unnamed: 0,item_id,product_id,score,product_title,helpful_votes,old_item_id,old_user_id,title_id,key
0,0,B00FLH88CO,4,Norwegian Fleece Lined Winter Slippers,9,B00FLH88CO,50073594,0,1
1,1,B00B3HW97Q,5,2 Pairs Foster Grant Womans Versa Polarized Su...,1,B00B3HW97Q,50073594,1,1


time: 162 ms


### Select 100 users

In [16]:
users = ratings_test.user_id.unique()
df_users = pd.DataFrame(data=users.flatten(),columns=['user_id'])
df_users = pd.merge(df_users,ratings_test,how='left',left_on=('user_id'),right_on=('user_id'))

### Select 100 users
df_users= df_users.sample(100)

df_users['key'] = 1
print ("We have %d unique users  "%df_users['user_id'].shape[0])
df_users= df_users[['user_id', 'key']]
print(df_users.shape)
df_users.head(2)

We have 100 unique users  
(100, 2)


Unnamed: 0,user_id,key
7853,34271,1
11117,19666,1


time: 31 ms


## Merge users and item and items metadata

In [17]:
df_unseenData= pd.merge(df_users, df_items, on='key')
del df_unseenData['key']
print ("We have %d unique records in the recommendation example dataset "%df_unseenData.shape[0])
print(df_unseenData.shape)
df_unseenData.sample(10)

We have 15550900 unique records in the recommendation example dataset 
(15550900, 9)


Unnamed: 0,user_id,item_id,product_id,score,product_title,helpful_votes,old_item_id,old_user_id,title_id
5783857,53341,13600,B0082TV3NO,1,Women's Beach Wedges Platform Massage Thong Sl...,8,B0082TV3NO,52519955,2222
8089620,17705,1268,B00ENLCRQ2,5,MLC EYEWEAR ® Thick Frame Oversized High Fashi...,1,B00ENLCRQ2,47904318,1223
6102369,79968,17398,B004JI9OHE,5,The Highest Heel Women's Brazil - WPAT Pump,2,B004JI9OHE,40333751,12992
14351412,73450,21109,B00D1KQBEU,5,Merrell Men's Traveler Fisher Sandal,1,B00D1KQBEU,15359985,15164
11974418,11212,81,B00M8R9HIE,5,Dansko Women's Tasha Sandal,1,B00M8R9HIE,1864054,81
3800398,24278,34860,B000FHD8SO,5,Rampage Women's Bronner Boot,4,B000FHD8SO,52132895,22556
5853725,53341,55323,B004WZJUHS,3,Corso Como Women's Soiree Ankle-Strap Sandal,1,B004WZJUHS,39870643,32134
13349339,49373,77969,B003TU14RQ,3,Teva - Men's Tanza Sandal,15,B003TU14RQ,11362927,5549
4333308,11511,80671,B006H3749Y,3,ASICS Men's GEL-Cumulus 14 Running Shoe,1,B006H3749Y,6121530,12961
6505543,76442,76897,B0085ZJLG6,4,Chooka Women's High Snake Boot,1,B0085ZJLG6,45041378,35134


time: 9.4 s


In [18]:
df_unseenData.columns

Index(['user_id', 'item_id', 'product_id', 'score', 'product_title',
       'helpful_votes', 'old_item_id', 'old_user_id', 'title_id'],
      dtype='object')

time: 4.18 ms


In [19]:
df_unseenData

Unnamed: 0,user_id,item_id,product_id,score,product_title,helpful_votes,old_item_id,old_user_id,title_id
0,34271,0,B00FLH88CO,4,Norwegian Fleece Lined Winter Slippers,9,B00FLH88CO,50073594,0
1,34271,1,B00B3HW97Q,5,2 Pairs Foster Grant Womans Versa Polarized Su...,1,B00B3HW97Q,50073594,1
2,34271,1,B00B3HW97Q,4,2 Pairs Foster Grant Womans Versa Polarized Su...,2,B00B3HW97Q,34778598,1
3,34271,2,B00J62PECW,1,Skechers Women's Shelby's Zurich Snow Boot,1,B00J62PECW,22272389,2
4,34271,2,B00J62PECW,5,Skechers Women's Shelby's Zurich Snow Boot,1,B00J62PECW,12287747,2
5,34271,3,B00QMGHV26,4,Fabuxry® Casual Canvas Zipper Single Adjustabl...,3,B00QMGHV26,42371996,3
6,34271,4,B00K5B7KAM,3,UGG Women's Bailey Bow Tall,1,B00K5B7KAM,42371996,4
7,34271,5,B0059078VA,4,UGG Kids Girl's Bailey Bow (Big Kid) Black 6 B...,1,B0059078VA,42371996,5
8,34271,5,B0059078VA,5,UGG Kids Girl's Bailey Bow (Big Kid) Black 6 B...,1,B0059078VA,4874603,5
9,34271,5,B0059078VA,5,UGG Kids Girl's Bailey Bow (Big Kid) Black 6 B...,1,B0059078VA,45431108,5


time: 38.9 ms


In [20]:
from os import listdir
from os.path import isfile, join

mypath = '../models'

onlyfiles = [f.replace('.h5', '') for f in listdir(mypath) if isfile(join(mypath, f))]
onlyfiles

['dense_1_Multiply_50_embeddings_4_epochs_dropout',
 'models.txt',
 'dense_5_Multiply_50_embeddings_10_epochs_dropout',
 'matrix_facto_10_embeddings_100_epochs',
 'dense_5_Meta_Multiply_50_embeddings_10_epochs_dropout',
 'dense_1_Multiply_50_embeddings_100_epochs_dropout']

time: 3.91 ms


## Predict the ratings for the items and users in the a recommendation example dataset:
### - dense_5_Multiply_50_embeddings_10_epochs_dropout

In [21]:
load_path = "../models/"

# models =['dense_1_Multiply_50_embeddings_4_epochs_dropout',
#  'dense_5_Multiply_50_embeddings_10_epochs_dropout',
#  'matrix_facto_10_embeddings_100_epochs',
#  'dense_1_Multiply_50_embeddings_100_epochs_dropout']

# select the best model
models =[
 'dense_5_Multiply_50_embeddings_10_epochs_dropout'
]

# models_meta = [
# 'dense_5_Meta_Multiply_50_embeddings_10_epochs_dropout',
# ]

# for mod in models:
#     model = load_model(load_path+mod+'.h5')
#     df_unseenData['preds_' + mod] = model.predict([df_unseenData['user_id'],
#                                                    df_unseenData['item_id'],
#                                                    df_unseenData['price_id'],
#                                                    df_unseenData['title_id']])

for mod in models:
    model = load_model(load_path+mod+'.h5')
    df_unseenData['preds_' + mod] = model.predict([df_unseenData['user_id'],
                                                   df_unseenData['item_id']])


df_unseenData.head(2)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.











Unnamed: 0,user_id,item_id,product_id,score,product_title,helpful_votes,old_item_id,old_user_id,title_id,preds_dense_5_Multiply_50_embeddings_10_epochs_dropout
0,34271,0,B00FLH88CO,4,Norwegian Fleece Lined Winter Slippers,9,B00FLH88CO,50073594,0,4.044062
1,34271,1,B00B3HW97Q,5,2 Pairs Foster Grant Womans Versa Polarized Su...,1,B00B3HW97Q,50073594,1,4.044519


time: 8min 28s


In [22]:
# df_unseenData.sort_values(by=['preds_dense_5_Multiply_50_embeddings_10_epochs_dropout', 'user_id'], ascending=False)

time: 684 µs


In [23]:
df_unseenData['user_id'].head(n=2)

0    34271
1    34271
Name: user_id, dtype: int64

time: 42.3 ms


In [24]:
df_unseenData.columns

Index(['user_id', 'item_id', 'product_id', 'score', 'product_title',
       'helpful_votes', 'old_item_id', 'old_user_id', 'title_id',
       'preds_dense_5_Multiply_50_embeddings_10_epochs_dropout'],
      dtype='object')

time: 2.69 ms


In [25]:
df_unseenData.shape

(15550900, 10)

time: 44.7 ms


## Check which users exist on the example set

In [26]:
# df_unseenData.T

time: 838 µs


In [27]:
df_unseenData.user_id.value_counts().sort_values(ascending=False).head(5)
df_unseenData[['user_id','preds_dense_5_Multiply_50_embeddings_10_epochs_dropout']].sort_values('preds_dense_5_Multiply_50_embeddings_10_epochs_dropout',ascending=True).head(5)

Unnamed: 0,user_id,preds_dense_5_Multiply_50_embeddings_10_epochs_dropout
9491157,26406,4.043378
9491158,26406,4.043378
9491159,26406,4.043378
9608059,26406,4.04338
9626775,26406,4.043383


time: 3.83 s


## A function that will return recommendation list for a given user

In [28]:
df_unseenData.tail(n=3)

Unnamed: 0,user_id,item_id,product_id,score,product_title,helpful_votes,old_item_id,old_user_id,title_id,preds_dense_5_Multiply_50_embeddings_10_epochs_dropout
15550897,63008,97756,B000074WV6,5,Nike Men's Shox Limitless,12,B000074WV6,34382909,51910,4.043891
15550898,63008,97757,B000072270,5,ASICS&#174; Men's Hyperdistance,1,B000072270,47730138,51911,4.044129
15550899,63008,97757,B000072270,5,ASICS&#174; Men's Hyperdistance,1,B000072270,47730138,51911,4.044129


time: 14 ms


In [29]:
load_path = "../models/"

def get_recommendations(userID , model_scr, df_Data):
    if userID not in df_Data['user_id'].values:
        print("\nUser ID not found %d" %userID)
        return userID
       
#     print("\nRecommendations for user id %d Name: %s is:" % (userID, df_Data.loc[df_Data['user_id'] == userID, 'user_name'].values[0]))
   
    df_output=df_Data.loc[df_Data['user_id'] == userID][['item_id','product_title','helpful_votes', model_scr,    
     ]].sort_values(model_scr,ascending=False).set_index('item_id')
#     print(df_output)
    
    df_output.rename(columns={model_scr: 'score'}, inplace=True)
    df_output = df_output.sort_values(by=['score'], ascending=False)
    
    #add ASIN form item_id
#     df_output['product_id'] = df_Data['item_id'].apply(item_map)
    
    return df_output

time: 2.52 ms


### Recommend items to a given user

- Using dense_5_Multiply_50_embeddings_10_epochs_dropout

In [30]:
df_unseenData.columns

Index(['user_id', 'item_id', 'product_id', 'score', 'product_title',
       'helpful_votes', 'old_item_id', 'old_user_id', 'title_id',
       'preds_dense_5_Multiply_50_embeddings_10_epochs_dropout'],
      dtype='object')

time: 2.79 ms


In [32]:
#######  User ID: 63008                       
df_output = get_recommendations(userID=63008,
                                model_scr='preds_dense_5_Multiply_50_embeddings_10_epochs_dropout', 
                                df_Data=df_unseenData)

print(df_output.shape)
df_output = df_output.drop_duplicates(subset='product_title')
print(df_output.shape)
df_output.head(10)

(155509, 3)
(51912, 3)


Unnamed: 0_level_0,product_title,helpful_votes,score
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3776,KEEN Women's Voyageur Hiking Shoe,2,4.048235
7957,Anuschka 382 Hobo,1,4.046885
7223,Naturalizer Women's Jailene Slingback Sandal,3,4.046423
5850,swedish hasbeens Women's Fideli Platform Sandal,1,4.046172
16906,Tifosi Wisp T Dual-Lens Sunglasses,30,4.04612
17700,FitFlop Women's Frou Dress Sandal,1,4.046052
20054,Qupid Women's Relax-70 Two Way Fold Down Sexy ...,9,4.045922
35585,Capezio Canvas Pirouette II Dance Shoe,2,4.045688
1332,Havaianas Women's Slim Animals Sandal,3,4.045645
13672,Athena Alexander Women's Benny Sandal,1,4.045635


time: 135 ms


## Make predictions for another user using another model:

In [35]:
#######  User ID
user_id = 26406
df_output = get_recommendations(userID=user_id,
                                model_scr='preds_dense_5_Multiply_50_embeddings_10_epochs_dropout',
                                df_Data=df_unseenData)
print(df_output.shape)
df_output = df_output.drop_duplicates(subset='product_title')
print(df_output.shape)
df_output.head(10)

(155509, 3)
(51912, 3)


Unnamed: 0_level_0,product_title,helpful_votes,score
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3776,KEEN Women's Voyageur Hiking Shoe,2,4.045216
20054,Qupid Women's Relax-70 Two Way Fold Down Sexy ...,9,4.044921
16906,Tifosi Wisp T Dual-Lens Sunglasses,1,4.0449
17700,FitFlop Women's Frou Dress Sandal,2,4.044788
34789,Damara Womens Satin Pleated Clutch Bag Wedding...,4,4.044773
1332,Havaianas Women's Slim Animals Sandal,3,4.044656
35585,Capezio Canvas Pirouette II Dance Shoe,2,4.044595
7223,Naturalizer Women's Jailene Slingback Sandal,3,4.044582
7957,Anuschka 382 Hobo,1,4.044561
6186,Crocs Athens Ii,1,4.044554


time: 139 ms


In [36]:
# df_output.columns

time: 570 µs


In [37]:
df_output = df_output.reset_index()
df_output['user_id'] = user_id
df_output['asin'] = df_output['item_id'].apply(lambda x : item_map[x])
df_output['url'] = df_output['item_id'].apply(lambda x : 'https://www.amazon.com/dp/'+item_map[x])
df_output = df_output[['user_id','item_id', 'score', 'asin', 'url', 'product_title']]

time: 92.3 ms


In [38]:
df_output.head()

Unnamed: 0,user_id,item_id,score,asin,url,product_title
0,26406,3776,4.045216,B00138Z6MK,https://www.amazon.com/dp/B00138Z6MK,KEEN Women's Voyageur Hiking Shoe
1,26406,20054,4.044921,B008Y1YZVW,https://www.amazon.com/dp/B008Y1YZVW,Qupid Women's Relax-70 Two Way Fold Down Sexy ...
2,26406,16906,4.0449,B004CJ4WG8,https://www.amazon.com/dp/B004CJ4WG8,Tifosi Wisp T Dual-Lens Sunglasses
3,26406,17700,4.044788,B00EU7R7V4,https://www.amazon.com/dp/B00EU7R7V4,FitFlop Women's Frou Dress Sandal
4,26406,34789,4.044773,B00KYOBJYS,https://www.amazon.com/dp/B00KYOBJYS,Damara Womens Satin Pleated Clutch Bag Wedding...


time: 9.64 ms


In [39]:
unseenUser_list = df_unseenData['user_id'].values
print(len(unseenUser_list))

15550900
time: 1.01 ms


time: 1.09 s


In [41]:
len(unique_reviewer)

100

time: 2.64 ms


### select 100 users from unseen data

In [None]:
unique_reviewer = list(set(unseenUser_list.tolist()))
print("total number of users: ", len(unique_reviewer))

all_predicted_df = pd.DataFrame()

for user_id in unique_reviewer:
    print("selected 100 user_id:", user_id)
    df_output = get_recommendations(userID=user_id ,model_scr='preds_dense_5_Multiply_50_embeddings_10_epochs_dropout',df_Data=df_unseenData)
    df_output = df_output.reset_index()
    df_output['user_id'] = user_id
    df_output['asin'] = df_output['item_id'].apply(lambda x : item_map[x])
    df_output['url'] = df_output['item_id'].apply(lambda x : 'https://www.amazon.com/dp/'+item_map[x])
    df_output = df_output[['user_id','item_id', 'score', 'asin', 'url', 'product_title']]
    df_output = df_output.sort_values(by=['score'], ascending=False)
    
#     print(df_output.shape)
    df_output = df_output.drop_duplicates(subset='product_title')
#     print(df_output.shape)
    
    ####### select top  product pre user
    df_output = df_output.head(n=50)
    
    #concat
    all_predicted_df = all_predicted_df.append(df_output)
    
# reset index    
all_predicted_df = all_predicted_df.reset_index(drop=True)

total number of users:  100
selected 100 user_id: 30209
selected 100 user_id: 10247
selected 100 user_id: 20497
selected 100 user_id: 16402
selected 100 user_id: 19
selected 100 user_id: 39961
selected 100 user_id: 63008
selected 100 user_id: 80941
selected 100 user_id: 22574
selected 100 user_id: 83507


In [None]:
# all_predicted_df

In [None]:
all_predicted_df.shape

In [None]:
all_predicted_df.columns

In [None]:
# all_predicted_df = all_predicted_df.drop_duplicates() 
# all_predicted_df = all_predicted_df.reset_index()
# all_predicted_df.drop(columns=['index'])

In [None]:
all_predicted_df.shape

In [None]:
#Shoes_for_100_users_per_20_products_prediction_Ver2.csv
# all_predicted_df.to_csv('Shoes_for_100_users_per_20_products_prediction_Ver3.csv', header=True, index=False)


# Shoes_for_100_users_per_100_products_prediction_Ver2
# all_predicted_df.to_csv('Shoes_for_100_users_per_100_products_prediction_Ver3.csv', header=True, index=False)

#Shoes_for_100_users_per_50_products_prediction_Ver2.csv
all_predicted_df.to_csv('Shoes_for_100_users_per_50_products_prediction_Ver3.csv', header=True, index=False)



#Shoes_for_100_users_per_ALL_products_prediction_Ver2.csv
# all_predicted_df.to_csv('Shoes_for_100_users_per_ALL_products_prediction_Ver3.csv', header=True, index=False)

#Shoes_for_ALL_users_per_ALL_products_prediction_Ver2.csv
# all_predicted_df.to_csv('Shoes_for_ALL_users_per_ALL_products_prediction_Ver3.csv', header=True, index=False)

In [None]:
# !aws s3 cp Shoes_for_ALL_users_per_ALL_products_prediction_Ver2.csv s3://dse-cohort5-group1/3-Keras-DeepRecommender-for-Shoes/predictions/Shoes_for_ALL_users_per_ALL_products_prediction_Ver2.csv