## Collaborative filtering Recommendation system 

In [126]:
# Utilities
import math, random, warnings
from time import time
from datetime import datetime
from collections import defaultdict
from IPython.core.interactiveshell import InteractiveShell
from tabulate import tabulate
from IPython.display import display
# Mathematical calculation
import numpy as np
from scipy.sparse.linalg import svds
from sklearn import model_selection
from sklearn.metrics.pairwise import cosine_similarity

# Data handling
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# scikit-surprise recommender package
from surprise import SVD, KNNWithMeans
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, GridSearchCV
from surprise.prediction_algorithms.baseline_only import BaselineOnly

In [84]:
products = pd.read_csv('allAmazons.csv')

In [85]:
products

Unnamed: 0,name,link-href,asin
0,"Apple iPhone 7 Plus with FaceTime - 256GB, 4G ...",https://www.amazon.eg/-/en/Apple-iPhone-Plus-F...,B0771RT4PM
1,motorola Razr 5G (2020) 256GB ROM + 8GB RAM Fa...,https://www.amazon.eg/-/en/motorola-Factory-Un...,B08KJJ56FV
2,A80s 4GB 64GB internal memory 6.217 inches 4G ...,https://www.amazon.eg/-/en/A80s-internal-memor...,B08TWN358F
3,SUNMI V2 Pro Android Handled PDA Phone,https://www.amazon.eg/-/en/SUNMI-Pro-Android-H...,B08VJ9BF6V
4,Blackview A80 Plus 4GB Ram 64GB - Black,https://www.amazon.eg/-/en/Blackview-A80-Plus-...,B08W1W32N7
...,...,...,...
6734,Kieslect YFT2030EU KR Pro Smart Watch 2 Straps...,https://www.amazon.eg/-/en/Kieslect-YFT2030EU-...,B0BHPT8L26
6735,"Huawei Band 8 Smartwatch, Fitness Tracker, Sli...",https://www.amazon.eg/-/en/Huawei-Smartwatch-F...,B0BZY41481
6736,Oraimo Watch 2 Pro OSW-32 BT Call Quickly Repl...,https://www.amazon.eg/-/en/Oraimo-OSW-32-Quick...,B0B5B77WTY
6737,"Oraimo Smart watch OSW-16, BrightSilver + 12 M...",https://www.amazon.eg/-/en/Oraimo-OSW-16-Brigh...,B09BZLKRHC


#### Real data [userID, ProductID, rating] should be replaced

In [2]:
ratings = pd.read_csv('final_final_ratings.csv')

In [3]:
ratings

Unnamed: 0,userId,productId,rating
0,A4WEZJOIZIV4U,B0771RT4PM,5
1,A3BKNXX8QFIXIV,B0771RT4PM,5
2,ASB4QQBKZD6W2,B0771RT4PM,3
3,A3QRW0UJPKIAX7,B0771RT4PM,5
4,AIBRTGBN07D6A,B0771RT4PM,4
...,...,...,...
22691,AVFIQ58VHSFZH,B0BHPT8L26,5
22692,AOVMTR7PA8XB9,B0BZY41481,5
22693,ABUWUTWYZ8V1R,B0B5B77WTY,4
22694,A2V026TK7H80V3,B09BZLKRHC,3


In [4]:
# Divide the dataset in 70:30 ratio
trainset, testset = model_selection.train_test_split(ratings, test_size=0.3, random_state=5)

In [5]:
trainset.head()

Unnamed: 0,userId,productId,rating
22580,A3VCZYKQIIZPSY,B09C4WM8VB,5
6491,A2FG061KDA1ARB,B0CMZ9J7GZ,5
8325,AUYAZMEO659OX,B0BVT98NBZ,5
10132,A2CZDT27T36DBB,B09BDNFDNN,5
20649,A1VVRA3P6G1TWH,B01IEZ9VP2,5


In [6]:
testset.head()

Unnamed: 0,userId,productId,rating
11372,A1CST2WUA32GP0,B0BWSH8J3W,3
12721,A2UOHALGF2X77Q,B07MWCNR3W,4
15750,ALNE7Q71JFC0F,B098V2CFFS,4
3575,A7WF9BHWSCRGW,B09C1356VM,5
18680,A2XJMQ2COPGWJH,B0BY937F79,4


In [7]:
print('Shape of the training set  :', trainset.shape)
print('Shape of the test set      :', testset.shape)

Shape of the training set  : (15887, 3)
Shape of the test set      : (6809, 3)


In [8]:
# Create the User-Item sparse matrix
user_item = ratings.pivot(index='userId', columns='productId', values='rating').fillna(0)
print('Shape of User-Item sparse matrix:', user_item.shape)
user_item.head()

Shape of User-Item sparse matrix: (2209, 6282)


productId,B000UXZQ42,B0016MNBAM,B001IDZG0W,B001QSZ8BG,B001SH6N8W,B002J23III,B002RAQND4,B0033B4310,B0035ERKYW,B003D8ZT0C,...,B0CN6ZHVTG,B0CN73HNV5,B0CN74CH58,B0CN78DYC2,B0CN7C8TLD,B0CNNQ2HGH,B0CNQ5FZ6Z,B0CNS35XVZ,B0CNS6F39B,B0CP4FL3HQ
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100UD67AHFODS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A105S56ODHGJEK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A105TOJ6LTVMBG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A109XLG7SJQAIA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Create the Item-User sparse matrix
item_user = ratings.pivot(index='productId', columns='userId', values='rating').fillna(0)
print('Shape of Item-User sparse matrix:', item_user.shape)
item_user.head()

Shape of Item-User sparse matrix: (6282, 2209)


userId,A100UD67AHFODS,A100WO06OQR8BQ,A105S56ODHGJEK,A105TOJ6LTVMBG,A109XLG7SJQAIA,A10AFVU66A79Y1,A10CRW7XRJBJ2G,A10G4BPT5MGBHY,A10JPZAYDGFHEV,A10JVQAKUOK2A7,...,AZFF4CX9MQ4AE,AZMY6E8B52L2T,AZNUHQSHZHSUE,AZPOUCM043IY8,AZQGJ5CEAJGXB,AZV2U6GU5QA6C,AZXQ0WME7X6UT,AZYJE40XW6MFG,AZZ5ASC403N74,AZZYW4YOE1B6E
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B000UXZQ42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B0016MNBAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001IDZG0W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001QSZ8BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001SH6N8W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Calculate the user-user similarity
user_similarity = cosine_similarity(user_item)
np.fill_diagonal(user_similarity, 0)
user_similarity_df = pd.DataFrame(user_similarity,index=user_item.index, columns=user_item.index)
user_similarity_df.head()

userId,A100UD67AHFODS,A100WO06OQR8BQ,A105S56ODHGJEK,A105TOJ6LTVMBG,A109XLG7SJQAIA,A10AFVU66A79Y1,A10CRW7XRJBJ2G,A10G4BPT5MGBHY,A10JPZAYDGFHEV,A10JVQAKUOK2A7,...,AZFF4CX9MQ4AE,AZMY6E8B52L2T,AZNUHQSHZHSUE,AZPOUCM043IY8,AZQGJ5CEAJGXB,AZV2U6GU5QA6C,AZXQ0WME7X6UT,AZYJE40XW6MFG,AZZ5ASC403N74,AZZYW4YOE1B6E
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100UD67AHFODS,0.0,0.0,0.0,0.101247,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.143925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.141535,0.0,0.0,0.128459,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A105S56ODHGJEK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A105TOJ6LTVMBG,0.101247,0.141535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.182246,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A109XLG7SJQAIA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Calculate the item-item similarity
item_similarity = cosine_similarity(item_user)
np.fill_diagonal(item_similarity, 0)
item_similarity_df = pd.DataFrame(item_similarity, index=item_user.index, columns=item_user.index)
np.set_printoptions(threshold=np.inf)
np.seterr(over='ignore')
item_similarity_df.head()

productId,B000UXZQ42,B0016MNBAM,B001IDZG0W,B001QSZ8BG,B001SH6N8W,B002J23III,B002RAQND4,B0033B4310,B0035ERKYW,B003D8ZT0C,...,B0CN6ZHVTG,B0CN73HNV5,B0CN74CH58,B0CN78DYC2,B0CN7C8TLD,B0CNNQ2HGH,B0CNQ5FZ6Z,B0CNS35XVZ,B0CNS6F39B,B0CP4FL3HQ
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B000UXZQ42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B0016MNBAM,0.0,0.0,0.0,0.0,0.267843,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001IDZG0W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001QSZ8BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B001SH6N8W,0.0,0.267843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.093726,0.0


In [12]:
# Method to find top N neighbors
def find_n_neighbors(df,n):
    order = np.argsort(df.values, axis=1)[:, :n]
    df = df.apply(axis=1, func=lambda x: pd.Series(x.sort_values(ascending=False).iloc[:n].index,
                                                   index=['top{}'.format(i) for i in range(1, n+1)]))
    return df

In [13]:
# Find 10 neighbors of each user
user_10_neighbors = find_n_neighbors(user_similarity_df, 10)
user_10_neighbors.head(10)

Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A100UD67AHFODS,A2OOLI2WFY4L2,AZMY6E8B52L2T,AT53ZTTO707MB,A1KD8NJPZ01R37,A3QQGPI3CTV9SX,A2XX2A4OJCDNLZ,A162AA1DSK4OGW,ADAXXCMSLC0U9,A105TOJ6LTVMBG,A2W0GY64CJSV5D
A100WO06OQR8BQ,AAK6SOEJY30YG,AZBXKUH4AIW3X,A1ZXMMQPYC3Z9I,A12LH2100CKQO,A3L1VJMHFWONCB,A2ELKOZRB1JY8U,A105TOJ6LTVMBG,A2X6J6AFLLYVXH,A313DADVI76HKM,A33CNFK776MTWR
A105S56ODHGJEK,AM3XNQU9TZBD8,A1WJ3P43SZUNDM,A30UP2KKD5IQEP,A2UKE7GIVC7XFW,A2LNVVL2EJ057F,A3VBXQKRM7A4JR,A3CBCVHIK3G76X,A1WKQ94M45D8MG,A2SZLNSI5KOQJT,AMRMK86X3PKXD
A105TOJ6LTVMBG,A3OIE7IE9LTPJG,ADAXXCMSLC0U9,ATLA5CJH0ZZHP,AG6JJG6UOKBX,A1IZ6UH9V32GLG,A12LTYDEZDUEIV,A3ADKW0YREZD52,A1JVK6EX9KV7P,AZMY6E8B52L2T,A1AFS9M75F17IZ
A109XLG7SJQAIA,A3IQFC5I1NA9EU,A25HBO5V8S8SEA,A21GW4L8FO307V,AHJ3Y5H9NAPAO,AZCOE68N2HOCZ,A1PS4OYWUB0VX,A3QL2115DZCA62,A1UI6F76CPCG8Q,AY40BONSSV2NF,AV9PIER7NE448
A10AFVU66A79Y1,A1H55L0BLPCWYF,A399GPFKUZJC0F,A13H2ZRENPV9E,A1KLPS6P7UR631,A2J8DE8R94ZB02,A1FF2PLA84KNAE,A1H6SBO7R007I8,AOEP96629WHHA,A3VBZDYGHF4NK8,A31RULW0KNYJ5H
A10CRW7XRJBJ2G,A2P8JK1L1WHDOV,A399GPFKUZJC0F,A1JVOV54H4ZVFC,AZBXKUH4AIW3X,A1ZXMMQPYC3Z9I,A2ELKOZRB1JY8U,A1U6XPYATU9OPG,AAJ1IYOUIHWF,A3ULWD7PM5OFKU,A2X6J6AFLLYVXH
A10G4BPT5MGBHY,A2TVH2OBNXYXHV,A379DLIAF0MFM5,A1QNYLBG73QRNF,A23ZNGL704AW7O,AGFSSC5DBY2HU,A2VAD79S4A06DD,A186697K4XKXQL,A1B6O7SAIYG2N0,A3LZA698SQPCXE,A2KOV8XWZOZ0FQ
A10JPZAYDGFHEV,A3VZH0PWLQ9BB1,A2NN1KIYEM2SKS,A1B8EJQU73YGL9,A2J6LVPWDFMIIZ,A2VJ5ALN1BIV3B,A1MNX0HKE40X7W,A3INN916E4VION,ASBJ67EWEBWTZ,AEB5O5LHW0TIX,A236V1U1P2K4OG
A10JVQAKUOK2A7,A3PCEB9ND82AGE,A3T3JW81QUXVSJ,ATLZNVLYKP9AZ,A1BBYSMT9YCODI,A2L4W1M5YU5QX3,A1C0SOX7DPXY65,ATNOI7E8GTIVE,AN81JUYW2SL24,A18MBO1U4DPY20,A3QL2115DZCA62


In [14]:
# Find 10 neighbors of each item
item_10_neighbors = find_n_neighbors(item_similarity_df, 10)
item_10_neighbors.head(10)

Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
B000UXZQ42,B0C8M8PWJ5,B098W1QVB8,B091CV3C6F,B07MDZNJHQ,B07W7K8MW2,B09R844P7Q,B098TXPYMH,B0BTTXPC9F,B08WJN3H92,B07WHPY1PB
B0016MNBAM,B0BPK4Q778,B0C57DWXH6,B077DC9XQH,B091J5GXH4,B0CK4CXPLN,B091J8DQ5W,B08FB387V4,B08QRQ2LW7,B092SS49WC,B09WNF8HCY
B001IDZG0W,B0BW6NV6PR,B0966VMJ86,B09HSTLRL2,B096DF1HJC,B0C5N5FR4S,B09XXTBKHH,B006B3PADC,B0897Y5Q6Y,B014R01S4U,B07MX7RBQY
B001QSZ8BG,B0796N1XWY,B08YP3C5KX,B0BVZSYYDF,B09FHRGPKK,B09P5J4YKR,B09HQV7LH9,B09B29CYCF,B09M9XFGB5,B09PGBND63,B0B5RQKT3F
B001SH6N8W,B098TWKMSZ,B07PJ8VFBH,B06WGLXPM8,B091J5GXH4,B0BYLQVN78,B09ZYR8VLN,B09232BW8P,B07KQWR72Z,B077DC9XQH,B09P43JLPL
B002J23III,B075LD4YY6,B015WCV6QM,B016I46XSE,B09ZJXF5NF,B0BW6NH1JM,B08TR2JL4B,B0B2FFMKKR,B07MW8L8L5,B0BW6MKM2W,B0847Q3JJF
B002RAQND4,B08RYYDRN3,B0CLDRKS3X,B07R84Q1RF,B07VCFS1WM,B09PLBZTNG,B09HN6L3JW,B07MX1M7TF,B095CXGD4V,B0976FZN5F,B08PFVM6YB
B0033B4310,B07GSY1ZQR,B00VB4CXLI,B091C7FVM4,B07CMS5Q6P,B091J8GJVY,B09FQDW6S8,B01N0Z1YKE,B089YS7GV3,B09JCQ7K5P,B09DJC1XQM
B0035ERKYW,B096QZSSPN,B0B5TYHS7B,B01MXDDZ5B,B08QSKPJCX,B0C4JRWJGM,B09QLG4695,B07QC3H1SL,B07SRZMN48,B097LRNNW9,B096DGXD6C
B003D8ZT0C,B07MXQPF1Q,B0CD9BGGMT,B0BXY3T4C6,B07JW1JDCJ,B098TW571B,B09L53VY4C,B09VFY4RTM,B0BMTHL29Z,B0C7CQT9ZS,B01N7S0IPR


In [15]:
def get_users_similar_products(user1, user2):
    common_products = ratings[ratings.userId == user1].merge(
    ratings[ratings.userId == user2],
    on = "productId",
    how = "inner" )
    return common_products[['rating_x', 'rating_y', 'productId']].head()

In [16]:
# Check the similarity of two users
get_users_similar_products('A100UD67AHFODS', 'A3CG93783LP0FO')

Unnamed: 0,rating_x,rating_y,productId
0,5,4,B0CJVXW7DG


In [17]:
# Method to predict the rating
def predict(ratings, similarity, type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)  
        mean_user_rating = np.array(mean_user_rating)
        #We use np.newaxis so that mean_user_rating has same format as ratings
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred

In [18]:
user_item.shape

(2209, 6282)

In [19]:
user_similarity.shape

(2209, 2209)

In [20]:
#predict user rating for all products
user_prediction = predict(user_item, user_similarity, type='user')
user_prediction = pd.DataFrame(user_prediction, index=user_item.index, columns=user_item.columns)
user_prediction.head()

productId,B000UXZQ42,B0016MNBAM,B001IDZG0W,B001QSZ8BG,B001SH6N8W,B002J23III,B002RAQND4,B0033B4310,B0035ERKYW,B003D8ZT0C,...,B0CN6ZHVTG,B0CN73HNV5,B0CN74CH58,B0CN78DYC2,B0CN7C8TLD,B0CNNQ2HGH,B0CNQ5FZ6Z,B0CNS35XVZ,B0CNS6F39B,B0CP4FL3HQ
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100UD67AHFODS,0.006498,0.006498,0.012792,0.006498,0.029327,0.006498,0.044822,0.026795,0.041836,0.006498,...,0.026168,0.120025,0.050635,0.0576,0.006498,0.060909,0.09605,0.075573,0.820225,0.006498
A100WO06OQR8BQ,0.000609,0.000609,0.000609,0.000609,0.046489,0.000609,0.000609,0.000609,0.000609,0.000609,...,0.000609,0.000609,0.000609,0.000609,0.000609,0.000609,0.000609,0.000609,0.095797,0.054623
A105S56ODHGJEK,-0.002394,-0.002394,0.023597,-0.002394,0.010988,-0.002394,-0.002394,-0.002394,-0.002394,-0.002394,...,0.026431,-0.002394,0.03061,0.041167,-0.002394,-0.00055,0.051813,0.106091,0.140693,0.041682
A105TOJ6LTVMBG,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,-0.002687,...,-0.002687,0.087369,-0.002687,-0.002687,-0.002687,0.057744,-0.002687,-0.002687,0.058429,-0.002687
A109XLG7SJQAIA,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,...,0.109571,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259,-0.000259


In [21]:
item_prediction = predict(user_item, item_similarity, type='item')
item_prediction.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,6272,6273,6274,6275,6276,6277,6278,6279,6280,6281
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100UD67AHFODS,0.0,0.0,0.03932,0.0,0.021616,0.0,0.172188,0.054201,0.155804,0.0,...,0.024996,0.038561,0.033648,0.052701,0.0,0.020494,0.038678,0.013838,0.239328,0.0
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.02076,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011438,0.052675
A105S56ODHGJEK,0.0,0.0,0.030835,0.0,0.006497,0.0,0.0,0.0,0.0,0.0,...,0.018782,0.0,0.01705,0.01702,0.0,0.004232,0.005623,0.025019,0.012989,0.024472
A105TOJ6LTVMBG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.008895,0.0,0.0,0.0,0.007267,0.0,0.0,0.005415,0.0
A109XLG7SJQAIA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.06839,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [87]:
# Method to Recommend the items with the highest predicted ratings
def recommend_items(userId, orig_df, preds_df, top_n):
    # Get and sort the user's ratings
    sorted_user_ratings = orig_df.loc[userId].sort_values(ascending=False) #sorted_user_ratings
    
    sorted_user_predictions = preds_df.loc[userId].sort_values(ascending=False) #sorted_user_predictions
    
    
    # Prepare recommendations
    recommedations = pd.concat([sorted_user_ratings, sorted_user_predictions], axis=1)
    
    recommedations.index.name = 'Recommended Items'
    
    recommedations.columns = ['user_ratings', 'user_predictions']
    
    # Take the products which user has NOT rated
    recommedations = recommedations.loc[recommedations.user_ratings == 0] 
    
    recommedations = recommedations.sort_values('user_predictions', ascending=False)
    
    #print(recommedations[:top_n])
    return recommedations.head(top_n)

In [140]:
def get_result(userID, no_products):
    selected_columns = ['asin', 'name','user_ratings', 'user_predictions']
    column_mapping = {'asin': 'RProductID', 'name': 'RProductName'}
    
    recom_UBCF = recommend_items(userID, user_item, user_prediction, no_products)
    #get names for products
    recom_UBCF_with_PN = pd.merge(recom_UBCF, products, left_index=True, right_on='asin')
    recom_UBCF_with_PN = recom_UBCF_with_PN[selected_columns]
    recom_UBCF_with_PN = recom_UBCF_with_PN.rename(columns=column_mapping)

    recom_IBCF = recommend_items(userID, user_item, item_prediction, no_products)
    recom_IBCF_with_PN = pd.merge(recom_IBCF, products, left_index=True, right_on='asin')
    recom_IBCF_with_PN = recom_IBCF_with_PN[selected_columns]
    recom_IBCF_with_PN = recom_IBCF_with_PN.rename(columns=column_mapping)
    
    return recom_UBCF_with_PN, recom_IBCF_with_PN 

In [141]:
# Find recommendation for couple of users using UBCF
users = {'A100UD67AHFODS': 6, 'A100WO06OQR8BQ': 3, 'A10Y058K7B96C6': 8}  

for user in users:
    UBCF, IBCF = get_result(user, find_recom[user])
    print("Top %d recommendations for the userId: %s" %(find_recom[user],user))
    print("-----------------------------")
    display(UBCF)
    display(IBCF) 
    print("----------------------------------------------------------")

Top 6 recommendations for the userId: A100UD67AHFODS
-----------------------------


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
15,B0C1ZRDGM2,"IKU A23 Dual SIM, 32GB Memory, 2GB RAM - Salte...",0.0,0.252135
139,B09TKDJL9M,realme C35 Dual-Sim 64GB ROM + 4GB RAM (GSM on...,0.0,0.206745
307,B0BXNY4YD3,Samsung galaxy a34 dual sim mobile phone andro...,0.0,0.180583
9,B0BBMGZ5T4,realme narzo 50i Prime (Dark Blue 3 GB RAM +32...,0.0,0.178337
931,B07T9KJMCG,"DOQAUS Wireless Bluetooth Headphones, 3 EQ Mod...",0.0,0.147761
187,B09C1356VM,SAMSUNG Galaxy Z FOLD3 5G Single SIM and e SIM...,0.0,0.14459


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
6252,B0BP8B6K27,U78 Smart Watch Body Temperature Blood Oxygen ...,0.0,
3687,B0BPD1RNNR,EESOME M10 TWS Bluetooth 5.1 Earphone Charging...,0.0,
6468,B0BP89FS1D,Y5 Touch Smart Watch Compatible with Android a...,0.0,
735,B0BPCX7NBC,Legion 5 Pro 16ACH6H Gaming Laptop Ryzen7-5800...,0.0,
1388,B0BPBS36FD,Redragon H386 Diomedes Wired Gaming Headset - ...,0.0,
383,B0BP97CYJ8,Lenovo IdeaPad Gaming 3 Laptop - Ryzen 5 5600H...,0.0,


----------------------------------------------------------
Top 3 recommendations for the userId: A100WO06OQR8BQ
-----------------------------


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
113,B07WFPLZGW,"VIVO Y02, 2GB RAM, 32GB ROM - Orchid Blue",0.0,0.237547
103,B09PZ81C73,"Infinix Hot 11 Play Android Mobile Phone, 64GB...",0.0,0.212439
2,B08TWN358F,A80s 4GB 64GB internal memory 6.217 inches 4G ...,0.0,0.189346


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
3543,B0BMXL67XS,WIWU Airbuds Pro 2 SE TWS Wireless BT In-Ear B...,0.0,
3518,B0BPLHLZRD,iPhone Wired Earbuds Lightning Apple Headphone...,0.0,
718,B0BPK4Q778,"Lenovo V15 G2 Laptop - Ryzen 5 5500U 6-Cores, ...",0.0,


----------------------------------------------------------
Top 8 recommendations for the userId: A10Y058K7B96C6
-----------------------------


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
0,B0771RT4PM,"Apple iPhone 7 Plus with FaceTime - 256GB, 4G ...",0.0,0.355971
11,B0BWQPV33L,DOOGEE X95 Smartphone,0.0,0.294035
7,B09NB379GF,Darago D1-1.77-inch Dual SIM Mobile Phone - Wh...,0.0,0.219266
17,B0C7QCHGLF,"Motorola razr 40 Ultra (Viva Magenta, 8GB RAM,...",0.0,0.210341
33,B0CKHLFN7G,OPPO A18 128GB 4GB Glowing Blue UAE VERSION,0.0,0.1776
22,B0CCSV3RNL,ACE URBAN3 4G Dual SIM Smartphone 2G RAM 32G R...,0.0,0.161076
5,B08YFF6SZM,"Redmi Note 10 Dual SIM - 6.43 Inches, 128 GB, ...",0.0,0.158608
96,B00VQ9GK3K,Xiaomi redmi note 11s dual sim amoled dotdispl...,0.0,0.154749


Unnamed: 0,RProductID,RProductName,user_ratings,user_predictions
779,B0BP9X3TS5,HP Probook 450 G8 Laptop - 11th Intel Core i5-...,0.0,
6188,B0BPK3JSR9,New F8 Ultra max Smartwatch Series 8 I S8 49mm...,0.0,
3518,B0BPLHLZRD,iPhone Wired Earbuds Lightning Apple Headphone...,0.0,
672,B0BPK6MLC8,Lenovo IdeaPad 3 15ALC6 Laptop - Ryzen 7 5700U...,0.0,
718,B0BPK4Q778,"Lenovo V15 G2 Laptop - Ryzen 5 5500U 6-Cores, ...",0.0,
6522,B0BPK4KYBN,Smart Watch For Men Women X8 Plus Ultra Black ...,0.0,
383,B0BP97CYJ8,Lenovo IdeaPad Gaming 3 Laptop - Ryzen 5 5600H...,0.0,
5814,B0BP9K91JJ,LENOVO Legion 5 15ACH6H Laptop - Ryzen 5 5600H...,0.0,


----------------------------------------------------------


### Performance Metrics

In [155]:
def calculate_rmse(orig_df, preds_df):
    rmse_df = pd.concat([orig_df.mean(), preds_df.mean()], axis=1)
    rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']
    RMSE = round(np.sqrt(((rmse_df.Avg_actual_ratings - rmse_df.Avg_predicted_ratings) ** 2).mean()), 5)*100
    print('\nRMSE for this recommender model = {}%\n'.format(RMSE))
    return rmse_df.head()

In [156]:
RMSE = calculate_rmse(user_item, user_prediction)


RMSE for this recommender model = 0.364%

