In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
import joblib
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import gzip
import dask as dd

In [5]:
def parse(path):
  g = gzip.open(path, 'rb')
  for l in g:
    yield eval(l)

def getDF(path):
  i = 0
  df = {}
  for d in parse(path):
    df[i] = d
    i += 1
  return pd.DataFrame.from_dict(df, orient='index')

dfmeta = getDF('./meta_Electronics1.json.gz')

In [6]:
dfmeta.head()

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand
0,132793040,http://ecx.images-amazon.com/images/I/31JIPhp%...,The Kelby Training DVD Mastering Blend Modes i...,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Mastering Blend Modes in A...,,,,
1,321732944,http://ecx.images-amazon.com/images/I/31uogm6Y...,,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Adobe Photoshop CS5 Crash ...,,,,
2,439886341,http://ecx.images-amazon.com/images/I/51k0qa8f...,Digital Organizer and Messenger,"[[Electronics, Computers & Accessories, PDAs, ...",Digital Organizer and Messenger,8.15,{'Electronics': 144944},"{'also_viewed': ['0545016266', 'B009ECM8QY', '...",
3,511189877,http://ecx.images-amazon.com/images/I/41HaAhbv...,The CLIKR-5 UR5U-8780L remote control is desig...,"[[Electronics, Accessories & Supplies, Audio &...",CLIKR-5 Time Warner Cable Remote Control UR5U-...,23.36,,"{'also_viewed': ['B001KC08A4', 'B00KUL8O0W', '...",
4,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",


In [7]:
dfmeta.shape

(498196, 9)

In [9]:
dfreviews=pd.read_csv('./ratings_Electronics.csv', header=None,sep=',')

In [10]:
dfreviews.head()

Unnamed: 0,0,1,2,3
0,AKM1MP6P0OYPR,132793040,5.0,1365811200
1,A2CX7LUOHB2NDG,321732944,5.0,1341100800
2,A2NWSAGRHCP8N5,439886341,1.0,1367193600
3,A2WNBOD3WNDNKT,439886341,3.0,1374451200
4,A1GI0U4ZRJA8WN,439886341,1.0,1334707200


In [11]:
dfreviews.columns = ["reviewerID", "asin","overall","timestamp"]
dfreviews.head()

Unnamed: 0,reviewerID,asin,overall,timestamp
0,AKM1MP6P0OYPR,132793040,5.0,1365811200
1,A2CX7LUOHB2NDG,321732944,5.0,1341100800
2,A2NWSAGRHCP8N5,439886341,1.0,1367193600
3,A2WNBOD3WNDNKT,439886341,3.0,1374451200
4,A1GI0U4ZRJA8WN,439886341,1.0,1334707200


In [12]:
dfreviews.shape

(7824482, 4)

In [13]:
product_ratings=pd.merge(dfreviews,dfmeta,on='asin',how='inner')

In [14]:
product_ratings.head()

Unnamed: 0,reviewerID,asin,overall,timestamp,imUrl,description,categories,title,price,salesRank,related,brand
0,AKM1MP6P0OYPR,132793040,5.0,1365811200,http://ecx.images-amazon.com/images/I/31JIPhp%...,The Kelby Training DVD Mastering Blend Modes i...,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Mastering Blend Modes in A...,,,,
1,A2CX7LUOHB2NDG,321732944,5.0,1341100800,http://ecx.images-amazon.com/images/I/31uogm6Y...,,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Adobe Photoshop CS5 Crash ...,,,,
2,A2NWSAGRHCP8N5,439886341,1.0,1367193600,http://ecx.images-amazon.com/images/I/51k0qa8f...,Digital Organizer and Messenger,"[[Electronics, Computers & Accessories, PDAs, ...",Digital Organizer and Messenger,8.15,{'Electronics': 144944},"{'also_viewed': ['0545016266', 'B009ECM8QY', '...",
3,A2WNBOD3WNDNKT,439886341,3.0,1374451200,http://ecx.images-amazon.com/images/I/51k0qa8f...,Digital Organizer and Messenger,"[[Electronics, Computers & Accessories, PDAs, ...",Digital Organizer and Messenger,8.15,{'Electronics': 144944},"{'also_viewed': ['0545016266', 'B009ECM8QY', '...",
4,A1GI0U4ZRJA8WN,439886341,1.0,1334707200,http://ecx.images-amazon.com/images/I/51k0qa8f...,Digital Organizer and Messenger,"[[Electronics, Computers & Accessories, PDAs, ...",Digital Organizer and Messenger,8.15,{'Electronics': 144944},"{'also_viewed': ['0545016266', 'B009ECM8QY', '...",


In [15]:
product_ratings.shape

(7824482, 12)

In [16]:
product_ratings.drop(['imUrl','description','categories','price','salesRank','related','brand'], axis=1, inplace=True)

In [17]:
product_ratings.drop(['timestamp'],axis=1,inplace=True)

In [18]:
product_ratings.head()

Unnamed: 0,reviewerID,asin,overall,title
0,AKM1MP6P0OYPR,132793040,5.0,Kelby Training DVD: Mastering Blend Modes in A...
1,A2CX7LUOHB2NDG,321732944,5.0,Kelby Training DVD: Adobe Photoshop CS5 Crash ...
2,A2NWSAGRHCP8N5,439886341,1.0,Digital Organizer and Messenger
3,A2WNBOD3WNDNKT,439886341,3.0,Digital Organizer and Messenger
4,A1GI0U4ZRJA8WN,439886341,1.0,Digital Organizer and Messenger


In [19]:
product_ratings.shape

(7824482, 4)

In [20]:
product_ratings = product_ratings.rename(columns={'reviewerID': 'userID', 'asin': 'prod_ID','title': 'prod_name',
                                                 'overall': 'rating' })

In [21]:
product_ratings.head()

Unnamed: 0,userID,prod_ID,rating,prod_name
0,AKM1MP6P0OYPR,132793040,5.0,Kelby Training DVD: Mastering Blend Modes in A...
1,A2CX7LUOHB2NDG,321732944,5.0,Kelby Training DVD: Adobe Photoshop CS5 Crash ...
2,A2NWSAGRHCP8N5,439886341,1.0,Digital Organizer and Messenger
3,A2WNBOD3WNDNKT,439886341,3.0,Digital Organizer and Messenger
4,A1GI0U4ZRJA8WN,439886341,1.0,Digital Organizer and Messenger


In [22]:
product_ratings.to_csv('product_ratings.csv', sep='\t')

In [23]:
df=product_ratings

In [24]:
df.isnull().sum().sum()

293557

In [25]:
df=df.dropna()

In [26]:
df.shape

(7530925, 4)

In [27]:
df.isnull().any().any()

False

In [28]:
len(df['userID'].unique())

4053964

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7530925 entries, 0 to 7824481
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userID     object 
 1   prod_ID    object 
 2   rating     float64
 3   prod_name  object 
dtypes: float64(1), object(3)
memory usage: 287.3+ MB


In [30]:
counts1=df['userID'].value_counts() 
counts=df['prod_ID'].value_counts()

In [31]:
counts1

A5JLAU2ARJ0BO     512
A3OXHLG6DIBRW8    482
ADLVFFE4VBT8      450
A6FIAB28IS79      424
A680RUE1FDO8B     395
                 ... 
A360Y4YLGUGK3C      1
A1RMGJ3LPPZJRA      1
A12GPY79VY6SQY      1
AEV9EUSR98LDZ       1
AH7ZNGM8WOOGN       1
Name: userID, Length: 4053964, dtype: int64

In [32]:
counts

B0074BW614    18244
B00DR0PDNE    16454
B007WTAJTO    14172
B0019EHU8G    12285
B003ELYQGG    11617
              ...  
B002DW977K        1
B003Z4CR0O        1
B009SBGK1A        1
B009MMP3T0        1
B004E7Q8BA        1
Name: prod_ID, Length: 469625, dtype: int64

In [33]:
df1=df[df['userID'].isin(counts1[counts1 >=50].index)]
df1.shape

(116794, 4)

In [34]:
df1=df1[df1['prod_ID'].isin(counts[counts >=50].index)]
df1.shape

(72341, 4)

In [35]:
df1.head()

Unnamed: 0,userID,prod_ID,rating,prod_name
492,A3CLWR1UUZT6TG,972683275,5.0,VideoSecu 24&quot; Long Arm TV Wall Mount Low ...
631,A3TAS1AG6FMBQW,972683275,5.0,VideoSecu 24&quot; Long Arm TV Wall Mount Low ...
1310,A3LDPF5FMB782Z,1400501466,5.0,"Barnes &amp; Noble NOOK Tablet 16gb (Color, BN..."
1322,A1A5KUIIIHFF4U,1400501466,1.0,"Barnes &amp; Noble NOOK Tablet 16gb (Color, BN..."
1335,A2XIOXRRYX0KZY,1400501466,3.0,"Barnes &amp; Noble NOOK Tablet 16gb (Color, BN..."


In [36]:
ratings_sum = pd.DataFrame(df1.groupby(['prod_ID'])['rating'].sum()).rename(columns = {'rating': 'ratings_sum'})
top10 = ratings_sum.sort_values('ratings_sum', ascending = False).head(10)
top10

Unnamed: 0_level_0,ratings_sum
prod_ID,Unnamed: 1_level_1
B003ES5ZUU,846.0
B0088CJT4U,814.0
B000N99BBC,755.0
B007WTAJTO,741.0
B00829TIEK,626.0
B00829THK0,560.0
B008DWCRQW,524.0
B004CLYEDC,517.0
B002R5AM7C,514.0
B002V88HFE,475.0


In [37]:
top10_popular=top10.merge(df1,left_index = True, right_on = 'prod_ID').drop_duplicates(
    ['prod_ID', 'prod_name'])[['prod_ID', 'prod_name','ratings_sum']]

In [38]:
print ('Top 10 Popular Products by sum user ratings\n')
top10_popular

Top 10 Popular Products by sum user ratings



Unnamed: 0,prod_ID,prod_name,ratings_sum
3313486,B003ES5ZUU,AmazonBasics High-Speed HDMI Cable - 15 Feet (...,846.0
6104981,B0088CJT4U,TP-LINK TL-WDR4300 Wireless N750 Dual Band Rou...,814.0
1198226,B000N99BBC,TP-LINK TL-SG1005D 10/100/1000Mbps 5-Port Giga...,755.0
5941380,B007WTAJTO,SanDisk Ultra 64GB MicroSDXC Class 10 UHS Memo...,741.0
6031403,B00829TIEK,Seagate Backup Plus 3TB USB 3.0 Desktop Extern...,626.0
6028195,B00829THK0,Seagate Backup Plus 1TB Desktop External Hard ...,560.0
6190152,B008DWCRQW,D-Link Wireless AC 1750 Mbps Home Cloud App-En...,524.0
4024382,B004CLYEDC,"Micra Digital CAT5e Snagless Patch Cable, 5 Fe...",517.0
2796338,B002R5AM7C,"Flip MinoHD Video Camera - Brushed Metal, 8 GB...",514.0
2883526,B002V88HFE,eneloop SEC-CSPACER4PK C Size Spacers for use ...,475.0


In [39]:
ratingsd=df1.pivot(index='prod_ID',columns= 'userID',values='rating').fillna(0)
ratingsd.head()

userID,A100UD67AHFODS,A100WO06OQR8BQ,A105S56ODHGJEK,A105TOJ6LTVMBG,A10AFVU66A79Y1,A10H24TDLK2VDP,A10NMELR4KX0J6,A10O7THJ2O20AG,A10PEXB6XAQ5XF,A10X9ME6R66JDX,...,AYOTEJ617O60K,AYP0YPLSP9ISM,AZ515FFZ7I2P7,AZ8XSDMIX04VJ,AZAC8O310IK4E,AZBXKUH4AIW3X,AZCE11PSTCH1L,AZMY6E8B52L2T,AZNUHQSHZHSUE,AZOK5STV85FBJ
prod_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
972683275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1400501466,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1400501520,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1400501776,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1400532620,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
ratingsd.shape

(16250, 1438)

In [41]:
traind, testd = train_test_split(ratingsd, test_size=0.30,random_state=42)

In [43]:
train = traind.values
test = testd.values

In [47]:
type(train)

numpy.ndarray

In [49]:
sparsity = float(len(train.nonzero()[0]))
sparsity /= (train.shape[0] * train.shape[1])
sparsity *= 100
print ('Sparsity: {:5.2f}%'.format(sparsity))

Sparsity:  0.31%


In [50]:
def item_similarity(ratings, epsilon=1e-9):
    # epsilon -> for handling dived-by-zero errors
    sim = ratings.T.dot(ratings) + epsilon
    norms = np.array([np.sqrt(np.diagonal(sim))])
    return (sim / norms / norms.T)

In [51]:
item_sim = item_similarity(train)

In [52]:
def predict_item(ratings, similarity):
    return ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])

In [53]:
item_prediction = predict_item(train, item_sim)

In [54]:
item_prediction[:4, :4]

array([[1.13154332e-02, 2.39846407e-13, 4.18406498e-13, 1.42814874e-02],
       [7.08353547e-13, 9.88597954e-03, 6.26177519e-03, 9.03053061e-04],
       [6.14677728e-02, 5.25546476e-02, 6.00339326e-02, 3.81653351e-02],
       [3.07085918e-13, 4.71910825e-03, 2.83874998e-13, 3.87580711e-13]])

In [56]:
def get_mse(pred, actual):
    # Ignore nonzero terms.
    pred = pred[actual.nonzero()].flatten()
    actual = actual[actual.nonzero()].flatten()
    return mean_squared_error(pred, actual)

In [57]:
print ('Item-based CF MSE: ' + str(get_mse(item_prediction, test)))

Item-based CF MSE: 19.474033380362258


In [59]:
svd = TruncatedSVD(n_components=50, n_iter=7, random_state=42)
r_mat_tr=svd.fit_transform(traind) 
print(svd.explained_variance_ratio_)  
print(svd.explained_variance_ratio_.sum())

#pm=pd.DataFrame(cosine_similarity(r_mat_tr))
#pm.head()
ctrain = cosine_similarity(r_mat_tr)

[0.0146434  0.00719657 0.00617064 0.0047372  0.00449874 0.00414022
 0.00404678 0.00397648 0.0038515  0.00368769 0.00363522 0.00346191
 0.00339813 0.00330373 0.00321403 0.00316935 0.00310875 0.00307813
 0.00299295 0.00297409 0.00287833 0.00285293 0.00279511 0.00276082
 0.00275337 0.00273291 0.00269565 0.00268278 0.0026511  0.00264067
 0.00260969 0.00254763 0.00252148 0.00250637 0.00248348 0.00247264
 0.00242474 0.00240167 0.00240072 0.00237816 0.00234721 0.00234797
 0.00229641 0.00229128 0.00227067 0.00221551 0.00219942 0.0021687
 0.0021405  0.00210803]
0.1638614541293288


In [60]:
svd = TruncatedSVD(n_components=50, n_iter=7, random_state=42)
r_mat_tr=svd.fit_transform(testd) 
print(svd.explained_variance_ratio_)  
print(svd.explained_variance_ratio_.sum())

#pmtt=pd.DataFrame(cosine_similarity(r_mat_tr))
#print (pmtt[:2])
#pmtt.head()
ctest = cosine_similarity(r_mat_tr)

[0.02010026 0.00928553 0.00665453 0.0061458  0.00586075 0.00558877
 0.00524489 0.00512107 0.00502568 0.00495611 0.00477533 0.0045936
 0.00454932 0.00431189 0.00421198 0.00415595 0.00409129 0.00398132
 0.00389454 0.00385714 0.0037663  0.00370883 0.00366754 0.00357753
 0.0035108  0.00343275 0.00339622 0.00334434 0.0032986  0.00323527
 0.00321271 0.00316973 0.00315392 0.00313645 0.00306578 0.00304516
 0.0030226  0.00299558 0.00293345 0.00291208 0.00287544 0.0028458
 0.00283452 0.00281071 0.00280101 0.0027709  0.00276673 0.00270004
 0.00265762 0.00264926]
0.2097034528438083


In [61]:
print (' CF MSE: ' + str(get_mse(ctrain, ctest)))

 CF MSE: 0.04062760110645839


In [62]:
df1 = df1.sort_values(by='rating')
df1 = df1.reset_index(drop=True)
count_users = df1.groupby("userID", as_index=False).count()

In [63]:
count = df1.groupby("prod_ID", as_index=False).mean()

In [64]:
items_df = count[['prod_ID']]
items_df.head()
print(len(items_df))

16250


In [65]:
users_df = count_users[['userID']]
users_df.head()
print(len(users_df))

1438


In [66]:
users_list = users_df.values
len(users_list)

1438

In [67]:
df_clean_matrix = df1.pivot(index='prod_ID', columns='userID', values='rating').fillna(0)
df_clean_matrix = df_clean_matrix.T
R = (df_clean_matrix).values
R

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [68]:
user_ratings_mean = np.mean(R, axis = 1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)
R_demeaned

array([[-0.02418462, -0.02418462, -0.02418462, ..., -0.02418462,
        -0.02418462, -0.02418462],
       [-0.02024615, -0.02024615, -0.02024615, ..., -0.02024615,
        -0.02024615, -0.02024615],
       [-0.00861538, -0.00861538, -0.00861538, ..., -0.00861538,
        -0.00861538, -0.00861538],
       ...,
       [-0.02861538, -0.02861538, -0.02861538, ..., -0.02861538,
        -0.02861538, -0.02861538],
       [-0.00793846, -0.00793846, -0.00793846, ..., -0.00793846,
        -0.00793846, -0.00793846],
       [-0.00886154, -0.00886154, -0.00886154, ..., -0.00886154,
        -0.00886154, -0.00886154]])

In [70]:
U, sigma, Vt = svds(R_demeaned)

In [71]:
sigma = np.diag(sigma)

In [72]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = df_clean_matrix.columns)
preds_df['userID'] = users_df
preds_df.set_index('userID', inplace=True)
preds_df.head()

prod_ID,0972683275,1400501466,1400501520,1400501776,1400532620,1400532655,140053271X,1400599997,1400698987,3744295508,...,B00JTI4X3E,B00JX1ZS5O,B00K0OBEE2,B00K4VQZCM,B00K7O2DJU,B00K91DB7Y,B00KFAGCUM,B00KINSDK8,B00KWHMR6G,B00L3YHF6O
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100UD67AHFODS,0.015423,0.011009,0.013081,0.016089,0.011947,0.018877,0.012717,0.007655,0.013124,0.011771,...,0.009765,0.393998,0.034516,0.050864,0.013123,0.045171,0.00045,0.061026,0.008367,0.362923
A100WO06OQR8BQ,0.024796,0.00653,-0.001214,0.012578,-0.000154,0.005729,-0.001113,0.007468,0.003846,-0.001871,...,-0.001331,0.077428,-0.002063,0.015925,0.021663,0.244201,0.078147,-0.006486,0.006921,0.172743
A105S56ODHGJEK,-0.003408,0.001779,-0.002836,0.002212,-9.2e-05,-0.006465,-0.002632,0.008081,-0.001386,-0.002772,...,-0.001007,-0.043161,0.006384,0.015014,0.015221,0.224464,0.063182,0.003796,0.00331,0.024768
A105TOJ6LTVMBG,0.015205,0.008016,0.007105,0.008773,0.004356,0.012288,0.006753,0.004501,0.010781,0.006014,...,0.004625,0.005234,-0.003433,0.005988,0.008871,-0.017516,0.014102,-0.003865,0.006301,0.02396
A10AFVU66A79Y1,0.00998,0.006588,0.002785,0.009932,0.003707,0.004818,0.002853,0.008817,0.003796,0.002749,...,0.003077,-0.007986,0.001601,0.005722,0.013716,0.12354,0.0329,0.000953,0.007231,0.045863


In [73]:
preds_df.shape

(1438, 16250)

In [74]:
def recommend_it(predictions_df, itm_df, original_ratings_df, num_recommendations=10,ruserId='A100UD67AHFODS'):
    
    # Get and sort the user's predictions
    sorted_user_predictions = predictions_df.loc[ruserId].sort_values(ascending=False)
    
    # Get the user's data and merge in the item information.
    user_data = original_ratings_df[original_ratings_df.userID == ruserId]
    user_full = (user_data.merge(itm_df, how = 'left', left_on = 'prod_ID', right_on = 'prod_ID').
                     sort_values(['rating'], ascending=False)
                 )

    print ('User {0} has already purchased {1} items.'.format(ruserId, user_full.shape[0]))
    print ('Recommending the highest {0} predicted  items not already purchased.'.format(num_recommendations))
    
    # Recommend the highest predicted rating items that the user hasn't bought yet.
    recommendations = (itm_df[~itm_df['prod_ID'].isin(user_full['prod_ID'])].
         merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left',
               left_on = 'prod_ID',
               right_on = 'prod_ID').
         rename(columns = {ruserId: 'Predictions'}).
         sort_values('Predictions', ascending = False).
                       iloc[:num_recommendations, :-1]
                      )
    topk=recommendations.merge(original_ratings_df,left_index = True, right_on = 'prod_ID',left_on='prod_ID').drop_duplicates(
    ['prod_ID', 'prod_name'])[['prod_ID', 'prod_name']]

    return topk

In [77]:
df1.head()

Unnamed: 0,userID,prod_ID,rating,prod_name
0,A1BVE2ZIBKJ7YI,B000ONDRDU,1.0,Flip Video Camcorder: 30-Minutes (White)
1,A1T1YSCDW0PD25,B00AZ43MGU,1.0,PRO Call Blocker - Ver 2.0 - Incoming &amp; Ou...
2,ATOKT8QYK967L,B002FFG6JC,1.0,2-Port USB Home Travel Charger for Apple iPhon...
3,A5JLAU2ARJ0BO,B00006HYWR,1.0,Creative Labs NOMAD MuVo 64 MB MP3 Player
4,A33775AIB1A664,B005KG44V0,1.0,NETGEAR Wireless Router - N900 Dual Band Gigab...


In [78]:
orders=df1[df1['userID']=="A100UD67AHFODS"]
orders.head()

Unnamed: 0,userID,prod_ID,rating,prod_name
151,A100UD67AHFODS,B004MYFTD8,1.0,TomTom GO 2535TM 5-Inch Bluetooth GPS Navigato...
575,A100UD67AHFODS,B000069JWX,1.0,Sony CDPCX455 400 Disc MegaStorage CD Changer ...
1794,A100UD67AHFODS,B004V3KCJG,1.0,Universal Air Vent Mount (Compatible with All ...
1994,A100UD67AHFODS,B001FVMQEQ,1.0,"ZAGG invisibleSHIELD for iPod touch 2G, 3G (Fu..."
4095,A100UD67AHFODS,B000OMKR8E,2.0,Audio-Technica ATHANC7 Noise-cancelling Headph...


In [75]:
recommend_it(preds_df, items_df, df1, 5)

User A100UD67AHFODS has already purchased 85 items.
Recommending the highest 5 predicted  items not already purchased.


Unnamed: 0,prod_ID,prod_name
29131,B00G4UQ6U8,Brainwavz S1 In Ear Headphones
7834,B007OY5V68,NEW AYL&reg; Portable Mini Speaker System with...
10755,B00FSA8VQ2,Brainwavz Delta IEM Earphones
3596,B003ES5ZUU,AmazonBasics High-Speed HDMI Cable - 15 Feet (...
25798,B00DTZYHX4,Bolse&reg; 300Mbps Wifi Wireless-N USB Micro M...


In [76]:
recommend_it(preds_df, items_df, df1, 5,'A100WO06OQR8BQ')

User A100WO06OQR8BQ has already purchased 91 items.
Recommending the highest 5 predicted  items not already purchased.


Unnamed: 0,prod_ID,prod_name
11745,B004CLYEDC,"Micra Digital CAT5e Snagless Patch Cable, 5 Fe..."
5649,B000N99BBC,TP-LINK TL-SG1005D 10/100/1000Mbps 5-Port Giga...
5012,B004CLYEFK,Micra Digital USB A to USB B Cable (6 Feet)
1169,B00829THK0,Seagate Backup Plus 1TB Desktop External Hard ...
656,B00834SJSK,Seagate Expansion 500GB Portable External Hard...


In [97]:
cus_under_60=df1[df1['userID'].isin(counts1[ counts1 <= 60 ].index)]

In [98]:
cus_under_60.head()

Unnamed: 0,userID,prod_ID,rating,prod_name
2,ATOKT8QYK967L,B002FFG6JC,1.0,2-Port USB Home Travel Charger for Apple iPhon...
8,A2Z2MUP8JRJXEU,B0030B2O3M,1.0,Laptop Notebook Cool Pad w/ 3 Fans
9,AJCR7L4AXL2OG,B000NWWT3G,1.0,SanDisk 2 GB Cruzer Micro USB Flash Drive (SDC...
14,A3LQUEB84IR3VO,B006ZP8UOW,1.0,Foscam FI8910W Pan &amp; Tilt IP/Network Camer...
15,A3U6J0DLLDEWM2,B009H8JOZS,1.0,VIZIO E601i-A3 60-inch 1080p Razor LED Smart H...


In [92]:
def get_orders(user):
    orders=df[df['userID']==user]
    return orders.head()

We consider a certain user : ATOKT8QYK967L <br/>
Observe that his orders are mostly related to storage devices <br/>
Also high rate of usage of USB based devices <br/>
Recommendations should reflect that trend

In [99]:
get_orders("ATOKT8QYK967L")

Unnamed: 0,userID,prod_ID,rating,prod_name
21481,ATOKT8QYK967L,B00001P4XA,4.0,Koss 'The Plug' In-Ear Headphones (Black)
282019,ATOKT8QYK967L,B00009R6K7,4.0,Sigma DG 58mm Multi-Coated UV Filter
323171,ATOKT8QYK967L,B0000AVQXO,5.0,Canon CB-2LU Battery Charger for 2L &amp; 3L S...
351744,ATOKT8QYK967L,B0000TW3R4,4.0,Adesso Tru-Form Contoured Ergonomic MAC USB Wh...
1196473,ATOKT8QYK967L,B000N7VPM2,5.0,Koss KEB24 Portable Isolation Earbud (Black) (...
1315854,ATOKT8QYK967L,B000QUUFRW,5.0,SanDisk 4GB Extreme SDHC Class 10 Memory Card
1595110,ATOKT8QYK967L,B00125Y0NU,5.0,Kingston Apple 2GB Kit (2x1GB Modules) 667MHz ...
1677262,ATOKT8QYK967L,B0014E02AO,3.0,"Olympus Foam Float Strap, 202212, Red"
1716115,ATOKT8QYK967L,B0015DYMVO,5.0,Belkin 3-Outlet Mini Travel Swivel Charger Sur...
1807246,ATOKT8QYK967L,B0019235DC,5.0,Giottos AA1254 Padded Tripod Case Extra-wide 6...


In [103]:
recommend_it(preds_df, items_df, df1, 30,"ATOKT8QYK967L")

User ATOKT8QYK967L has already purchased 41 items.
Recommending the highest 30 predicted  items not already purchased.


Unnamed: 0,prod_ID,prod_name
718,B007WTAJTO,SanDisk Ultra 64GB MicroSDXC Class 10 UHS Memo...
3596,B003ES5ZUU,AmazonBasics High-Speed HDMI Cable - 15 Feet (...
2159,B0088CJT4U,TP-LINK TL-WDR4300 Wireless N750 Dual Band Rou...
1424,B002V88HFE,eneloop SEC-CSPACER4PK C Size Spacers for use ...
10,B00829TIEK,Seagate Backup Plus 3TB USB 3.0 Desktop Extern...
5649,B000N99BBC,TP-LINK TL-SG1005D 10/100/1000Mbps 5-Port Giga...
16064,B001TH7GUU,AmazonBasics USB 2.0 A-Male to A-Female Extens...
982,B005HMKKH4,WD My Passport 2TB Portable External USB 3.0 H...
472,B000LRMS66,Garmin Portable Friction Mount - Frustration F...
4130,B002WE6D44,Transcend 8 GB Class 10 SDHC Flash Memory Card...


In [104]:
get_orders("A2Z2MUP8JRJXEU")

Unnamed: 0,userID,prod_ID,rating,prod_name
190214,A2Z2MUP8JRJXEU,B00006I53S,4.0,Canon EF 28-135mm f/3.5-5.6 IS USM Standard Zo...
305968,A2Z2MUP8JRJXEU,B00009XVCZ,4.0,Canon EF 50mm f/1.4 USM Standard &amp; Medium ...
408616,A2Z2MUP8JRJXEU,B0001VGFKW,5.0,Yamaha NS-AW150BL 2-Way Outdoor Speakers (Pair...
447950,A2Z2MUP8JRJXEU,B0002BEPW6,5.0,Metra 99-3412 Installation Kit for 1993-1997 G...
448068,A2Z2MUP8JRJXEU,B0002BEQBG,5.0,Metra 99-5025 Installation Kit w/EQ Slot for 1...
585827,A2Z2MUP8JRJXEU,B0007DDK7A,5.0,Stofen OM-EY Omni-Bounce for the Canon 580EX F...
941920,A2Z2MUP8JRJXEU,B000FLNU4M,5.0,Yamaha Corporation of America NSIW470WH 3-Way ...
982585,A2Z2MUP8JRJXEU,B000H0IF2S,5.0,Apple iPod shuffle 1 GB Orange (2nd Generation...
1044608,A2Z2MUP8JRJXEU,B000ID7QNI,5.0,VideoSecu LCD Monitor TV Wall Mount Articulati...
1090190,A2Z2MUP8JRJXEU,B000JMJWV2,5.0,Transcend 4 GB Class 6 SDHC Flash Memory Card ...


In [106]:
recommend_it(preds_df, items_df, df1, 5,"A2Z2MUP8JRJXEU")

User A2Z2MUP8JRJXEU has already purchased 33 items.
Recommending the highest 5 predicted  items not already purchased.


Unnamed: 0,prod_ID,prod_name
8510,B0082E9K7U,XBOOM Mini Portable Capsule Speaker with Recha...
29131,B00G4UQ6U8,Brainwavz S1 In Ear Headphones
718,B007WTAJTO,SanDisk Ultra 64GB MicroSDXC Class 10 UHS Memo...
7834,B007OY5V68,NEW AYL&reg; Portable Mini Speaker System with...
4960,B00G4V0QSK,Brainwavz R3 Dual Dynamic Driver Earphones
