In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
import pandas as pd
import numpy as np
import random
import os
import gc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

In [3]:
df = pd.read_csv('/content/drive/MyDrive/irfinalproject/file.csv')

In [4]:
df

Unnamed: 0.1,Unnamed: 0,Label,Text,ProductId,UserId,Username,HelpfulnessNumerator,HelpfulnessDenominator,Score
0,504174,1,The Creatures deliver the goods.: As a long ti...,B003P02EGU,A3LAN99BT1QBEN,Mrs. H.,1.0,1.0,4.0
1,533066,0,Don't buy used: Don't buy this used. Neither o...,B009E7YC54,A16F8IW5NK9Y4Q,Kristin Ames,1.0,1.0,3.0
2,264589,0,Belts burning the casing: Loved this vacuum---...,B002ZOPVG2,AIQDUKFB4X38M,VonMalcolm,1.0,1.0,1.0
3,290222,1,"Queensryche, has out-done its self with this c...",B000IZ0OC6,A1N1PMGMVFD8FD,gotjava,0.0,1.0,4.0
4,99874,1,Lexar multi card reader: The unit combines all...,B000H1CO74,A1GYEGLX3P2Y7P,Shala Kerrigan,1.0,1.0,5.0
...,...,...,...,...,...,...,...,...,...
49995,168722,1,"Great product, excellent value. I was buying t...",B005PANYEY,A3FABTXTZJ2ZN7,Fryday17,0.0,0.0,5.0
49996,64747,0,I was extremely dissapointed in this product. ...,B003WEDT4U,A1NNJKJV1V04JA,Marka,0.0,4.0,1.0
49997,239457,1,I am head over heels about these peanuts. They...,B001FA1IZA,A3BADG55FYI5M2,Gary M.,0.0,0.0,5.0
49998,161228,0,I have a 60lb English Bulldog/German Shepherd ...,B008O3G2K2,A3DH53FPYSP9E,Media Man,2.0,4.0,2.0


In [5]:
print('Number of unique USERS in Raw data = ', df['UserId'].nunique())
print('Number of unique ITEMS in Raw data = ', df['ProductId'].nunique())

Number of unique USERS in Raw data =  39440
Number of unique ITEMS in Raw data =  20584


In [6]:
most_rated = df.groupby('UserId').size().sort_values(ascending=False)[:10]
most_rated

UserId
A3OXHLG6DIBRW8    42
A281NPSIMI1C2R    39
AY12DBB0U420B     32
A1YUL9PCJR3JTY    31
A1TMAVN4CEM8U8    23
AZV26LP92E6WU     22
A1Z54EM24Y40LL    22
A36WGHR8TO5DKT    21
A2FRFAQCWZJT3Q    18
A1UQBFCERIP7VJ    18
dtype: int64

In [7]:
counts = df['UserId'].value_counts()
df_final = df[df['UserId'].isin(counts[counts >= 5].index)]

In [8]:
print('Number of users who have rated 5 or more items =', len(df_final))
print('Number of unique USERS in final data = ', df_final['UserId'].nunique())
print('Number of unique ITEMS in final data = ', df_final['ProductId'].nunique())

Number of users who have rated 5 or more items = 3172
Number of unique USERS in final data =  435
Number of unique ITEMS in final data =  2259


In [9]:
final_ratings_matrix = pd.pivot_table(df_final,index=['UserId'], columns = 'ProductId', values = "Score")
final_ratings_matrix.fillna(0,inplace=True)
print('Shape of final_ratings_matrix: ', final_ratings_matrix.shape)
given_num_of_ratings = np.count_nonzero(final_ratings_matrix)
print('given_num_of_ratings = ', given_num_of_ratings)
possible_num_of_ratings = final_ratings_matrix.shape[0] * final_ratings_matrix.shape[1]
print('possible_num_of_ratings = ', possible_num_of_ratings)
density = (given_num_of_ratings/possible_num_of_ratings)
density *= 100
print ('density: {:4.2f}%'.format(density))

Shape of final_ratings_matrix:  (435, 2259)
given_num_of_ratings =  3041
possible_num_of_ratings =  982665
density: 0.31%


In [10]:
final_ratings_matrix.tail()

ProductId,7310172001,7310172101,B0000537KC,B000084DWM,B000084EJT,B000084EK4,B000084EK5,B000084EKA,B000084EKY,B000084ETV,...,B0092VQPKM,B0093A5XC8,B0093NIPJS,B0096EZHM2,B00975HC9G,B009B87RWG,B009B87SAC,B009DS6IF6,B009E05HYQ,B009GHI5Q4
UserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AYGJ96W5KQMUJ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AYNAH993VDECT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AYOMAHLWRQHUG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
AYWUHB7N8XGZQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AZV26LP92E6WU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
final_ratings_matrix_T = final_ratings_matrix.transpose()
final_ratings_matrix_T.head()

UserId,A106ZCP7RSXMRU,A1080SE9X3ECK0,A10PEXB6XAQ5XF,A10Z8FC0SMU5VQ,A11I1I9QLMAM1A,A11OTLEDSW8ZXD,A11UPNFYDICF3C,A11WJXVIIPADZR,A11YOTONCPRQ9S,A121VLJBL8T0H1,...,AXVKMYWNIHK7W,AY12DBB0U420B,AY1EF0GOH80EK,AYB4ELCS5AM8P,AYBYYDVV5ABJE,AYGJ96W5KQMUJ,AYNAH993VDECT,AYOMAHLWRQHUG,AYWUHB7N8XGZQ,AZV26LP92E6WU
ProductId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7310172001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7310172101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B0000537KC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B000084DWM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B000084EJT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
train_data, test_data = train_test_split(df_final, test_size = 0.3, random_state=0)

In [15]:
train_data_grouped = train_data.groupby('ProductId').agg({'UserId': 'count'}).reset_index()
train_data_grouped.rename(columns = {'UserId': 'score'},inplace=True)
train_data_grouped.head()

Unnamed: 0,ProductId,score
0,7310172001,2
1,7310172101,1
2,B0000537KC,1
3,B000084DWM,1
4,B000084EJT,1


In [16]:
#Sort the products on recommendation score 
train_data_sort = train_data_grouped.sort_values(['score', 'ProductId'], ascending = [0,1]) 
      
#Generate a recommendation rank based upon score 
train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=0, method='first') 
          
#Get the top 5 recommendations 
popularity_recommendations = train_data_sort.head(5) 
popularity_recommendations 

Unnamed: 0,ProductId,score,Rank
1534,B005VOOM4A,10,1.0
910,B001OCKIP0,9,2.0
1500,B005HG9ET0,8,3.0
684,B001BCVY9W,7,4.0
1195,B003GTR8IO,7,5.0


In [17]:
# Use popularity based recommender model to make predictions
def recommend(user_id):     
    user_recommendations = popularity_recommendations 
          
    #Add user_id column for which the recommendations are being generated 
    user_recommendations['UserId'] = user_id 
      
    #Bring user_id column to the front 
    cols = user_recommendations.columns.tolist() 
    cols = cols[-1:] + cols[:-1] 
    user_recommendations = user_recommendations[cols] 
          
    return user_recommendations

In [18]:
find_recom = [15,121,200]   # This list is user choice.
for i in find_recom:
    print("Here is the recommendation for the userId: %d\n" %(i))
    print(recommend(i))    
    print("\n") 

Here is the recommendation for the userId: 15

      UserId   ProductId  score  Rank
1534      15  B005VOOM4A     10   1.0
910       15  B001OCKIP0      9   2.0
1500      15  B005HG9ET0      8   3.0
684       15  B001BCVY9W      7   4.0
1195      15  B003GTR8IO      7   5.0


Here is the recommendation for the userId: 121

      UserId   ProductId  score  Rank
1534     121  B005VOOM4A     10   1.0
910      121  B001OCKIP0      9   2.0
1500     121  B005HG9ET0      8   3.0
684      121  B001BCVY9W      7   4.0
1195     121  B003GTR8IO      7   5.0


Here is the recommendation for the userId: 200

      UserId   ProductId  score  Rank
1534     200  B005VOOM4A     10   1.0
910      200  B001OCKIP0      9   2.0
1500     200  B005HG9ET0      8   3.0
684      200  B001BCVY9W      7   4.0
1195     200  B003GTR8IO      7   5.0




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations['UserId'] = user_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations['UserId'] = user_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations['UserId'] = user_id


In [19]:
df_CF = pd.concat([train_data, test_data]).reset_index()
df_CF.tail()

Unnamed: 0.1,index,Unnamed: 0,Label,Text,ProductId,UserId,Username,HelpfulnessNumerator,HelpfulnessDenominator,Score
3167,32075,547481,0,Very unstable and annoying for kids: My son go...,B006HYLW32,A1PI8VBCXXSGC7,Lynn,0.0,0.0,3.0
3168,5422,312044,1,"Kool Album: Im a 100% rocker, a fan of real ro...",B000EVG8HY,A1YN7KUADY6QJ1,Jude,0.0,1.0,5.0
3169,26987,65426,1,Quite good -- if only there were more examples...,B001EQ55ZO,A2V0I904FH7ABY,Ram,0.0,0.0,5.0
3170,49860,400196,0,I was excited to try this after waking up afte...,B002IEVJRY,A1KEK09ZA6J9P8,Colleen M. Schneider,0.0,1.0,2.0
3171,18952,100792,0,"Writing is great, subject too disturbing: I us...",B001N3REYI,AF7DZ97VNSEWN,"Michael L. Love ""free is a verb""",0.0,0.0,3.0


In [20]:
#User-based Collaborative Filtering
# Matrix with row per 'user' and column per 'item' 
pivot_df = pd.pivot_table(df_CF,index=['UserId'], columns = 'ProductId', values = "Score")
pivot_df.fillna(0,inplace=True)
print(pivot_df.shape)
pivot_df.head()

(435, 2259)


ProductId,7310172001,7310172101,B0000537KC,B000084DWM,B000084EJT,B000084EK4,B000084EK5,B000084EKA,B000084EKY,B000084ETV,...,B0092VQPKM,B0093A5XC8,B0093NIPJS,B0096EZHM2,B00975HC9G,B009B87RWG,B009B87SAC,B009DS6IF6,B009E05HYQ,B009GHI5Q4
UserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A106ZCP7RSXMRU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A1080SE9X3ECK0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10PEXB6XAQ5XF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10Z8FC0SMU5VQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A11I1I9QLMAM1A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
pivot_df['user_index'] = np.arange(0, pivot_df.shape[0], 1)
pivot_df.head()

ProductId,7310172001,7310172101,B0000537KC,B000084DWM,B000084EJT,B000084EK4,B000084EK5,B000084EKA,B000084EKY,B000084ETV,...,B0093A5XC8,B0093NIPJS,B0096EZHM2,B00975HC9G,B009B87RWG,B009B87SAC,B009DS6IF6,B009E05HYQ,B009GHI5Q4,user_index
UserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A106ZCP7RSXMRU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
A1080SE9X3ECK0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
A10PEXB6XAQ5XF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
A10Z8FC0SMU5VQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
A11I1I9QLMAM1A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [22]:
pivot_df.set_index(['user_index'], inplace=True)

# Actual ratings given by users
pivot_df.head()

ProductId,7310172001,7310172101,B0000537KC,B000084DWM,B000084EJT,B000084EK4,B000084EK5,B000084EKA,B000084EKY,B000084ETV,...,B0092VQPKM,B0093A5XC8,B0093NIPJS,B0096EZHM2,B00975HC9G,B009B87RWG,B009B87SAC,B009DS6IF6,B009E05HYQ,B009GHI5Q4
user_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# Convert dense matrix to CSR sparse matrix
pivot_csr = csr_matrix(pivot_df)

# Singular Value Decomposition
U, sigma, Vt = svds(pivot_csr, k=50)

# Construct diagonal array in SVD
sigma = np.diag(np.float32(sigma))

In [25]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 

# Predicted ratings
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = pivot_df.columns)
preds_df.head()

ProductId,7310172001,7310172101,B0000537KC,B000084DWM,B000084EJT,B000084EK4,B000084EK5,B000084EKA,B000084EKY,B000084ETV,...,B0092VQPKM,B0093A5XC8,B0093NIPJS,B0096EZHM2,B00975HC9G,B009B87RWG,B009B87SAC,B009DS6IF6,B009E05HYQ,B009GHI5Q4
0,-0.004107,-0.004107,5.050641e-16,8.13391e-16,-0.002361,-0.001889,-0.000472,-0.000472,0.002754,8.219984e-17,...,1.197983e-15,2.289295e-16,0.000251,2.406804e-18,-2.9e-05,8.788506000000001e-18,4.003002e-16,0.022994,0.000538,4.7787820000000006e-17
1,-0.000155,-0.000155,-2.259983e-18,-1.538285e-18,-0.000102,-8.1e-05,-2e-05,-2e-05,-5e-06,7.592611e-19,...,-4.109162e-18,-4.805601e-19,-2.7e-05,-9.087337e-19,-2e-06,-6.451002e-20,1.6080129999999998e-19,-0.000456,6.6e-05,1.4222339999999998e-19
2,-0.000782,-0.000782,3.125451e-18,-3.028658e-17,0.003097,0.002477,0.000619,0.000619,-0.001157,-2.075011e-18,...,5.923218e-17,1.891644e-17,-8.4e-05,-1.868714e-18,-1.5e-05,-1.1063829999999999e-19,-3.2170270000000002e-18,-0.002843,0.000243,2.815585e-19
3,-0.00058,-0.00058,-2.23708e-17,-1.020347e-17,-0.04789,-0.038312,-0.009578,-0.009578,0.015531,5.625876e-17,...,-2.55459e-16,-6.413539000000001e-17,0.001425,6.849572999999999e-19,0.000114,2.789168e-19,2.874361e-17,0.005793,-0.001648,6.193373e-18
4,0.003694,0.003694,-7.501395e-17,1.353987e-16,-0.111626,-0.089301,-0.022325,-0.022325,-0.012507,-1.780585e-16,...,1.364782e-16,4.381565e-18,5.8e-05,-1.698923e-18,0.000208,-1.031582e-18,2.7706500000000003e-17,0.011876,-1.6e-05,3.825862e-18


In [26]:
def recommend_items(userID, pivot_df, preds_df, num_recommendations):
      
    user_idx = userID-1 # index starts at 0
    
    # Get and sort the user's ratings
    sorted_user_ratings = pivot_df.iloc[user_idx].sort_values(ascending=False)
    #sorted_user_ratings
    sorted_user_predictions = preds_df.iloc[user_idx].sort_values(ascending=False)
    #sorted_user_predictions

    temp = pd.concat([sorted_user_ratings, sorted_user_predictions], axis=1)
    temp.index.name = 'Recommended Items'
    temp.columns = ['user_ratings', 'user_predictions']
    
    temp = temp.loc[temp.user_ratings == 0]   
    temp = temp.sort_values('user_predictions', ascending=False)
    print('\nBelow are the recommended items for user(user_id = {}):\n'.format(userID))
    print(temp.head(num_recommendations))

In [27]:
#Enter 'userID' and 'num_recommendations' for the user #
userID = 121
num_recommendations = 5
recommend_items(userID, pivot_df, preds_df, num_recommendations)


Below are the recommended items for user(user_id = 121):

                   user_ratings  user_predictions
Recommended Items                                
B005HG9ET0                  0.0      1.773083e-16
B001LGGH54                  0.0      1.402918e-16
B002IEVJRY                  0.0      1.384967e-16
B001BCVY4W                  0.0      1.242964e-16
B005VOOM2W                  0.0      1.195259e-16


In [28]:
rmse_df = pd.concat([final_ratings_matrix.mean(), preds_df.mean()], axis=1)
rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']
print(rmse_df.shape)
rmse_df['item_index'] = np.arange(0, rmse_df.shape[0], 1)
rmse_df.head()

(2259, 2)


Unnamed: 0_level_0,Avg_actual_ratings,Avg_predicted_ratings,item_index
ProductId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7310172001,0.011494,0.01184892,0
7310172101,0.011494,0.01184892,1
B0000537KC,0.009195,2.8801410000000002e-18,2
B000084DWM,0.011494,2.359028e-17,3
B000084EJT,0.011494,0.01052973,4


In [29]:
RMSE = round((((rmse_df.Avg_actual_ratings - rmse_df.Avg_predicted_ratings) ** 2).mean() ** 0.5), 5)
print('\nRMSE SVD Model = {} \n'.format(RMSE))


RMSE SVD Model = 0.00715 



In [30]:
 # Enter 'userID' and 'num_recommendations' for the user #
userID = 200
num_recommendations = 5
recommend_items(userID, pivot_df, preds_df, num_recommendations)


Below are the recommended items for user(user_id = 200):

                   user_ratings  user_predictions
Recommended Items                                
B005HG9ET0                  0.0          0.503492
B007RTR9G0                  0.0          0.179934
B004HOSGWE                  0.0          0.135672
B004JGQ15E                  0.0          0.133372
B000XIZLUS                  0.0          0.122194
