In [1]:
import numpy as np
import pandas as pd

In [2]:
product_df = pd.read_csv('Product.csv',usecols=['ProductId','ProductName'],dtype={'ProductId': 'int32', 'ProductName': 'str'})
rating_df=pd.read_csv('Ratings.csv',usecols=['userId', 'ProductId', 'rating'],dtype={'userId': 'int32', 'ProductId': 'int32', 'rating': 'int32'})

In [3]:
product_df.head(10)

Unnamed: 0,ProductId,ProductName
0,15970,Turtle Check Men Navy Blue Shirt
1,39386,Peter England Men Party Blue Jeans
2,59263,Titan Women Silver Watch
3,21379,Manchester United Men Solid Black Track Pants
4,53759,Puma Men Grey T-shirt
5,1855,Inkfruit Mens Chain Reaction T-shirt
6,30805,Fabindia Men Striped Green Shirt
7,26960,Jealous 21 Women Purple Shirt
8,29114,Puma Men Pack of 3 Socks
9,30039,Skagen Men Black Watch


In [4]:
rating_df.head(10)

Unnamed: 0,userId,rating,ProductId
0,1,4,15970
1,1,4,39386
2,1,4,59263
3,1,5,21379
4,1,5,53759
5,1,3,1855
6,1,5,30805
7,1,4,26960
8,1,5,29114
9,1,5,30039


In [5]:
df = pd.merge(rating_df,product_df,on='ProductId')
df.head(10)

Unnamed: 0,userId,rating,ProductId,ProductName
0,1,4,15970,Turtle Check Men Navy Blue Shirt
1,1,4,39386,Peter England Men Party Blue Jeans
2,1,4,59263,Titan Women Silver Watch
3,1,5,21379,Manchester United Men Solid Black Track Pants
4,1,5,53759,Puma Men Grey T-shirt
5,1,3,1855,Inkfruit Mens Chain Reaction T-shirt
6,1,5,30805,Fabindia Men Striped Green Shirt
7,1,4,26960,Jealous 21 Women Purple Shirt
8,1,5,29114,Puma Men Pack of 3 Socks
9,1,5,30039,Skagen Men Black Watch


In [6]:
combine_product_rating = df.dropna(axis = 0, subset = ['ProductName'])
product_ratingCount = (combine_product_rating.
     groupby(by = ['ProductName'])['rating'].
     count().
     reset_index().
     rename(columns = {'rating': 'totalRatingCount'})
     [['ProductName', 'totalRatingCount']]
    )
product_ratingCount.head()

Unnamed: 0,ProductName,totalRatingCount
0,109F Blue A-Line Dress,1
1,109F Red & White A-Line Dress,1
2,109F Women Beige Embroidered Top,1
3,109F Women Black & Cream Dress,1
4,109F Women Black & Cream-Coloured Colourblocke...,1


In [7]:
rating_with_totalRatingCount = combine_product_rating.merge(product_ratingCount, left_on = 'ProductName', right_on = 'ProductName', how = 'left')
rating_with_totalRatingCount.head(10)

Unnamed: 0,userId,rating,ProductId,ProductName,totalRatingCount
0,1,4,15970,Turtle Check Men Navy Blue Shirt,1
1,1,4,39386,Peter England Men Party Blue Jeans,1
2,1,4,59263,Titan Women Silver Watch,5
3,1,5,21379,Manchester United Men Solid Black Track Pants,1
4,1,5,53759,Puma Men Grey T-shirt,6
5,1,3,1855,Inkfruit Mens Chain Reaction T-shirt,1
6,1,5,30805,Fabindia Men Striped Green Shirt,1
7,1,4,26960,Jealous 21 Women Purple Shirt,2
8,1,5,29114,Puma Men Pack of 3 Socks,4
9,1,5,30039,Skagen Men Black Watch,1


In [8]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(product_ratingCount['totalRatingCount'].describe())

count   31135.000
mean        1.427
std         1.738
min         1.000
25%         1.000
50%         1.000
75%         1.000
max        82.000
Name: totalRatingCount, dtype: float64


In [9]:
popularity_threshold = 50
rating_popular_product= rating_with_totalRatingCount.query('totalRatingCount >= @popularity_threshold')
rating_popular_product.head(10)

Unnamed: 0,userId,rating,ProductId,ProductName,totalRatingCount
85,1,5,48781,Lucera Women Silver Pendant,56
123,1,4,49005,Lucera Women Silver Ring,50
394,4,3,48786,Lucera Women Silver Pendant,56
430,4,5,48772,Lucera Women Silver Pendant,56
583,6,5,48927,Lucera Women Silver Ring,50
744,6,3,49034,Lucera Women Silver Ring,50
1072,8,3,48929,Lucera Women Silver Ring,50
1106,9,3,49033,Lucera Women Silver Ring,50
1217,10,4,48916,Lucera Women Silver Pendant,56
1321,11,3,48788,Lucera Women Silver Pendant,56


In [10]:
rating_popular_product.shape

(188, 5)

In [11]:
# First lets create a Pivot matrix
product_features_df=rating_popular_product.pivot_table(index='ProductName',columns='userId',values='rating').fillna(0)
product_features_df.head(10)

userId,1,4,6,8,9,10,11,14,15,16,...,268,271,274,275,279,282,288,289,290,294
ProductName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Lucera Women Silver Earrings,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,...,0.0,0.0,4.0,0.0,4.0,0.0,4.0,5.0,4.0,0.0
Lucera Women Silver Pendant,5.0,4.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,...,5.0,3.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
Lucera Women Silver Ring,4.0,0.0,4.0,3.0,3.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,3.667,2.0,0.0,4.0,4.0,0.0,0.0,2.5


In [12]:
from scipy.sparse import csr_matrix

product_features_df_matrix = csr_matrix(product_features_df.values)

from sklearn.neighbors import NearestNeighbors

model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(product_features_df_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [13]:
product_features_df.shape

(3, 113)

In [14]:
query_index = np.random.choice(product_features_df.shape[0])
print(query_index)
distances, indices = model_knn.kneighbors(product_features_df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 3)

1


In [15]:
product_features_df.head(10)

userId,1,4,6,8,9,10,11,14,15,16,...,268,271,274,275,279,282,288,289,290,294
ProductName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Lucera Women Silver Earrings,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,...,0.0,0.0,4.0,0.0,4.0,0.0,4.0,5.0,4.0,0.0
Lucera Women Silver Pendant,5.0,4.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,...,5.0,3.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
Lucera Women Silver Ring,4.0,0.0,4.0,3.0,3.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,3.667,2.0,0.0,4.0,4.0,0.0,0.0,2.5


In [33]:
for i in range(0, len(distances.flatten())):
    if i == 1:
        print('Recommendations for {0}:\n'.format(product_features_df.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, product_features_df.index[indices.flatten()[i]], distances.flatten()[i]))

0: Lucera Women Silver Pendant, with distance of 5.551115123125783e-16:
Recommendations for Lucera Women Silver Pendant:

2: Lucera Women Silver Ring, with distance of 0.8192816519943599:
