In [1]:
import pandas as pd
import numpy as np

In [10]:
# Load the product catalogue and the user ratings, reading only the columns
# used downstream and fixing compact dtypes at parse time.
products_df = pd.read_csv(
    'Products.csv',
    usecols=['productId', 'product'],
    dtype={'productId': 'int32', 'product': 'str'},
)
rating_df = pd.read_csv(
    'Products_rating.csv',
    usecols=['userId', 'productId', 'rating'],
    dtype={'userId': 'int32', 'productId': 'int32', 'rating': 'float32'},
)

In [11]:
# Preview the first five rows of the product catalogue.
products_df.head(n=5)

Unnamed: 0,productId,product
0,1,WHITE HANGING HEART T-LIGHT HOLDER
1,2,WHITE METAL LANTERN
2,3,CREAM CUPID HEARTS COAT HANGER
3,4,KNITTED UNION FLAG HOT WATER BOTTLE
4,5,RED WOOLLY HOTTIE WHITE HEART.


In [12]:
# Preview the first five rating rows.
rating_df.head(n=5)

Unnamed: 0,userId,productId,rating
0,1,1,7.0
1,1,25,7.5
2,1,26,8.0
3,1,16,8.1
4,1,17,7.8


In [13]:
# Attach the product name to every rating by joining on productId
# (default join type, so ratings without a catalogue entry are dropped).
df = rating_df.merge(products_df, on='productId')
df.head()

Unnamed: 0,userId,productId,rating,product
0,1,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER
1,6,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER
2,1,25,7.5,BLUE COAT RACK PARIS FASHION
3,3,25,8.3,BLUE COAT RACK PARIS FASHION
4,8,25,8.3,BLUE COAT RACK PARIS FASHION


In [14]:
# Discard ratings whose product name is missing, then count the ratings each
# product received. The result has one row per product with the columns
# 'product' and 'totalRatingCount'.
combine_product_rating = df.dropna(subset=['product'])
product_ratingCount = (
    combine_product_rating
    .groupby('product')['rating']
    .count()
    .reset_index(name='totalRatingCount')
)
product_ratingCount.head()

Unnamed: 0,product,totalRatingCount
0,3 TIER CAKE TIN RED AND CREAM,2
1,ALARM CLOCK BAKELIKE GREEN,3
2,ALARM CLOCK BAKELIKE PINK,2
3,ASSORTED COLOUR BIRD ORNAMENT,2
4,BATH BUILDING BLOCK WORD,3


In [15]:
# Left-join the per-product rating count back onto every individual rating row.
rating_with_totalRatingCount = pd.merge(
    combine_product_rating,
    product_ratingCount,
    on='product',
    how='left',
)
rating_with_totalRatingCount.head()

Unnamed: 0,userId,productId,rating,product,totalRatingCount
0,1,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER,3
1,6,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER,3
2,1,25,7.5,BLUE COAT RACK PARIS FASHION,3
3,3,25,8.3,BLUE COAT RACK PARIS FASHION,3
4,8,25,8.3,BLUE COAT RACK PARIS FASHION,3


In [16]:
# Render floats with three decimals, then print summary statistics of the
# number of ratings per product.
pd.options.display.float_format = lambda value: '%.3f' % value
print(product_ratingCount['totalRatingCount'].describe())

count   44.000
mean     2.500
std      1.338
min      1.000
25%      2.000
50%      2.000
75%      3.000
max      6.000
Name: totalRatingCount, dtype: float64


In [17]:
# Minimum number of ratings a product needs in order to be kept.
# The per-product counts summarised earlier in this notebook only range from
# 1 to 6, so the previous threshold of 100 removed every row and left nothing
# to pivot or to fit the nearest-neighbour model on. A threshold of 1 keeps
# all rated products, which is what the recorded outputs (110 rows, 44
# products) correspond to; raise it once the dataset is large enough.
popularity_threshold = 1
rating_popular_product = rating_with_totalRatingCount.query('totalRatingCount >= @popularity_threshold')

# Fail here with a clear message instead of later inside the pivot / kNN fit.
if rating_popular_product.empty:
    raise ValueError(
        'No product has at least {0} ratings; lower popularity_threshold.'.format(popularity_threshold)
    )
rating_popular_product.head()

Unnamed: 0,userId,productId,rating,product,totalRatingCount
0,1,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER,3
1,6,1,7.0,WHITE HANGING HEART T-LIGHT HOLDER,3
2,1,25,7.5,BLUE COAT RACK PARIS FASHION,3
3,3,25,8.3,BLUE COAT RACK PARIS FASHION,3
4,8,25,8.3,BLUE COAT RACK PARIS FASHION,3


In [18]:
# (rows, columns) of the ratings that passed the popularity filter.
rating_popular_product.shape

(110, 5)

In [19]:
# Build the product x user rating matrix: one row per product, one column per
# userId. Product/user pairs without a rating are filled with 0.
product_features_df = pd.pivot_table(
    rating_popular_product,
    index='product',
    columns='userId',
    values='rating',
).fillna(0)
product_features_df.head()

userId,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3 TIER CAKE TIN RED AND CREAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.6,0.0,0.0,9.0,0.0,0.0
ALARM CLOCK BAKELIKE GREEN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,9.3,0.0,0.0,7.5,0.0,8.3,0.0
ALARM CLOCK BAKELIKE PINK,0.0,0.0,9.8,0.0,0.0,0.0,0.0,9.8,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ASSORTED COLOUR BIRD ORNAMENT,0.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BATH BUILDING BLOCK WORD,8.0,0.0,8.2,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Convert the rating matrix to CSR sparse form, then fit a brute-force
# nearest-neighbour index that compares products by cosine distance.
product_features_df_matrix = csr_matrix(product_features_df.values)
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(product_features_df_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [21]:
# (number of products, number of users) in the pivoted rating matrix.
product_features_df.shape

(44, 21)

In [22]:
# Row position (in product_features_df) of the product to find neighbours for.
query_index = 12
print(query_index)
# kneighbors needs a 2-D input, so the query row is selected as a one-row
# frame. Seven neighbours are requested.
distances, indices = model_knn.kneighbors(
    product_features_df.iloc[[query_index]].values,
    n_neighbors=7,
)

12


In [23]:
# Preview the first five rows of the product x user rating matrix.
product_features_df.head(n=5)

userId,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3 TIER CAKE TIN RED AND CREAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.6,0.0,0.0,9.0,0.0,0.0
ALARM CLOCK BAKELIKE GREEN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,9.3,0.0,0.0,7.5,0.0,8.3,0.0
ALARM CLOCK BAKELIKE PINK,0.0,0.0,9.8,0.0,0.0,0.0,0.0,9.8,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ASSORTED COLOUR BIRD ORNAMENT,0.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BATH BUILDING BLOCK WORD,8.0,0.0,8.2,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# Print the neighbours returned by kneighbors as a ranked recommendation list,
# leaving out the query product itself.
#
# The query row is recognised by its index instead of being assumed to sit at
# position 0 of the result: any other product whose rating vector points in
# the same direction is also at cosine distance 0, and the order among such
# ties is not guaranteed. Skipping "position 0" could therefore drop a real
# neighbour and list the query product as its own recommendation.
neighbor_ids = indices.flatten()
neighbor_distances = distances.flatten()

# One of the requested neighbours is expected to be the query itself, so at
# most len - 1 recommendations are shown (same count as before).
recommendations = [
    (neighbor_id, distance)
    for neighbor_id, distance in zip(neighbor_ids, neighbor_distances)
    if neighbor_id != query_index
][:max(len(neighbor_ids) - 1, 0)]

if len(neighbor_ids) > 0:
    print('Recommendations for {0}:\n'.format(product_features_df.index[query_index]))
for rank, (neighbor_id, distance) in enumerate(recommendations, start=1):
    print('{0}: {1}, with distance of {2}:'.format(rank, product_features_df.index[neighbor_id], distance))

Recommendations for EDWARDIAN PARASOL NATURAL:

1: JUMBO SHOPPER VINTAGE RED PAISLEY, with distance of 0.0:
2: RED COAT RACK PARIS FASHION, with distance of 0.5749744176864624:
3: WHITE SPOT RED CERAMIC DRAWER KNOB, with distance of 0.5874135494232178:
4: WOOD BLACK BOARD ANT WHITE FINISH, with distance of 0.6257290244102478:
5: RED CHARLIE+LOLA PERSONAL DOORSIGN, with distance of 1.0:
6: PINK BREAKFAST CUP AND SAUCER , with distance of 1.0:
