In [99]:
import pandas as pd
import numpy as np

In [100]:
# Load the product catalogue and the per-user ratings, reading only the
# columns used below and fixing compact dtypes at parse time.
# NOTE(review): 'unicode_escape' also interprets backslash sequences found in
# the data -- confirm the file's real encoding (e.g. latin-1 / cp1252).
products_df = pd.read_csv(
    'Products.csv',
    encoding='unicode_escape',
    usecols=['productId', 'product'],
    dtype={'productId': 'int32', 'product': 'str'},
)
rating_df = pd.read_csv(
    'Products_rating.csv',
    usecols=['userId', 'productId', 'rating'],
    dtype={'userId': 'int32', 'productId': 'int32', 'rating': 'float32'},
)

In [101]:
# Preview the first rows of the product catalogue.
products_df.head()

Unnamed: 0,productId,product
0,101,Mango Pickle Sweet
1,102,Vegetable Pickle
2,103,Carrot Pickle
3,104,Udad Papad
4,105,Sabudana Papad


In [102]:
# Preview the first rows of the ratings table.
rating_df.head()

Unnamed: 0,userId,productId,rating
0,1,101,7.0
1,1,114,7.5
2,1,111,8.0
3,1,202,8.1
4,1,201,7.8


In [103]:
# Attach the product name to every rating row. The join is an inner join on
# productId, so ratings without a matching catalogue entry are dropped.
df = rating_df.merge(products_df, on='productId', how='inner')
df.head()

Unnamed: 0,userId,productId,rating,product
0,1,101,7.0,Mango Pickle Sweet
1,7,101,9.0,Mango Pickle Sweet
2,16,101,8.4,Mango Pickle Sweet
3,1,114,7.5,Lemon Pickle Sour
4,7,114,8.7,Lemon Pickle Sour


In [104]:
# Count how many ratings each product received; this count is the popularity
# signal used to filter products further down.
combine_product_rating = df.dropna(subset=['product'])
product_ratingCount = (
    combine_product_rating
    .groupby('product')['rating']
    .count()                                  # non-null ratings per product
    .reset_index(name='totalRatingCount')     # columns: product, totalRatingCount
)
product_ratingCount.head()

Unnamed: 0,product,totalRatingCount
0,AR HandicraftsSheesham Plate Holder Decorative...,4
1,Aafiya handicrafts® Rust Free Metal Plant Stan...,1
2,"Amazon Brand - Vedaka Chana Garlic Papad, 400g",5
3,"Amazon Brand - Vedaka Moong Garlic Papad, 400g",6
4,"Amazon Brand - Vedaka Moong Special Papad, 400g",2


In [105]:
# Display the per-product rating counts (pandas abbreviates long frames to
# their first and last rows).
product_ratingCount

Unnamed: 0,product,totalRatingCount
0,AR HandicraftsSheesham Plate Holder Decorative...,4
1,Aafiya handicrafts® Rust Free Metal Plant Stan...,1
2,"Amazon Brand - Vedaka Chana Garlic Papad, 400g",5
3,"Amazon Brand - Vedaka Moong Garlic Papad, 400g",6
4,"Amazon Brand - Vedaka Moong Special Papad, 400g",2
...,...,...
69,Woolen Handmade Sweater round neck,1
70,Woolen Vest,2
71,Zaveri Pearls Earrings,1
72,srishopify handicrafts Traditional Fabric Hand...,5


In [106]:
# Put each product's total rating count next to every individual rating.
# A left join keeps every rating row of combine_product_rating.
rating_with_totalRatingCount = pd.merge(
    combine_product_rating,
    product_ratingCount,
    on='product',
    how='left',
)
rating_with_totalRatingCount.head()

Unnamed: 0,userId,productId,rating,product,totalRatingCount
0,1,101,7.0,Mango Pickle Sweet,3
1,7,101,9.0,Mango Pickle Sweet,3
2,16,101,8.4,Mango Pickle Sweet,3
3,1,114,7.5,Lemon Pickle Sour,3
4,7,114,8.7,Lemon Pickle Sour,3


In [107]:
# Show floats with three decimals from here on (global display option), then
# summarise the distribution of rating counts to help choose a threshold.
pd.options.display.float_format = '{:.3f}'.format
rating_count_summary = product_ratingCount['totalRatingCount'].describe()
print(rating_count_summary)

count   74.000
mean     2.311
std      1.344
min      1.000
25%      1.000
50%      2.000
75%      3.000
max      6.000
Name: totalRatingCount, dtype: float64


In [108]:
# Keep only ratings of products that were rated at least
# `popularity_threshold` times; the original row index is preserved.
popularity_threshold = 2
is_popular = rating_with_totalRatingCount['totalRatingCount'] >= popularity_threshold
rating_popular_product = rating_with_totalRatingCount[is_popular]
rating_popular_product.head()

Unnamed: 0,userId,productId,rating,product,totalRatingCount
0,1,101,7.0,Mango Pickle Sweet,3
1,7,101,9.0,Mango Pickle Sweet,3
2,16,101,8.4,Mango Pickle Sweet,3
3,1,114,7.5,Lemon Pickle Sour,3
4,7,114,8.7,Lemon Pickle Sour,3


In [109]:
# Display the ratings that passed the popularity filter.
rating_popular_product

Unnamed: 0,userId,productId,rating,product,totalRatingCount
0,1,101,7.000,Mango Pickle Sweet,3
1,7,101,9.000,Mango Pickle Sweet,3
2,16,101,8.400,Mango Pickle Sweet,3
3,1,114,7.500,Lemon Pickle Sour,3
4,7,114,8.700,Lemon Pickle Sour,3
...,...,...,...,...,...
161,7,428,8.400,Handicraft Storeroom Acrylic Decorative Rangol...,3
162,8,428,9.000,Handicraft Storeroom Acrylic Decorative Rangol...,3
163,12,428,8.800,Handicraft Storeroom Acrylic Decorative Rangol...,3
165,9,426,9.300,Handicrafts Paradise Door Hanging Blue Painted...,2


In [110]:
# (rows, columns) of the filtered ratings table.
rating_popular_product.shape

(145, 5)

In [111]:
# Build the product x user rating matrix. pivot_table averages duplicate
# (product, user) pairs by default; missing ratings are filled with 0.
product_features_df = pd.pivot_table(
    rating_popular_product,
    index='product',
    columns='userId',
    values='rating',
).fillna(0)
product_features_df.head()

userId,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AR HandicraftsSheesham Plate Holder Decorative Showpiece,0.0,0.0,0.0,0.0,0.0,9.1,0.0,8.1,0.0,0.0,...,0.0,0.0,9.1,0.0,0.0,0.0,7.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Chana Garlic Papad, 400g",8.5,0.0,0.0,8.1,6.8,0.0,0.0,8.8,0.0,8.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Moong Garlic Papad, 400g",8.1,7.6,8.9,0.0,7.5,9.6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Moong Special Papad, 400g",0.0,0.0,0.0,8.9,0.0,0.0,0.0,0.0,9.8,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Urad Special Papad, 400g",0.0,0.0,0.0,0.0,0.0,8.1,0.0,0.0,9.7,0.0,...,7.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [112]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Store the product x user matrix in CSR form and fit a brute-force
# nearest-neighbour index that compares products by cosine distance.
product_features_df_matrix = csr_matrix(product_features_df.to_numpy())
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(product_features_df_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [113]:
# (products, users) dimensions of the product-by-user rating matrix.
product_features_df.shape

(48, 21)

In [114]:
# Row position (in product_features_df) of the product to get neighbours for.
query_index = 20
print(query_index)
# Double-bracket iloc keeps the row two-dimensional (1 x n_users), which is
# the shape kneighbors expects for a single query.
query_vector = product_features_df.iloc[[query_index]].to_numpy()
distances, indices = model_knn.kneighbors(query_vector, n_neighbors=5)

20


In [115]:
# Re-inspect the product-by-user rating matrix the KNN model was fitted on.
product_features_df.head()

userId,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AR HandicraftsSheesham Plate Holder Decorative Showpiece,0.0,0.0,0.0,0.0,0.0,9.1,0.0,8.1,0.0,0.0,...,0.0,0.0,9.1,0.0,0.0,0.0,7.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Chana Garlic Papad, 400g",8.5,0.0,0.0,8.1,6.8,0.0,0.0,8.8,0.0,8.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Moong Garlic Papad, 400g",8.1,7.6,8.9,0.0,7.5,9.6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Moong Special Papad, 400g",0.0,0.0,0.0,8.9,0.0,0.0,0.0,0.0,9.8,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazon Brand - Vedaka Urad Special Papad, 400g",0.0,0.0,0.0,0.0,0.0,8.1,0.0,0.0,9.7,0.0,...,7.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [118]:
# Print the neighbours returned by the KNN query as a ranked list.
# The query product is part of the fitted matrix, so it normally comes back
# as one of its own neighbours. It is removed by index rather than by
# assuming it sits at position 0: when several products are at the same
# cosine distance, the query is not guaranteed to be listed first, and
# skipping position 0 blindly would drop a real neighbour and print the
# query product as its own recommendation.
print('Recommendations for {0}:\n'.format(product_features_df.index[query_index]))
neighbour_positions = [pos for pos in indices.flatten() if pos != query_index]
for rank, pos in enumerate(neighbour_positions, start=1):
    print('{0}: {1}'.format(rank, product_features_df.index[pos]))

Recommendations for Handicraft-Palace Cotton Abstract Wall Hanging Poster:

1: Lemon Pickle Sweet
2: Vegetable Pickle 
3: Jackfruit Pickle
4: DRAVY HANDICRAFTS Elephant Door Wall Hanging


In [119]:
# Same ratings, pivoted the other way round: one row per user, one column per
# product, with missing ratings filled with 0.
product_features_df2 = pd.pivot_table(
    rating_popular_product,
    index='userId',
    columns='product',
    values='rating',
).fillna(0)

In [120]:
# Preview the user-by-product rating matrix.
product_features_df2.head()

product,AR HandicraftsSheesham Plate Holder Decorative Showpiece,"Amazon Brand - Vedaka Chana Garlic Papad, 400g","Amazon Brand - Vedaka Moong Garlic Papad, 400g","Amazon Brand - Vedaka Moong Special Papad, 400g","Amazon Brand - Vedaka Urad Special Papad, 400g",Angira Handicrafts Wooden Serving Tray (Set of 2 ) 12 X 8 INCH with Digital Print (Brown),"Anu Appalam Papad Plain Indian Papad,400g - Pack of 2",BANARASI ALOO PAPAD Regular 250gms,"Brand Basket Handicraft Jewellery Box, Wedding Gift Box, Meenakari Wooden Box, Vanity Box.",Cartilage Helix Stainless Steel Eearring,...,Simran Handicrafts Wooden Spectacle Holder,Sparkling Gold Pendant necklace,Traditional Slipper dark brown,Traditional shoes,Udad Papad,Vegetable Pickle,Wooden Bracelet,Woolen Handmade Sweater V neck,Woolen Vest,srishopify handicrafts Traditional Fabric Handbags for Women
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,8.5,8.1,0.0,0.0,8.9,0.0,0.0,9.7,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.3
2,0.0,0.0,7.6,0.0,0.0,0.0,7.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,8.1
3,0.0,0.0,8.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.3,0.0,9.8,0.0,0.0,0.0,0.0,0.0,0.0,9.3
4,0.0,8.1,0.0,8.9,0.0,0.0,0.0,7.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,6.8,7.5,0.0,0.0,0.0,8.2,0.0,0.0,9.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.7


In [121]:
# (users, products) dimensions of the user-by-product matrix.
product_features_df2.shape

(21, 48)

In [122]:
# Transpose to products x users so that each row handed to the SVD describes
# one product.
X = np.transpose(product_features_df2.to_numpy())
X.shape

(48, 21)

In [123]:
import sklearn
from sklearn.decomposition import TruncatedSVD

In [124]:
# Project every product's rating vector (one row of X) onto 12 latent
# components. random_state is fixed so the decomposition is repeatable.
SVD = TruncatedSVD(n_components=12, random_state=17)
matrix = SVD.fit_transform(X)
# Expect (n_products, 12).
matrix.shape

(48, 12)

In [125]:
import warnings

In [126]:
# Pairwise Pearson correlation between products in the reduced SVD space
# (np.corrcoef treats each row of `matrix` as one variable).
# RuntimeWarning is silenced only for this call instead of for the rest of
# the kernel session, so unrelated numerical problems in later cells still
# surface.
# NOTE(review): presumably the warning comes from constant rows, which
# np.corrcoef turns into NaN correlations -- confirm and handle if needed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    corr = np.corrcoef(matrix)
corr.shape

(48, 48)

In [127]:
# Look up the column position of the query product; the positions line up
# with the rows/columns of `corr` because both follow the column order of
# product_features_df2.
# NOTE(review): despite its name, `Handbag` holds the position of the
# wall-hanging poster, not of a handbag.
product_names = product_features_df2.columns
product_list = product_names.tolist()
target_product = "Handicraft-Palace Cotton Abstract Wall Hanging Poster"
Handbag = product_list.index(target_product)  # ValueError if the title is absent
print(Handbag)

20


In [128]:
# Row of the correlation matrix for the query product, i.e. its correlation
# with every product (including itself).
corr_Handbag = corr[Handbag]

In [129]:
# Products whose latent-space correlation with the query product exceeds 0.4.
# The query product itself is excluded by position. Filtering on
# `corr < 1.0` instead would also discard any other product that correlates
# perfectly, and would keep the query if floating-point rounding put its
# self-correlation just below 1.0. NaN correlations compare False and are
# therefore left out, as before.
is_other_product = np.arange(len(corr_Handbag)) != Handbag
list(product_names[is_other_product & (corr_Handbag > 0.4)])

['DRAVY HANDICRAFTS Elephant Door Wall Hanging',
 'Jackfruit Pickle',
 'Lemon Pickle Sweet',
 'Mens Cartilage stud Earring',
 'Vegetable Pickle ']