In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
%matplotlib inline
sns.set()

In [2]:
# Load the product catalogue from a CSV file located in the working directory.
products = pd.read_csv('final_products.csv')

In [3]:
# Sanity check: number of rows and columns that were loaded.
products.shape

(152, 8)

In [4]:
# Column dtypes and non-null counts, to spot missing values before vectorizing.
products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         152 non-null    object 
 1   category     152 non-null    object 
 2   pid          152 non-null    int64  
 3   price        152 non-null    float64
 4   image        152 non-null    object 
 5   description  152 non-null    object 
 6   rating       152 non-null    float64
 7   brand        152 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 9.6+ KB


In [5]:
# Peek at one raw description to see what the text to be vectorized looks like.
products.description[0]

'Hand Weaven Sabai Grass Wall Plate'

## <font color='grey'> Content-Based Recommendations Using Description Similarity </font>

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Replace missing descriptions with empty strings so the vectorizer only sees text.
products['description'] = products['description'].fillna('')

# TF-IDF configuration used for the product descriptions.
tfv = TfidfVectorizer(
    analyzer='word',            # build features from word tokens
    token_pattern=r'\w{1,}',    # a token is any run of one or more word characters
    strip_accents='unicode',    # normalize accented characters
    stop_words='english',       # drop common English stop words such as "the" and "in"
    ngram_range=(1, 3),         # use unigrams, bigrams and trigrams as terms
    min_df=10,                  # ignore terms that appear in fewer than 10 descriptions
    max_features=None,          # no cap on the vocabulary size
)

In [7]:
# Learn the vocabulary and IDF weights from the descriptions and transform them in
# one step; the result is a sparse document-term matrix with one row per product.
tfv_matrix = tfv.fit_transform(products['description'])

In [8]:
# Show the sparse-matrix summary (dimensions, dtype and stored-element count).
tfv_matrix

<152x304 sparse matrix of type '<class 'numpy.float64'>'
	with 5729 stored elements in Compressed Sparse Row format>

In [9]:
# (n_products, n_terms); the same dimensions already appear in the repr shown above.
tfv_matrix.shape

(152, 304)

In [10]:
from sklearn.metrics.pairwise import sigmoid_kernel

# Pairwise sigmoid-kernel score between every pair of product descriptions:
# sig[i, j] relates product i to product j. When the second argument is left out,
# sigmoid_kernel compares the matrix with itself.
sig = sigmoid_kernel(tfv_matrix)

In [11]:
# Scores of product 0 against every product. Most entries share one baseline value,
# which suggests those products have no weighted term in common with product 0
# (presumably tanh(1) ~= 0.7616 coming from the kernel's default coef0 -- confirm
# against the sigmoid_kernel documentation).
sig[0]

array([0.76297219, 0.76159416, 0.76159416, 0.76200206, 0.76159416,
       0.76159416, 0.76159416, 0.76194517, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76187757,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76194391, 0.76180588,
       0.76159416, 0.76192939, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76173266, 0.76173266, 0.76173

In [26]:
# Map each product name to its row position in `products` (and therefore in the
# similarity matrices built from it).
indices = pd.Series(products.index, index=products['name'])
# Series.drop_duplicates() compares the *values* -- the row positions, which are
# already unique -- so it never removed repeated names. Filter on the index labels
# instead, keeping the first occurrence, so a lookup by name yields one position.
indices = indices[~indices.index.duplicated(keep='first')]

In [27]:
# Inspect the first few name -> row-position pairs of the lookup table.
indices.head(10)

name
Indian Traditional Handicraft, For Decorative, 12"                                                       0
Indian Traditional Handicraft Golden Grass Tray, For Decorative, 8"x2"                                   1
Handmade Basket Weaving Indian Handicraft Supplier Golden Grass Mini Round Box, For Decorative, 3"X4"    2
Multicolor Wooden Elephant Statue Handmade Enamel Work Decorative Showpiece, 8*4*8                       3
Indian Handicrafts                                                                                       4
Metal Camel Statue Meenakari Painting Indian Crafts Jaipur Rajasthani Handicrafts                        5
TWG Handicraft Mix Polyresin Decorative Ashirwad Buddha Statue                                           6
Madhubani Hand Painted Pure Tussar Silk Long Dupatta., Cream,white                                       7
Silk Embroidery Dupatta                                                                                  8
Sami Gota Work Bandani silk dupp

In [14]:
def product_recommendation(title, sig=sig, top_n=10):
    """Return the names of the products most similar to ``title``.

    Parameters
    ----------
    title : str
        Product name, looked up in the module-level ``indices`` Series.
    sig : 2-D array-like, optional
        Pairwise similarity matrix whose rows/columns follow the row order of
        ``products``. Defaults to the ``sig`` that existed when this function
        was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Names of up to ``top_n`` products, most similar first. The queried
        product itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not a known product name.
    """
    try:
        match = indices[title]
    except KeyError:
        raise KeyError('Unknown product name: {!r}'.format(title)) from None

    # A name that is listed more than once yields a Series of row positions;
    # use the first occurrence so `sig[indx]` is always a single row.
    indx = int(match.iloc[0]) if isinstance(match, pd.Series) else int(match)

    scores = np.asarray(sig[indx]).ravel()

    # Stable descending sort: tied scores keep ascending row order, which is the
    # order sorted(..., reverse=True) produced on the enumerated scores.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the queried product explicitly rather than assuming it sorts first;
    # with tied top scores it may not, and it would then recommend itself.
    ranked = ranked[ranked != indx][:max(top_n, 0)]

    return products['name'].iloc[ranked]

In [15]:
# Ask for a product name interactively, then print the distinct recommended names
# based on the description-only similarity matrix.
n = input("Enter the name of the product: ")
print("\nTop Recommended products are: \n")
print(product_recommendation(title=n, sig=sig).unique())

Enter the name of the product: Indian Traditional Handicraft, For Decorative, 12"

Top Recommended products are: 

['Multicolor Wooden Elephant Statue Handmade Enamel Work Decorative Showpiece, 8*4*8'
 'Madhubani Hand Painted Pure Tussar Silk Long Dupatta., Cream,white'
 'Brown Wood Elephant Shikaar Carvings/Wooden Elephant'
 'Shoppingtara Canon Handicraft Home Decor Showpiece  -  5 cm'
 'Wooden Parrot Round Pavati Wp057'
 'Golden Round Decorative glass pumpkin, For Decoration'
 'Wooden Bawla Musician for Decoration, Size: 2.5 Inches'
 'eKolhapuri Terracotta Vase']


In [16]:
# Build one text field per product by joining description, name and category with
# single spaces (missing values become empty strings first).
feature_columns = ['description', 'name', 'category']
feature_parts = products[feature_columns].fillna('').values.tolist()
products['features'] = pd.Series(feature_parts).str.join(' ')

In [17]:
# Second vectorizer with the same settings, for the combined `features` text.
tfv2 = TfidfVectorizer(
    analyzer='word',            # build features from word tokens
    token_pattern=r'\w{1,}',    # a token is any run of one or more word characters
    strip_accents='unicode',    # normalize accented characters
    stop_words='english',       # drop common English stop words
    ngram_range=(1, 3),         # use unigrams, bigrams and trigrams as terms
    min_df=10,                  # ignore terms that appear in fewer than 10 documents
    max_features=None,          # no cap on the vocabulary size
)

In [18]:
# Fit the second vectorizer on the combined text and build its sparse
# document-term matrix (one row per product).
tfv_matrix2 = tfv2.fit_transform(products['features'])

In [19]:
# Show the sparse-matrix summary for the combined-text features.
tfv_matrix2

<152x325 sparse matrix of type '<class 'numpy.float64'>'
	with 6109 stored elements in Compressed Sparse Row format>

In [20]:
# (n_products, n_terms) for the combined-text matrix; also visible in the repr above.
tfv_matrix2.shape

(152, 325)

In [21]:
# Pairwise sigmoid-kernel scores for the combined-text features; with a single
# argument the matrix is compared with itself.
sig2 = sigmoid_kernel(tfv_matrix2)

In [22]:
# Scores of product 0 against every product, using the combined-text features.
sig2[0]

array([0.76288336, 0.76269908, 0.76243031, 0.76190131, 0.76253333,
       0.76204224, 0.76180229, 0.76171069, 0.76159416, 0.76159416,
       0.76179349, 0.76159416, 0.76159416, 0.76159416, 0.76179986,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76167912, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76177102,
       0.76164997, 0.76170213, 0.76162351, 0.76183118, 0.76159416,
       0.76167467, 0.76173569, 0.76163966, 0.76168235, 0.76203494,
       0.76182945, 0.76167872, 0.76159416, 0.76172526, 0.76167037,
       0.76159416, 0.76183855, 0.76159416, 0.76165803, 0.76168114,
       0.76164204, 0.76172738, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76162219, 0.76171044, 0.76171044, 0.76171

In [23]:
# Same interactive lookup as before, but ranked with the similarity matrix built
# from description + name + category.
n = input("Enter the name of the product: ")
print("\nTop Recommended products are: \n")
print(product_recommendation(title=n, sig=sig2).unique())

Enter the name of the product: Indian Traditional Handicraft, For Decorative, 12"

Top Recommended products are: 

['Indian Traditional Handicraft Golden Grass Tray, For Decorative, 8"x2"'
 'Indian Handicrafts'
 'Handmade Basket Weaving Indian Handicraft Supplier Golden Grass Mini Round Box, For Decorative, 3"X4"'
 'Lal Haveli Decorative Owl Showpiece  -  15.24 cm'
 'Metal Camel Statue Meenakari Painting Indian Crafts Jaipur Rajasthani Handicrafts'
 'Round White Decorative Artificial Flowers Ring, For Event Decorations'
 'Amba Handicraft 3 Face Clock Showpiece  -  64 cm'
 'Multicolor Wooden Elephant Statue Handmade Enamel Work Decorative Showpiece, 8*4*8'
 'Utsav Handicraft UHD004 Showpiece  -  8 cm'
 'Ark Creation Decorative Shankh']


In [28]:
# Persist the description-based similarity matrix as plain text. The file name
# must be the one np.loadtxt reads back later in the notebook; it was misspelled
# as 'CosinSimilarity.txt', so on a fresh run the reload could not find the file.
# Note: `sig` holds sigmoid-kernel scores, not cosine similarities.
np.savetxt('CosineSimilarity.txt', sig)

In [30]:
# Read a similarity matrix back from a whitespace-delimited text file.
# NOTE(review): the file name must match the one passed to np.savetxt -- confirm.
new = np.loadtxt('CosineSimilarity.txt')

In [34]:
# Pair each score in row 5 of `sig` with its column position, mirroring the first
# step of product_recommendation, to inspect the raw values for one product.
sig_scores1 = list(enumerate(sig[5]))

In [35]:
# Displays the complete (position, score) list for product 5 -- one entry per product.
sig_scores1

[(0, 0.7615941559557649),
 (1, 0.7615941559557649),
 (2, 0.7615941559557649),
 (3, 0.7618189363620693),
 (4, 0.7615941559557649),
 (5, 0.7629721932106641),
 (6, 0.7617925235515622),
 (7, 0.7615941559557649),
 (8, 0.7615941559557649),
 (9, 0.7615941559557649),
 (10, 0.761777108459381),
 (11, 0.7615941559557649),
 (12, 0.7615941559557649),
 (13, 0.7615941559557649),
 (14, 0.7615941559557649),
 (15, 0.7615941559557649),
 (16, 0.7615941559557649),
 (17, 0.7615941559557649),
 (18, 0.7615941559557649),
 (19, 0.7615941559557649),
 (20, 0.7615941559557649),
 (21, 0.7615941559557649),
 (22, 0.7615941559557649),
 (23, 0.7615941559557649),
 (24, 0.7615941559557649),
 (25, 0.7615941559557649),
 (26, 0.7615941559557649),
 (27, 0.7615941559557649),
 (28, 0.7615941559557649),
 (29, 0.7615941559557649),
 (30, 0.7615941559557649),
 (31, 0.7615941559557649),
 (32, 0.7615941559557649),
 (33, 0.7615941559557649),
 (34, 0.7615941559557649),
 (35, 0.7619811064093744),
 (36, 0.7620569832615779),
 (37, 0.7621