In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
%matplotlib inline
sns.set()

In [2]:
# Load the product catalogue; the relative path is resolved against the
# notebook's working directory.
products = pd.read_csv('final_products.csv')

In [3]:
# (rows, columns) of the loaded catalogue.
products.shape

(152, 8)

In [5]:
# Column dtypes and non-null counts, to spot missing values before the text
# column is vectorised.
products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         152 non-null    object 
 1   category     152 non-null    object 
 2   pid          152 non-null    int64  
 3   price        152 non-null    float64
 4   image        152 non-null    object 
 5   description  152 non-null    object 
 6   rating       152 non-null    float64
 7   brand        152 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 9.6+ KB


In [6]:
# Peek at one description (index label 0) to see what the free-text field
# looks like.
products.description[0]

'Hand Weaven Sabai Grass Wall Plate'

## <font color='grey'> Content-Based Recommendations Using Description Similarity </font>

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Replace missing descriptions with empty strings so the vectorizer only ever
# receives text.
products['description'] = products['description'].fillna('')

# TF-IDF configuration for the description text.
tfv = TfidfVectorizer(
    analyzer='word',            # build features from words, not characters
    token_pattern=r'\w{1,}',    # a token is any run of one or more word characters
    strip_accents='unicode',    # strip accents from characters before tokenising
    stop_words='english',       # drop common English words such as "the" and "in"
    ngram_range=(1, 3),         # use unigrams, bigrams and trigrams as features
    min_df=10,                  # ignore terms found in fewer than 10 descriptions
    max_features=None,          # no cap on the vocabulary size
)

In [9]:
# Fit the vectorizer on the descriptions and transform them in one step; the
# result is a sparse matrix with one row per description.
tfv_matrix = tfv.fit_transform(products['description'])

In [10]:
# Summary repr of the sparse matrix: shape, dtype and number of stored values.
tfv_matrix

<152x304 sparse matrix of type '<class 'numpy.float64'>'
	with 5729 stored elements in Compressed Sparse Row format>

In [11]:
# (number of descriptions, number of TF-IDF features).
tfv_matrix.shape

(152, 304)

In [12]:
from sklearn.metrics.pairwise import sigmoid_kernel
# Pairwise similarity between all descriptions: sig[i, j] scores description i
# against description j, so row i holds product i's similarity to every
# product (including itself).
# NOTE(review): the row printed in the next cell sits almost entirely at a
# single baseline value (0.76159416), i.e. many candidates tie -- confirm that
# rankings built from such rows are meaningful.
sig = sigmoid_kernel(tfv_matrix,tfv_matrix)

In [13]:
# Similarity scores of product 0 against every product in the catalogue.
sig[0]

array([0.76297219, 0.76159416, 0.76159416, 0.76200206, 0.76159416,
       0.76159416, 0.76159416, 0.76194517, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76187757,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76194391, 0.76180588,
       0.76159416, 0.76192939, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76159416, 0.76159416, 0.76159416,
       0.76159416, 0.76159416, 0.76173266, 0.76173266, 0.76173

In [15]:
# Lookup table from product name to its row number in `products`.
# Series.drop_duplicates() compares *values* (the row numbers, which are all
# distinct), so it never removes repeated names; a repeated name would then
# make `indices[name]` return several row numbers instead of one.
# De-duplicate on the index (the names) instead, keeping the first row of each.
indices = pd.Series(products.index, index=products['name'])
indices = indices[~indices.index.duplicated(keep='first')]

In [16]:
# First ten entries of the name -> row-number lookup.
indices.head(10)

name
Indian Traditional Handicraft, For Decorative, 12"                                                       0
Indian Traditional Handicraft Golden Grass Tray, For Decorative, 8"x2"                                   1
Handmade Basket Weaving Indian Handicraft Supplier Golden Grass Mini Round Box, For Decorative, 3"X4"    2
Multicolor Wooden Elephant Statue Handmade Enamel Work Decorative Showpiece, 8*4*8                       3
Indian Handicrafts                                                                                       4
Metal Camel Statue Meenakari Painting Indian Crafts Jaipur Rajasthani Handicrafts                        5
TWG Handicraft Mix Polyresin Decorative Ashirwad Buddha Statue                                           6
Madhubani Hand Painted Pure Tussar Silk Long Dupatta., Cream,white                                       7
Silk Embroidery Dupatta                                                                                  8
Sami Gota Work Bandani silk dupp

In [22]:
def product_recommendation(title, sig=sig, top_n=10):
    """Return the names of the products whose descriptions best match `title`'s.

    Parameters
    ----------
    title : str
        Exact product name, as it appears in ``products['name']``.
    sig : array-like of shape (n_products, n_products), optional
        Pairwise similarity matrix; row ``i`` holds the scores of product ``i``
        against every product. Defaults to the matrix bound to ``sig`` when
        this function was defined.
    top_n : int, optional
        Maximum number of recommendations to return (non-negative, default 10).

    Returns
    -------
    pandas.Series
        Names of up to ``top_n`` products, most similar first. The row of the
        queried product itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not a known product name.
    """
    try:
        indx = indices[title]
    except KeyError:
        raise KeyError(f"Unknown product name: {title!r}") from None

    # A name shared by several rows yields a Series of row numbers; use the
    # first occurrence so that `sig[indx]` is always a single row of scores.
    if isinstance(indx, pd.Series):
        indx = indx.iloc[0]

    # Score every *other* product. The queried row is removed explicitly
    # rather than by dropping the top-ranked entry: when scores tie, the
    # product is not guaranteed to rank first against itself, and slicing off
    # position 0 would then discard a different product and keep the query.
    sig_scores = [(i, score) for i, score in enumerate(sig[indx]) if i != indx]

    # Highest score first; sorted() is stable, so ties keep their row order.
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)

    # Row numbers of the best matches.
    product_indices = [i for i, _ in sig_scores[:top_n]]

    return products['name'].iloc[product_indices]

In [23]:
# Prompt for an exact product name and print the distinct names recommended
# for it; a name that is not in the catalogue makes the lookup raise KeyError.
n=input("Enter the name of the product: ")
print("\nTop Recommended products are: \n")
# .unique() collapses repeated names, so fewer than ten may be printed.
print(product_recommendation(n).unique())

Enter the name of the product: Simran Handicrafts Solid Wood Tray

Top Recommended products are: 

['Rajrang Carved Pattern Elephant Showpiece  -  8 cm'
 'Rastogi Handicrafts JOINT LESS LEAK PROOF DECORATIVE 950 ml Bottle'
 'Balaji Velvet Sofa Cover'
 'Mahadev Handicrafts Cotton Cartoon Double Bedsheet'
 'AND Designs Artistic Wooden Key Holder Wooden Key Holder'
 'Brown Wood Elephant Shikaar Carvings/Wooden Elephant'
 'Ethnic Handicrafts Solid Wood Queen Bed'
 'Ethnic Handicrafts Solid Wood Single Bed']
