### Loading the data

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel 

In [3]:
# Location of the product catalogue export, kept in one named constant so the
# notebook can be pointed at another copy of the file by editing a single line.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to the notebook or a configurable data directory.
PRODUCTS_CSV = "C:/AITeam/AI In E-Commerce/Content Based Filtering/R Scripts/Products_Final.csv"

# Load the product catalogue into a DataFrame.
ds = pd.read_csv(PRODUCTS_CSV)

In [4]:
# Preview the first five rows to sanity-check what was loaded.
ds.head(n=5)

Unnamed: 0,Product.Key,Product.VLongDescription
0,1.0,Commercial Chrome Drying Rack 24' chrm w/blk f...
1,2.0,Slim Com Chrome Drying Rack 18' chrm w/blk fit...
2,3.0,Adj Ht Dbl Flared Garment Rack chrm+blk w/cast...
3,4.0,Adjustable Height Garment Rack chrm+blk w/cast...
4,5.0,Expandable Garment Rack adj ht-exp chrm+blk w/...


### Creating a TF-IDF Vectorizer 

In [5]:
# Turn every product description into a TF-IDF vector built from word
# unigrams, bigrams and trigrams, ignoring English stop words.
# min_df=1 means "keep terms that occur in at least one document", i.e. keep
# everything. An integer min_df of 0 selects the same vocabulary but is
# rejected by parameter validation in recent scikit-learn releases.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['Product.VLongDescription'])

In [6]:
# Dump the sparse TF-IDF matrix. Each printed line is "(row, column)  weight"
# for one stored entry: the row is the document (product) position and the
# column is the term's position in the fitted vocabulary.
print(tfidf_matrix)

  (0, 76328)	0.07935712266736171
  (0, 110312)	0.16124597187705228
  (0, 143742)	0.16124597187705228
  (0, 92285)	0.12136040440078569
  (0, 143732)	0.12136040440078569
  (0, 92283)	0.11659231566918098
  (0, 143328)	0.11546374842308897
  (0, 92233)	0.11698274894081664
  (0, 95939)	0.16498303998341557
  (0, 66415)	0.16498303998341557
  (0, 75824)	0.1754534976739804
  (0, 24502)	0.18376436749321184
  (0, 143132)	0.18376436749321184
  (0, 92207)	0.18376436749321184
  (0, 76150)	0.16498303998341557
  (0, 83123)	0.18376436749321184
  (0, 137783)	0.07935712266736171
  (0, 76298)	0.07772615242395964
  (0, 110311)	0.16124597187705228
  (0, 143739)	0.12136040440078569
  (0, 143730)	0.11659231566918098
  (0, 92281)	0.19735995063245557
  (0, 143326)	0.11546374842308897
  (0, 95938)	0.16498303998341557
  (0, 66412)	0.15293510205782085
  :	:
  (5757, 169105)	0.028398911341511512
  (5757, 168383)	0.05840605854246204
  (5757, 146392)	0.04133458700333496
  (5757, 131163)	0.06741852451463248
  (5757, 13

###### Now we have a TF-IDF representation of every item based on its description. Next, we need to calculate the relevance, or similarity, of one document to another.

### Vector Space Model & Calculating Cosine Similarity

In [7]:
# Maximum number of recommendations stored per product. The value mirrors the
# earlier behaviour of taking the 99 best-scoring rows and dropping one entry
# for the product itself.
MAX_RECOMMENDATIONS = 98

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity of every pair of rows.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Row i of the similarity matrix corresponds to the i-th row of `ds` by
# position, so work positionally rather than through DataFrame index labels
# (which only coincide with positions for a default RangeIndex).
product_keys = ds['Product.Key'].tolist()

# results maps Product.Key -> [(score, Product.Key), ...], best match first.
results = {}
for pos, product_key in enumerate(product_keys):
    scores = cosine_similarities[pos]
    ranked = scores.argsort()[::-1]  # positions ordered from most to least similar
    # Remove the product itself explicitly: when descriptions are duplicated
    # (tied scores) or empty (all-zero vector) it is not guaranteed to be the
    # first entry of the ranking, so slicing off element 0 is not reliable.
    ranked = ranked[ranked != pos][:MAX_RECOMMENDATIONS]
    results[product_key] = [(scores[i], product_keys[i]) for i in ranked]

print('done!')

done!


In [8]:
def item(id):
    """Return the short display name of the product with the given key.

    Looks up the rows of the global ``ds`` whose ``Product.Key`` equals ``id``,
    takes the ``Product.VLongDescription`` of the first match and returns the
    text before the first ``' - '`` separator (the whole description when the
    separator is absent). Raises ``IndexError`` when no row matches.

    Note: the parameter name shadows the built-in ``id``; it is kept so that
    existing keyword calls continue to work.
    """
    matching_descriptions = ds.loc[ds['Product.Key'] == id, 'Product.VLongDescription']
    description = matching_descriptions.iloc[0]
    short_name, _sep, _rest = description.partition(' - ')
    return short_name

# Prints precomputed recommendations; nothing is calculated here.
def recommend(item_id, num):
    """Print the ``num`` best recommendations stored for ``item_id``.

    Reads the ``(score, product key)`` pairs from the global ``results``
    dictionary and resolves display names through ``item``. Output goes to
    stdout; the function returns ``None``.
    """
    header = "".join([
        "Recommending ", str(num), " products similar to ", item(item_id), "...",
    ])
    print(header)
    print("-------")
    top_matches = results[item_id][:num]
    print("Recommended Item are below : ")
    for score, product_key in top_matches:
        line = "".join([
            "Product ID ", str(product_key),
            ", Description ", item(product_key),
            " (score:", str(score), ")",
        ])
        print(line)


In [9]:
recommend(item_id=1, num=5)

Recommending 5 products similar to Commercial Chrome Drying Rack 24' chrm w/blk fit  drying rack Drying Racks Drying racks indoor Chrome plated steel  A...
-------
Recommended Item are below : 
Product ID 2.0, Description Slim Com Chrome Drying Rack 18' chrm w/blk fit  drying rack Drying Racks Drying racks indoor Chrome plated steel  D (score:0.7130191689865444)
Product ID 1932.0, Description Chrome Expandable Drying Rack expandable drying rack  drying rack Drying Racks Drying racks indoor Chrome plated steel   (score:0.5571693970496437)
Product ID 204.0, Description KD Folding Drying Rack 23'KD wht pcoat w/blk fit  Drying Rack Drying Racks Drying racks indoor Silver  A (score:0.5073620652471014)
Product ID 1365.0, Description Oversize Folding Drying Rack 27.5'KD wht pcoat w/blk fit  Drying Rack Drying Racks Drying racks indoor Charcoal  D (score:0.49198595650566085)
Product ID 2701.0, Description Tripod Drying Rack-black 3-arm foldable rack SS/ABS  Drying Rack Drying Racks Drying rack