In [1]:
import numpy as np 
import pandas as pd 

In [2]:
# Load the three source tables (catalogue, users, ratings) from the working directory.
books, users, ratings = (
    pd.read_csv(csv_path) for csv_path in ('books.csv', 'users.csv', 'ratings.csv')
)

In [3]:
# Report (rows, columns) for each table: books, then ratings, then users.
for frame in (books, ratings, users):
    print(frame.shape)

(19, 8)
(1048575, 3)
(278858, 3)


In [4]:
books.duplicated().sum()

0

In [5]:
books.isnull().sum()

ISBN                    1
Product-Title           0
Product-Owner           1
Year-Of-Publication     1
Publisher               1
Unnamed: 5             19
Image-URL-M             0
Image-URL-L             1
dtype: int64

In [6]:
books.head()

Unnamed: 0,ISBN,Product-Title,Product-Owner,Year-Of-Publication,Publisher,Unnamed: 5,Image-URL-M,Image-URL-L
0,195153448,Duke 390,Mark P. O. Morford,2002.0,Oxford University Press,,https://autocdn.co.uk/cdn-cgi/imagedelivery/JC...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Dining Table,Carlo D'Este,1991.0,HarperPerennial,,https://scontent.fktm3-1.fna.fbcdn.net/v/t39.3...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Computer Table,Gina Bari Kolata,1999.0,Farrar Straus Giroux,,https://media.karousell.com/media/photos/produ...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,Chair,E. J. W. Barber,1999.0,W. W. Norton &amp; Company,,https://for-sale.used-secondhand.co.uk/media/u...,http://images.amazon.com/images/P/0393045218.0...


## Popularity Based Recommender System

In [7]:
# Inner-join every rating with the catalogue row that shares its ISBN.
ratings_with_name = pd.merge(ratings, books, on='ISBN')
ratings_with_name

Unnamed: 0,User-ID,ISBN,Book-Rating,Product-Title,Product-Owner,Year-Of-Publication,Publisher,Unnamed: 5,Image-URL-M,Image-URL-L
0,2,195153448,0,Duke 390,Mark P. O. Morford,2002.0,Oxford University Press,,https://autocdn.co.uk/cdn-cgi/imagedelivery/JC...,http://images.amazon.com/images/P/0195153448.0...
1,8,2005018,5,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
2,11400,2005018,0,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
3,11676,2005018,8,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
4,41385,2005018,0,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
...,...,...,...,...,...,...,...,...,...,...
86,176875,1575663937,0,Michelin Tyre,Robert Hendrickson,1999.0,Kensington Publishing Corp.,,https://media.karousell.com/media/photos/produ...,http://images.amazon.com/images/P/1575663937.0...
87,213915,1575663937,0,Michelin Tyre,Robert Hendrickson,1999.0,Kensington Publishing Corp.,,https://media.karousell.com/media/photos/produ...,http://images.amazon.com/images/P/1575663937.0...
88,8,1881320189,7,Ceat Tyre,Julia Oliver,1994.0,River City Pub,,https://3.imimg.com/data3/PB/XH/GLADMIN-155616...,http://images.amazon.com/images/P/1881320189.0...
89,35826,1881320189,0,Ceat Tyre,Julia Oliver,1994.0,River City Pub,,https://3.imimg.com/data3/PB/XH/GLADMIN-155616...,http://images.amazon.com/images/P/1881320189.0...


In [8]:
# Number of ratings per product title.
# The following cell repeats this computation and then renames the count column.
num_rating_df = ratings_with_name.groupby('Product-Title')['Book-Rating'].count().reset_index()

In [9]:
# Number of ratings per product title, stored in a column named `num_ratings`.
num_rating_df = (
    ratings_with_name.groupby('Product-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)
num_rating_df

Unnamed: 0,Product-Title,num_ratings
0,Air Conditioner,6
1,Air Cooller,1
2,Audi R8 Sterring wheel,1
3,Ceat Tyre,3
4,Chair,1
5,Computer Table,11
6,Dining Table,3
7,Duke 390,1
8,Gamming Chair,30
9,Guitar,3


In [10]:
# Mean rating per product title, stored in a column named `avg_rating`.
# The rating column is selected *before* aggregating: calling `.mean()` on the
# whole grouped frame would also try to average the text columns (owner,
# publisher, image URLs), which raises TypeError on pandas >= 2.0.
avg_rating_df = (
    ratings_with_name.groupby('Product-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)
avg_rating_df

Unnamed: 0,Product-Title,avg_rating
0,Air Conditioner,0.0
1,Air Cooller,0.0
2,Audi R8 Sterring wheel,6.0
3,Ceat Tyre,2.333333
4,Chair,0.0
5,Computer Table,0.545455
6,Dining Table,2.666667
7,Duke 390,0.0
8,Gamming Chair,1.533333
9,Guitar,2.666667


In [11]:
# Combine rating counts and rating averages into one row per product title.
popular_df = pd.merge(num_rating_df, avg_rating_df, on='Product-Title')
popular_df

Unnamed: 0,Product-Title,num_ratings,avg_rating
0,Air Conditioner,6,0.0
1,Air Cooller,1,0.0
2,Audi R8 Sterring wheel,1,6.0
3,Ceat Tyre,3,2.333333
4,Chair,1,0.0
5,Computer Table,11,0.545455
6,Dining Table,3,2.666667
7,Duke 390,1,0.0
8,Gamming Chair,30,1.533333
9,Guitar,3,2.666667


In [12]:
popular_df

Unnamed: 0,Product-Title,num_ratings,avg_rating
0,Air Conditioner,6,0.0
1,Air Cooller,1,0.0
2,Audi R8 Sterring wheel,1,6.0
3,Ceat Tyre,3,2.333333
4,Chair,1,0.0
5,Computer Table,11,0.545455
6,Dining Table,3,2.666667
7,Duke 390,1,0.0
8,Gamming Chair,30,1.533333
9,Guitar,3,2.666667


In [13]:
# Keep products that have at least one rating, order them by average rating
# (highest first) and retain the first 15 rows.
has_ratings = popular_df['num_ratings'] > 0
popular_df = (
    popular_df.loc[has_ratings]
    .sort_values(by='avg_rating', ascending=False)
    .head(15)
)

In [14]:
popular_df

Unnamed: 0,Product-Title,num_ratings,avg_rating
2,Audi R8 Sterring wheel,1,6.0
17,fridge,1,5.0
13,Rocking Cair,1,5.0
6,Dining Table,3,2.666667
9,Guitar,3,2.666667
14,Sofa Set,2,2.5
3,Ceat Tyre,3,2.333333
11,Michelin Tyre,3,2.0
16,Washing Machine,4,2.0
10,Mercedes Gwagon,13,1.615385


## Collaborative Filtering Based Recommender System

In [15]:
# Attach owner and image metadata to the popularity table.
# Only the statistic columns are taken from `popular_df`, and `books` is reduced
# to one row per title (first occurrence) before merging. This keeps the merge
# from multiplying rows and makes the cell safe to re-run: merging an
# already-merged `popular_df` with the full `books` frame would create suffixed
# columns (Product-Owner_x / Product-Owner_y) and the column selection would
# then raise KeyError.
book_meta = books[['Product-Title','Product-Owner','Image-URL-M']].drop_duplicates('Product-Title')
popular_df = (
    popular_df[['Product-Title','num_ratings','avg_rating']]
    .merge(book_meta, on='Product-Title')
    .loc[:, ['Product-Title','Product-Owner','Image-URL-M','num_ratings','avg_rating']]
)

In [16]:
popular_df

Unnamed: 0,Product-Title,Product-Owner,Image-URL-M,num_ratings,avg_rating
0,Audi R8 Sterring wheel,Loren D. Estleman,https://media.karousell.com/media/photos/produ...,1,6.0
1,fridge,Ann Beattie,https://5.imimg.com/data5/SELLER/Default/2022/...,1,5.0
2,Rocking Cair,R. J. Kaiser,https://media.mutualart.com/Images/2022_11/09/...,1,5.0
3,Dining Table,Carlo D'Este,https://scontent.fktm3-1.fna.fbcdn.net/v/t39.3...,3,2.666667
4,Guitar,Scott Turow,https://mattsmusic.com/wp-content/uploads/2022...,3,2.666667
5,Sofa Set,Sheila Heti,https://m.media-amazon.com/images/I/61vWTGxwgA...,2,2.5
6,Ceat Tyre,Julia Oliver,https://3.imimg.com/data3/PB/XH/GLADMIN-155616...,3,2.333333
7,Michelin Tyre,Robert Hendrickson,https://media.karousell.com/media/photos/produ...,3,2.0
8,Washing Machine,Robert Cowley,https://i.pinimg.com/originals/de/ea/a0/deeaa0...,4,2.0
9,Mercedes Gwagon,Richard Bruce Wright,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,13,1.615385


In [17]:
# Count the ratings each user contributed and keep the IDs of users with at
# least one rating.
ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
x = ratings_per_user >= 1
rated_user = x[x].index

In [18]:
# Restrict the joined ratings to rows written by the selected users.
from_rated_user = ratings_with_name['User-ID'].isin(rated_user)
filtered_rating = ratings_with_name.loc[from_rated_user]

In [19]:
# Count the ratings each product received and keep the titles with at least two.
ratings_per_product = filtered_rating.groupby('Product-Title')['Book-Rating'].count()
y = ratings_per_product >= 2
famous_products = y[y].index

In [20]:
famous_products

Index(['Air Conditioner', 'Ceat Tyre', 'Computer Table', 'Dining Table',
       'Gamming Chair', 'Guitar', 'Mercedes Gwagon', 'Michelin Tyre',
       'Sofa Set', 'Wall Mirror', 'Washing Machine'],
      dtype='object', name='Product-Title')

In [21]:
# Restrict the ratings to the products selected in `famous_products`.
is_famous_product = filtered_rating['Product-Title'].isin(famous_products)
final_ratings = filtered_rating.loc[is_famous_product]

In [22]:
# Display a de-duplicated view of the ratings.
# NOTE: `drop_duplicates()` returns a new frame and the result is not assigned,
# so `final_ratings` itself is left unchanged by this cell.
final_ratings.drop_duplicates()

Unnamed: 0,User-ID,ISBN,Book-Rating,Product-Title,Product-Owner,Year-Of-Publication,Publisher,Unnamed: 5,Image-URL-M,Image-URL-L
1,8,2005018,5,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
2,11400,2005018,0,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
3,11676,2005018,8,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
4,41385,2005018,0,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
5,67544,2005018,8,Mercedes Gwagon,Richard Bruce Wright,2001.0,HarperFlamingo Canada,,https://media.donedeal.ie/eyJidWNrZXQiOiJkb25l...,http://images.amazon.com/images/P/0002005018.0...
...,...,...,...,...,...,...,...,...,...,...
86,176875,1575663937,0,Michelin Tyre,Robert Hendrickson,1999.0,Kensington Publishing Corp.,,https://media.karousell.com/media/photos/produ...,http://images.amazon.com/images/P/1575663937.0...
87,213915,1575663937,0,Michelin Tyre,Robert Hendrickson,1999.0,Kensington Publishing Corp.,,https://media.karousell.com/media/photos/produ...,http://images.amazon.com/images/P/1575663937.0...
88,8,1881320189,7,Ceat Tyre,Julia Oliver,1994.0,River City Pub,,https://3.imimg.com/data3/PB/XH/GLADMIN-155616...,http://images.amazon.com/images/P/1881320189.0...
89,35826,1881320189,0,Ceat Tyre,Julia Oliver,1994.0,River City Pub,,https://3.imimg.com/data3/PB/XH/GLADMIN-155616...,http://images.amazon.com/images/P/1881320189.0...


In [23]:
# Product x user matrix of ratings; cells with no rating are NaN at this point.
pt = pd.pivot_table(
    final_ratings,
    values='Book-Rating',
    index='Product-Title',
    columns='User-ID',
)

In [24]:
# Replace missing product/user combinations with 0.
pt = pt.fillna(0)

In [25]:
pt

User-ID,8,2954,3363,11400,11676,24539,29526,35704,35826,36836,...,219008,223066,225910,226745,230522,236322,238557,242247,245827,248583
Product-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Air Conditioner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ceat Tyre,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Computer Table,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Dining Table,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gamming Chair,0.0,0.0,0.0,0.0,9.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Guitar,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Mercedes Gwagon,5.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Michelin Tyre,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sofa Set,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wall Mirror,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
from sklearn.metrics.pairwise import cosine_similarity

In [27]:
# Pairwise cosine similarity between the rows of `pt`. `pt` is indexed by
# 'Product-Title', so entry [i, j] compares the rating vectors of the products
# at positions i and j of `pt.index`.
similarity_scores = cosine_similarity(pt)

In [28]:
similarity_scores.shape

(11, 11)

In [39]:
def recommend(product_name, top_n=5):
    """Return the products most similar to ``product_name``.

    Looks ``product_name`` up in the index of the module-level pivot table
    ``pt``, ranks every *other* product by its entry in the matching row of
    ``similarity_scores`` and returns catalogue details from ``books`` for the
    best matches.

    Args:
        product_name: A label present in ``pt.index``.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A list with one entry per recommended product, most similar first.
        Each entry is ``[title, owner, image_url]`` taken from the first row
        of ``books`` carrying that title (an empty list if ``books`` has no
        such row).

    Raises:
        IndexError: If ``product_name`` is not a label of ``pt.index``.
    """
    positions = np.where(pt.index == product_name)[0]
    if len(positions) == 0:
        raise IndexError(f"product {product_name!r} is not in the similarity index")
    index = positions[0]

    scores = similarity_scores[index]
    # Exclude the queried product by position instead of assuming it sorts
    # first: a product whose rating row is all zeros has similarity 0 with
    # itself, and ties can place another product ahead of it.
    candidates = [position for position in range(len(scores)) if position != index]
    # sorted() is stable, so equally scored products keep their index order.
    ranked = sorted(candidates, key=lambda position: scores[position], reverse=True)[:top_n]

    data = []
    for position in ranked:
        # First catalogue row for this title (titles may repeat in `books`).
        first_match = books[books['Product-Title'] == pt.index[position]].head(1)
        item = []
        for column in ('Product-Title', 'Product-Owner', 'Image-URL-M'):
            item.extend(first_match[column].tolist())
        data.append(item)

    return data

In [40]:
recommend('Ceat Tyre')

[['Michelin Tyre',
  'Robert Hendrickson',
  'https://media.karousell.com/media/photos/products/2021/1/21/4pcs_1857014_michelin_xm2_used_1611234339_add337ad_progressive.jpg'],
 ['Sofa Set',
  'Sheila Heti',
  'https://m.media-amazon.com/images/I/61vWTGxwgAL._SL1100_.jpg'],
 ['Mercedes Gwagon',
  'Richard Bruce Wright',
  'https://media.donedeal.ie/eyJidWNrZXQiOiJkb25lZGVhbC5pZS1waG90b3MiLCJlZGl0cyI6eyJ0b0Zvcm1hdCI6ImpwZWciLCJyZXNpemUiOnsiZml0IjoiY292ZXIiLCJ3aWR0aCI6NjAwLCJoZWlnaHQiOjQ1MH19LCJrZXkiOiJwaG90b18yMjE5ODY1NDgifQ==?signature=f3609b54f16df104f3216443e24473addcb093710cd4a6e2a31763d32efe91cc'],
 ['Air Conditioner',
  'David Adams Richards',
  'https://pictures-ghana.jijistatic.com/23399150_OTYwLTEyMTQtZjY3ZTU0MWNiNg.jpg'],
 ['Computer Table',
  'Gina Bari Kolata',
  'https://media.karousell.com/media/photos/products/2021/5/23/used_computer_table_60x120_1621740774_d98f2fde_progressive.jpg']]

In [31]:
pt.index[10]

'Washing Machine'

In [32]:
import pickle

# Persist the popularity table. The `with` block guarantees the file handle is
# flushed and closed even if pickling fails part-way.
with open('popular.pkl', 'wb') as popular_file:
    pickle.dump(popular_df, popular_file)