## Import Basic libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


## Import Datasets


In [2]:
books = pd.read_csv('books.csv')
users = pd.read_csv('users.csv')
ratings = pd.read_csv('ratings.csv')

  books = pd.read_csv('books.csv')


## DATASETS

In [3]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [4]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [5]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [6]:
print(books.shape)
print(users.shape)
print(ratings.shape)

(271360, 8)
(278858, 3)
(1149780, 3)


## Missing Values


In [7]:
books.isnull().sum()


ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [8]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [9]:
ratings.isnull().sum()


User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

##### Check Duplicates data in datasets

In [10]:
books.duplicated().sum()


0

In [11]:
users.duplicated().sum()


0

In [12]:
ratings.duplicated().sum()

0

## popularity based recommender system

In [13]:
ratings_with_name = ratings.merge(books,on='ISBN')

In [14]:
num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating']. reset_index()
num_rating_df.rename(columns={'Book-Rating':'num_rating'},inplace=True)
num_rating_df

Unnamed: 0,Book-Title,num_rating
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241066,Ã?Â?lpiraten.,2
241067,Ã?Â?rger mit Produkt X. Roman.,4
241068,Ã?Â?sterlich leben.,1
241069,Ã?Â?stlich der Berge.,3


In [15]:
# Check the data type of Book-Rating
print(ratings_with_name['Book-Rating'].dtype)

int64


In [16]:
# Convert to numeric type 
ratings_with_name['Book-Rating'] = pd.to_numeric(ratings_with_name['Book-Rating'], errors='coerce')

In [17]:
avg_rating_df = ratings_with_name.groupby('Book-Title')['Book-Rating'].mean().reset_index()
avg_rating_df.rename(columns={'Book-Rating': 'avg_rating'}, inplace=True)
avg_rating_df

Unnamed: 0,Book-Title,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,2.250000
1,Always Have Popsicles,0.000000
2,Apple Magic (The Collector's series),0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,0.000000
...,...,...
241066,Ã?Â?lpiraten.,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,5.250000
241068,Ã?Â?sterlich leben.,7.000000
241069,Ã?Â?stlich der Berge.,2.666667


In [18]:
popular_df = num_rating_df.merge(avg_rating_df,on='Book-Title')
popular_df

Unnamed: 0,Book-Title,num_rating,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [19]:
# Sort according to top rating
popular_df = popular_df[popular_df['num_rating']>300].sort_values('avg_rating',ascending=False).head(50)

In [20]:
popular_df

Unnamed: 0,Book-Title,num_rating,avg_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
219741,To Kill a Mockingbird,510,4.7
183573,The Da Vinci Code,898,4.642539
187880,The Five People You Meet in Heaven,430,4.551163
180556,The Catcher in the Rye,449,4.545657


In [21]:
popular_df.merge(books,on='Book-Title')

Unnamed: 0,Book-Title,num_rating,avg_rating,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,0439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
1,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,0439136369,J. K. Rowling,2001,Scholastic,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...
2,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,0786222743,J. K. Rowling,2000,Thorndike Press,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,0439139597,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
4,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,0439139600,J. K. Rowling,2002,Scholastic Paperbacks,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...
...,...,...,...,...,...,...,...,...,...,...
168,Bridget Jones's Diary,815,3.527607,0670880728,Helen Fielding,1998,Viking Books,http://images.amazon.com/images/P/0670880728.0...,http://images.amazon.com/images/P/0670880728.0...,http://images.amazon.com/images/P/0670880728.0...
169,Bridget Jones's Diary,815,3.527607,0330332767,Helen Fielding,1996,Picador (UK),http://images.amazon.com/images/P/0330332767.0...,http://images.amazon.com/images/P/0330332767.0...,http://images.amazon.com/images/P/0330332767.0...
170,White Oleander : A Novel,387,3.506460,0316182540,Janet Fitch,2001,"Little, Brown",http://images.amazon.com/images/P/0316182540.0...,http://images.amazon.com/images/P/0316182540.0...,http://images.amazon.com/images/P/0316182540.0...
171,White Oleander : A Novel,387,3.506460,0316569321,Janet Fitch,1999,"Little, Brown",http://images.amazon.com/images/P/0316569321.0...,http://images.amazon.com/images/P/0316569321.0...,http://images.amazon.com/images/P/0316569321.0...


In [22]:
popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')

Unnamed: 0,Book-Title,num_rating,avg_rating,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,0439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,0439139597,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
5,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441,043935806X,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...
9,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453,0439064872,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...
12,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837,0345339703,J.R.R. TOLKIEN,1986,Del Rey,http://images.amazon.com/images/P/0345339703.0...,http://images.amazon.com/images/P/0345339703.0...,http://images.amazon.com/images/P/0345339703.0...
21,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652,059035342X,J. K. Rowling,1999,Arthur A. Levine Books,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...
23,To Kill a Mockingbird,510,4.7,0446310786,Harper Lee,1988,Little Brown &amp; Company,http://images.amazon.com/images/P/0446310786.0...,http://images.amazon.com/images/P/0446310786.0...,http://images.amazon.com/images/P/0446310786.0...
31,The Da Vinci Code,898,4.642539,0385504209,Dan Brown,2003,Doubleday,http://images.amazon.com/images/P/0385504209.0...,http://images.amazon.com/images/P/0385504209.0...,http://images.amazon.com/images/P/0385504209.0...
37,The Five People You Meet in Heaven,430,4.551163,0786868716,Mitch Albom,2003,Hyperion,http://images.amazon.com/images/P/0786868716.0...,http://images.amazon.com/images/P/0786868716.0...,http://images.amazon.com/images/P/0786868716.0...
39,The Catcher in the Rye,449,4.545657,0316769487,J.D. Salinger,1991,"Little, Brown",http://images.amazon.com/images/P/0316769487.0...,http://images.amazon.com/images/P/0316769487.0...,http://images.amazon.com/images/P/0316769487.0...


In [23]:
popular_df = popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Image-URL-M','num_rating','avg_rating']]

In [24]:
# Final Popular DATASETS
popular_df

Unnamed: 0,Book-Title,Book-Author,Image-URL-M,num_rating,avg_rating
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
3,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
5,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
9,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
12,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
21,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
23,To Kill a Mockingbird,Harper Lee,http://images.amazon.com/images/P/0446310786.0...,510,4.7
31,The Da Vinci Code,Dan Brown,http://images.amazon.com/images/P/0385504209.0...,898,4.642539
37,The Five People You Meet in Heaven,Mitch Albom,http://images.amazon.com/images/P/0786868716.0...,430,4.551163
39,The Catcher in the Rye,J.D. Salinger,http://images.amazon.com/images/P/0316769487.0...,449,4.545657


In [25]:
popular_df['Image-URL-M'][0]

'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg'

## Collaborative Filtering Based Recommendation System

#### filtered data accorning to user

In [26]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 250
userTread = x[x].index  # userTread means user those read book

In [27]:
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(userTread)]

#### filterd data according to book

In [28]:
filtered_rating.groupby('Book-Title').count()['Book-Rating']

Book-Title
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    2
 Always Have Popsicles                                                                                        1
 Apple Magic (The Collector's series)                                                                         1
 Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   1
 Clifford Visita El Hospital (Clifford El Gran Perro Colorado)                                                1
                                                                                                             ..
Ã?coute ma diffÃ©rence (Le Temps des femmes)                                                                  1
Ã?Â?ber das Fernsehen.                                                                                        1
Ã?Â?ber die Pflicht zum Ungehorsam gegen den Staat.                                          

In [29]:
#boolean indexing 
y = filtered_rating.groupby('Book-Title').count()['Book-Rating']>+30
famous_books = y[y].index

In [30]:
famous_books

Index(['16 Lighthouse Road', '1984', '1st to Die: A Novel',
       '2010: Odyssey Two', '204 Rosewood Lane', '24 Hours', '2nd Chance',
       '4 Blondes', '84 Charing Cross Road',
       'A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))',
       ...
       'Wizard and Glass (The Dark Tower, Book 4)',
       'Women Who Run with the Wolves',
       'Word Freak: Heartbreak, Triumph, Genius, and Obsession in the World of Competitive Scrabble Players',
       'World of Pies : A Novel', 'Wuthering Heights', 'Year of Wonders',
       'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=1333)

In [31]:
final_rating = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]

In [32]:
final_rating

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
1150,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
1162,277427,006092988X,0,A Tree Grows in Brooklyn,Betty Smith,1998,Perennial,http://images.amazon.com/images/P/006092988X.0...,http://images.amazon.com/images/P/006092988X.0...,http://images.amazon.com/images/P/006092988X.0...
1163,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...
1165,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...
1168,277427,0061009059,9,One for the Money (Stephanie Plum Novels (Pape...,Janet Evanovich,1995,HarperTorch,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...
...,...,...,...,...,...,...,...,...,...,...
1029202,275970,1400100976,0,Stiff: The Curious Lives of Human Cadavers,Mary Roach,2003,Tantor Media,http://images.amazon.com/images/P/1400100976.0...,http://images.amazon.com/images/P/1400100976.0...,http://images.amazon.com/images/P/1400100976.0...
1029269,275970,1573227951,0,An Instance of the Fingerpost,Iain Pears,2000,Riverhead Books,http://images.amazon.com/images/P/1573227951.0...,http://images.amazon.com/images/P/1573227951.0...,http://images.amazon.com/images/P/1573227951.0...
1029270,275970,1573229725,0,Fingersmith,Sarah Waters,2002,Riverhead Books,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...
1029309,275970,1586210661,9,Me Talk Pretty One Day,David Sedaris,2001,Time Warner Audio Major,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...


In [33]:
pivotTable = final_rating.pivot_table(index='Book-Title', columns='User-ID',values='Book-Rating')

In [34]:
pivotTable

User-ID,254,2276,2766,3363,4385,6251,6543,6575,7158,7346,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,,,,,,,,,,,...,,,,,,,,,,
1984,9.0,,,,,,,,,8.0,...,10.0,,,,,,0.0,,,
1st to Die: A Novel,,,,,,,9.0,,0.0,,...,,,,,,,,,,
2010: Odyssey Two,,0.0,,,,,,,,,...,,,,,,,,,,
204 Rosewood Lane,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,,,,0.0,0.0,,,...,,9.0,,,,,0.0,,,
You Belong To Me,,,,,,,,,0.0,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,0.0,,0.0,,,,,...,,,,,,,0.0,,,
Zoya,,,,,,,,,,,...,,0.0,,,,,,,,


In [35]:
pivotTable.fillna(0,inplace=True)

In [36]:
pivotTable

User-ID,254,2276,2766,3363,4385,6251,6543,6575,7158,7346,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010: Odyssey Two,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204 Rosewood Lane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
  from sklearn.metrics.pairwise import cosine_similarity

In [38]:
similarity_score = cosine_similarity(pivotTable)

In [39]:
similarity_score.shape  ## is show the Euclidean Distance to (383X383)

(1333, 1333)

### Index fetch recommendaiton

In [40]:
def recommendation(book_name):
    if book_name not in pivotTable.index:
        return f"❌ '{book_name}' not found in pivot table. Please check the spelling or try another book."
    
    # index fetch
    index = np.where(pivotTable.index == book_name)[0][0]
    similar_items = sorted(list(enumerate(similarity_score[index])), key=lambda x: x[1], reverse=True)[1:6]
    
    data = []
    for i in similar_items:
        item = []
        # Use .iloc[0] safely if only one row after dropping duplicates
        temp_df = books[books['Book-Title'] == pivotTable.index[i[0]]].drop_duplicates('Book-Title')
        
        if not temp_df.empty:
            item.append(temp_df['Book-Title'].values[0])
            item.append(temp_df['Book-Author'].values[0])
            item.append(temp_df['Image-URL-M'].values[0])
            data.append(item)
    
    return data


In [41]:
recommendation('2nd Chance')

[['The Next Accident',
  'LISA GARDNER',
  'http://images.amazon.com/images/P/0553578693.01.MZZZZZZZ.jpg'],
 ['The Lake House',
  'James Patterson',
  'http://images.amazon.com/images/P/0316603287.01.MZZZZZZZ.jpg'],
 ['The Oath',
  'John Lescroart',
  'http://images.amazon.com/images/P/0451207645.01.MZZZZZZZ.jpg'],
 ['Four Blind Mice',
  'James Patterson',
  'http://images.amazon.com/images/P/0316693006.01.MZZZZZZZ.jpg'],
 ['The Jury',
  'Steve Martini',
  'http://images.amazon.com/images/P/0515132136.01.MZZZZZZZ.jpg']]

In [42]:
recommendation('4 Blondes')

[['One Hundred Years of Solitude',
  'Gabriel Garcia Marquez',
  'http://images.amazon.com/images/P/0060929790.01.MZZZZZZZ.jpg'],
 ['The House of the Spirits',
  'Isabel Allende',
  'http://images.amazon.com/images/P/0553273914.01.MZZZZZZZ.jpg'],
 ['The Beach',
  'Alex Garland',
  'http://images.amazon.com/images/P/1573226521.01.MZZZZZZZ.jpg'],
 ['Pride and Prejudice',
  'Jane Austen',
  'http://images.amazon.com/images/P/055321215X.01.MZZZZZZZ.jpg'],
 ['Poland',
  'James A. Michener',
  'http://images.amazon.com/images/P/0394533887.01.MZZZZZZZ.jpg']]

In [43]:
recommendation('Message in a Bottle')

[['Nights in Rodanthe',
  'Nicholas Sparks',
  'http://images.amazon.com/images/P/0446531332.01.MZZZZZZZ.jpg'],
 ['The Mulberry Tree',
  'Jude Deveraux',
  'http://images.amazon.com/images/P/0743437640.01.MZZZZZZZ.jpg'],
 ['A Walk to Remember',
  'Nicholas Sparks',
  'http://images.amazon.com/images/P/0446608955.01.MZZZZZZZ.jpg'],
 ["River's End",
  'Nora Roberts',
  'http://images.amazon.com/images/P/0515127833.01.MZZZZZZZ.jpg'],
 ['Nightmares &amp; Dreamscapes',
  'Stephen King',
  'http://images.amazon.com/images/P/0451180232.01.MZZZZZZZ.jpg']]

In [44]:
recommendation('Secret History')

[['The Talented Mr. Ripley (Vintage Crime/Black Lizard)',
  'Patricia Highsmith',
  'http://images.amazon.com/images/P/0679742298.01.MZZZZZZZ.jpg'],
 ['Waiting (Vintage International)',
  'Ha Jin',
  'http://images.amazon.com/images/P/0375706410.01.MZZZZZZZ.jpg'],
 ['The Beach',
  'Alex Garland',
  'http://images.amazon.com/images/P/1573226521.01.MZZZZZZZ.jpg'],
 ['Geek Love',
  'Katherine Dunn',
  'http://images.amazon.com/images/P/0446391301.01.MZZZZZZZ.jpg'],
 ['The Return of the Indian (Indian in the Cupboard)',
  'Lynne Reid Banks',
  'http://images.amazon.com/images/P/0380702843.01.MZZZZZZZ.jpg']]

In [45]:
import pickle 
pickle.dump(popular_df,open('popular.pkl','wb'))

In [46]:
pickle.dump(pivotTable,open('pivotTable.pkl','wb'))
pickle.dump(books,open(' books.pkl','wb'))
pickle.dump(similarity_score,open('similarity_score.pkl','wb'))

In [47]:
recommendation('The Da Vinci Code')

[['Angels &amp; Demons',
  'Dan Brown',
  'http://images.amazon.com/images/P/0671027360.01.MZZZZZZZ.jpg'],
 ['TickTock',
  'Dean R. Koontz',
  'http://images.amazon.com/images/P/034538430X.01.MZZZZZZZ.jpg'],
 ["The Sweet Potato Queens' Book of Love",
  'JILL CONNER BROWNE',
  'http://images.amazon.com/images/P/0609804138.01.MZZZZZZZ.jpg'],
 ['Sea Glass: A Novel',
  'Anita Shreve',
  'http://images.amazon.com/images/P/0316089699.01.MZZZZZZZ.jpg'],
 ['Timeline',
  'MICHAEL CRICHTON',
  'http://images.amazon.com/images/P/0345417623.01.MZZZZZZZ.jpg']]

In [48]:
def get_user_liked_books(user_id, threshold=7):
    liked_books = filtered_rating[
        (filtered_rating['User-ID'] == user_id) &
        (filtered_rating['Book-Rating'] >= threshold)
    ]
    return liked_books['Book-Title'].unique().tolist()


In [49]:
def evaluate_user(user_id):
    liked_books = get_user_liked_books(user_id)
    recommended_books = []

    for book in liked_books:
        if book not in pivotTable.index:
            continue
        try:
            recs = recommendation(book)
            if isinstance(recs, str):  # "book not found"
                continue
            recommended_titles = [r[0] for r in recs]
            recommended_books.extend(recommended_titles)
        except:
            continue

    # Remove already liked books from recommendations
    recommended_books = list(set(recommended_books) - set(liked_books))
    return liked_books, recommended_books


In [50]:
def precision_recall(user_id):
    liked, recommended = evaluate_user(user_id)

    if not liked or not recommended:
        return {'user_id': user_id, 'precision': 0.0, 'recall': 0.0}

    hits = len(set(liked) & set(recommended))
    precision = hits / len(recommended) if recommended else 0
    recall = hits / len(liked) if liked else 0

    return {
        'user_id': user_id,
        'precision': round(precision, 3),
        'recall': round(recall, 3)
    }


In [51]:
user_ids = filtered_rating['User-ID'].unique()[:30]  # test on 30 users

results = [precision_recall(uid) for uid in user_ids]

import pandas as pd
results_df = pd.DataFrame(results)

print("Average Precision:", results_df['precision'].mean())
print("Average Recall:", results_df['recall'].mean())


Average Precision: 0.0
Average Recall: 0.0


In [52]:
# Pick one user manually
user_id = filtered_rating['User-ID'].iloc[0]
liked_books = filtered_rating[(filtered_rating['User-ID'] == user_id) & (filtered_rating['Book-Rating'] >= 7)]['Book-Title'].unique()

print("User ID:", user_id)
print("Liked Books:", liked_books)

# Now test recommendation for just one liked book
for book in liked_books:
    print(f"\nTesting recommendation for: {book}")
    if book in pivotTable.index:
        recs = recommendation(book)
        print("Recommendations:", [r[0] for r in recs])
    else:
        print("This book not found in pivotTable")


User ID: 277427
Liked Books: ['Politically Correct Bedtime Stories: Modern Tales for Our Life and Times'
 'Pioneers'
 'On Writing Well, 25th Anniversary : The Classic Guide to Writing Nonfiction (On Writing Well)'
 'When the Storm Breaks'
 'One for the Money (Stephanie Plum Novels (Paperback))'
 'Inner Bonding: Becoming a Loving Adult to Your Inner Child'
 'Dynamics of Motor-Skill Acquisition' 'The Prairie (Penguin Classics)'
 'The Pathfinder (Penguin Classic)' 'Oliver Twist (Penguin Classics)'
 'The Whale Rider' "Transitions: Making Sense of Life's Changes"
 'Encyclopedia of Bible Difficulties, An' "World's End"
 'Me Talk Pretty One Day' 'The Murder Book'
 "Random House Webster's Quotationary"
 'The Picture of Dorian Gray (Modern Library (Paperback))'
 'The Return of the Indian (Indian in the Cupboard)' 'The Family Tree'
 'Abandon Ship!' 'The Rainmaker' 'Into the Wild' 'Oryx and Crake'
 'The Da Vinci Code'
 'The Perfect Storm: A True Story of Men Against the Sea'
 'The Complete Indoor