There are two main types of collaborative filtering:

User-based collaborative filtering: This method finds similar users based on their past interactions with items and then recommends items that similar users have liked. For example, if two users have similar viewing histories on Netflix, the system may recommend the same movie to both users.

Item-based collaborative filtering: This method finds similar items based on how users have interacted with them and then recommends those similar items to a user. Two items count as similar when the same users tend to rate them alike, not because they share attributes such as genre. For example, if most users who liked movie A also liked movie B, the system may recommend movie B to a user who has liked movie A.

In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the books, ratings, and users tables from CSV files in the working directory.
bookDetails, ratings, userDetails = (
    pd.read_csv(csv_path)
    for csv_path in ('Books.csv', 'Ratings.csv', 'Users.csv')
)

In [3]:
# Report the (rows, columns) shape of each loaded table, one per line.
print(bookDetails.shape, ratings.shape, userDetails.shape, sep='\n')

(271360, 8)
(1149780, 3)
(278858, 3)


# 1. Merging ratings and bookDetails data - to get book details with ratings and userID

In [4]:
# Attach book metadata to every rating by joining on the shared ISBN column
# (default inner join: ratings whose ISBN is missing from bookDetails are dropped).
ratings_with_bookDetails = pd.merge(ratings, bookDetails, on='ISBN')
ratings_with_bookDetails.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [5]:
# Significant users: those with more than 200 rating rows in the merged table.
# x is a boolean Series indexed by User-ID; the mask selects the qualifying IDs.
x = ratings_with_bookDetails.groupby('User-ID')['Book-Rating'].count() > 200
significant_user = x.index[x.to_numpy()]

In [6]:
# Display the User-IDs that passed the ">200 rating rows" filter.
significant_user

Int64Index([   254,   2276,   2766,   2977,   3363,   4017,   4385,   6251,
              6323,   6543,
            ...
            271705, 273979, 274004, 274061, 274301, 274308, 275970, 277427,
            277639, 278418],
           dtype='int64', name='User-ID', length=811)

In [7]:
# Restrict the merged ratings to the rows written by the significant users.
significant_user_bookdetails = ratings_with_bookDetails.loc[
    lambda frame: frame['User-ID'].isin(significant_user)
]
significant_user_bookdetails

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
5,23768,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
7,28523,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
15,77940,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
16,81977,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
...,...,...,...,...,...,...,...,...,...,...
1030883,275970,1880837927,0,The Theology of the Hammer,Millard Fuller,1994,Smyth &amp; Helwys Publishing,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...
1030884,275970,188717897X,0,The Ordeal of Integration: Progress and Resent...,Orlando Patterson,1998,Civitas Book Publisher,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...
1030885,275970,1888889047,0,Pushcart's Complete Rotten Reviews &amp; Rejec...,Bill Henderson,1998,Pushcart Press,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...
1030886,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...


In [17]:
# Famous books: titles with at least 50 rating rows from the significant users
# (the threshold is inclusive, i.e. >= 50).
y = significant_user_bookdetails.groupby('Book-Title')['Book-Rating'].count() >= 50
famousBooks = y.index[y.to_numpy()]
famousBooks

Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',
       'A Bend in the Road', 'A Case of Need',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Civil Action', 'A Day Late and a Dollar Short', 'A Fine Balance',
       ...
       'Winter Solstice', 'Wish You Well', 'Without Remorse',
       'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',
       'Year of Wonders', 'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=706)

In [18]:
# Keep only the significant users' ratings of the famous books.
final_ratings = significant_user_bookdetails.loc[
    lambda frame: frame['Book-Title'].isin(famousBooks)
]
final_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
63,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
65,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
66,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
69,11676,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
74,23768,0446520802,6,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
...,...,...,...,...,...,...,...,...,...,...
1026724,266865,0531001725,10,The Catcher in the Rye,Jerome David Salinger,1973,Scholastic Library Pub,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...
1027923,269566,0670809381,0,Echoes,Maeve Binchy,1986,Penguin USA,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...
1028777,271284,0440910927,0,The Rainmaker,John Grisham,1995,Island,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...
1029070,271705,B0001PIOX4,0,Fahrenheit 451,Ray Bradbury,1993,Simon &amp; Schuster,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...


# Pivoting and then finding cosine similarity

In [19]:
# Book-by-user rating matrix: one row per title, one column per user.
# pivot_table's default aggregation (mean) combines multiple ratings that fall
# into the same (title, user) cell; cells with no rating are filled with 0.
pt = (
    final_ratings
    .pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')
    .fillna(0)
)
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Cosine similarity

Cosine similarity is the cosine of the angle between two vectors: $\text{sim}(A, B) = \cos\theta = \dfrac{A \cdot B}{\lVert A \rVert \, \lVert B \rVert}$. It measures how closely two vectors point in the same direction, regardless of their magnitudes. The smaller the angle between two vectors, the higher their similarity; for non-negative vectors such as ratings, the value ranges from 0 (nothing in common) to 1 (same direction).

Here each book is represented by a vector of the ratings it received from the users (one row of `pt`), and cosine similarity is computed between every pair of books.

In [20]:
# NOTE(review): this import would normally live in the notebook's top import cell.
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of pt (one row per book title),
# giving a square books-by-books matrix.
similarity_score = cosine_similarity(pt)
similarity_score.shape

(706, 706)

In [21]:
# Display the similarity matrix; entry [i, j] compares the books at rows i and j of pt.
similarity_score

array([[1.        , 0.10255025, 0.01220856, ..., 0.12110367, 0.07347567,
        0.04316046],
       [0.10255025, 1.        , 0.2364573 , ..., 0.07446129, 0.16773875,
        0.14263397],
       [0.01220856, 0.2364573 , 1.        , ..., 0.04558758, 0.04938579,
        0.10796119],
       ...,
       [0.12110367, 0.07446129, 0.04558758, ..., 1.        , 0.07085128,
        0.0196177 ],
       [0.07347567, 0.16773875, 0.04938579, ..., 0.07085128, 1.        ,
        0.10602962],
       [0.04316046, 0.14263397, 0.10796119, ..., 0.0196177 , 0.10602962,
        1.        ]])

# The numpy.where() function returns the indices of elements in an input array where the given condition is satisfied.

In [35]:
# With only a condition, np.where returns a tuple holding one index array per dimension.
np.where(pt.index=='2nd Chance')

(array([2], dtype=int64),)

In [36]:
# [0] takes the index array for the first (and only) dimension of pt.index.
np.where(pt.index=='2nd Chance')[0]

array([2], dtype=int64)

In [37]:
# [0][0] takes the first matching position: the row number of '2nd Chance' in pt.
np.where(pt.index=='2nd Chance')[0][0]

2

In [39]:
# Pair each book's row position with its similarity to the book at row 2 ('2nd Chance').
[(book_pos, score) for book_pos, score in enumerate(similarity_score[2])]

[(0, 0.012208555646509366),
 (1, 0.236457295254443),
 (2, 1.0),
 (3, 0.0),
 (4, 0.06909024214591328),
 (5, 0.10559126382470128),
 (6, 0.0),
 (7, 0.10774375110550646),
 (8, 0.0670218487291342),
 (9, 0.0416819540485443),
 (10, 0.04772156476776279),
 (11, 0.0),
 (12, 0.047580047424453376),
 (13, 0.044455151509031496),
 (14, 0.052007058137221944),
 (15, 0.09877662230505671),
 (16, 0.06505524505788216),
 (17, 0.16998812090738535),
 (18, 0.11381916242700778),
 (19, 0.11713887938926706),
 (20, 0.034240966437880764),
 (21, 0.0878014420489393),
 (22, 0.19734903096369646),
 (23, 0.0),
 (24, 0.1029666684450606),
 (25, 0.0),
 (26, 0.058626484557583125),
 (27, 0.03167737914741039),
 (28, 0.026650717122388885),
 (29, 0.09277209651363727),
 (30, 0.015781976359504266),
 (31, 0.11200667554588858),
 (32, 0.07976510802634351),
 (33, 0.09795365542039074),
 (34, 0.11151806442907786),
 (35, 0.1329776664248242),
 (36, 0.07759341529983549),
 (37, 0.0949007868093896),
 (38, 0.10728083132678148),
 (39, 0.098855

In [41]:
# Rank every (row position, similarity) pair for row 2 from most to least similar.
sorted(enumerate(similarity_score[2]), key=lambda pair: pair[1], reverse=True)

[(2, 1.0),
 (203, 0.3918276378683357),
 (577, 0.34991368995007677),
 (666, 0.30619341327105154),
 (575, 0.29468927698294456),
 (198, 0.2744597369070355),
 (338, 0.26840230908242607),
 (96, 0.26814324064048733),
 (486, 0.25932139662678516),
 (166, 0.2524098979487631),
 (368, 0.2508772113214677),
 (382, 0.24923306067547385),
 (81, 0.24169010979761998),
 (130, 0.2380506201999118),
 (1, 0.236457295254443),
 (582, 0.23312699202321227),
 (427, 0.2291408558371912),
 (193, 0.22709354851686991),
 (495, 0.22646115140179207),
 (80, 0.2261405280701767),
 (483, 0.2245044128101898),
 (205, 0.22027645398763274),
 (172, 0.21765288149629525),
 (352, 0.21708609119306566),
 (518, 0.21433446878135595),
 (659, 0.21315996596970338),
 (646, 0.2123999887154975),
 (144, 0.21202252984878692),
 (380, 0.20874041220419337),
 (628, 0.2050466947483047),
 (454, 0.20478283997913724),
 (579, 0.2030148440637205),
 (456, 0.20246105679048543),
 (58, 0.20224734136070588),
 (299, 0.20172489588333678),
 (620, 0.2004012177738

In [42]:
# The top-ranked pair is normally the book itself (similarity 1.0), so skip it
# and keep the next five entries as the closest books.
sorted(enumerate(similarity_score[2]), key=lambda pair: pair[1], reverse=True)[1:6]

[(203, 0.3918276378683357),
 (577, 0.34991368995007677),
 (666, 0.30619341327105154),
 (575, 0.29468927698294456),
 (198, 0.2744597369070355)]

# Putting these steps together, we define a function `recommend` that finds the books most similar to a given title.

In [45]:
def recommend(book_name, n=5):
    """Return the books most similar to ``book_name``.

    Relies on three module-level objects built in earlier cells:
    ``pt`` (book-by-user rating matrix indexed by title), ``similarity_score``
    (square matrix of pairwise similarities between the rows of ``pt``) and
    ``bookDetails`` (book metadata table).

    Parameters
    ----------
    book_name : str
        Title to look up; it must match an entry of ``pt.index`` exactly.
    n : int, default 5
        Maximum number of similar books to return.

    Returns
    -------
    list of list
        One ``[title, author, large image URL]`` entry per recommended book,
        ordered from most to least similar. An entry is empty if the title has
        no row in ``bookDetails``.

    Raises
    ------
    IndexError
        If ``book_name`` is not one of the titles in ``pt.index``.
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # Same exception type as the bare [0][0] lookup, with a useful message.
        raise IndexError(f"{book_name!r} is not among the titles in pt.index")
    index = matches[0]

    # Stable descending sort: ties keep ascending row order, matching
    # sorted(..., reverse=True) on the enumerated row.
    ranked = np.argsort(-similarity_score[index], kind='stable')
    # Drop the query book by position instead of assuming it ranks first;
    # a tie at similarity 1.0 or an all-zero row can put another book ahead of it.
    similar_positions = ranked[ranked != index][:n]

    wanted_columns = ['Book-Title', 'Book-Author', 'Image-URL-L']
    data = []
    for position in similar_positions:
        title = pt.index[position]
        # A title may appear under several ISBNs; keep its first metadata row.
        first_row = bookDetails.loc[bookDetails['Book-Title'] == title, wanted_columns].head(1)
        data.append(list(first_row.to_numpy().ravel()))
    return data
    

In [46]:
# Example: list the books most similar to '1984' as [title, author, image URL] entries.
recommend('1984')

[['Animal Farm',
  'George Orwell',
  'http://images.amazon.com/images/P/0451526341.01.LZZZZZZZ.jpg'],
 ["The Handmaid's Tale",
  'Margaret Atwood',
  'http://images.amazon.com/images/P/0449212602.01.LZZZZZZZ.jpg'],
 ['Brave New World',
  'Aldous Huxley',
  'http://images.amazon.com/images/P/0060809833.01.LZZZZZZZ.jpg'],
 ['The Vampire Lestat (Vampire Chronicles, Book II)',
  'ANNE RICE',
  'http://images.amazon.com/images/P/0345313860.01.LZZZZZZZ.jpg'],
 ['The Hours : A Novel',
  'Michael Cunningham',
  'http://images.amazon.com/images/P/0312243022.01.LZZZZZZZ.jpg']]