# Good Reads Recommender System

### Import Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise  import cosine_similarity

### Import Data and Explore

In [2]:
gr = pd.read_csv('books.csv')

In [3]:
gr.head()

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,439785960,9780440000000.0,eng,652,2095690,27591,09-16-2006,Scholastic Inc.
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,439358078,9780440000000.0,eng,870,2153167,29221,09-01-2004,Scholastic Inc.
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,439554896,9780440000000.0,eng,352,6333,244,11-01-2003,Scholastic
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9780440000000.0,eng,435,2339585,36325,05-01-2004,Scholastic Inc.
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,439682584,9780440000000.0,eng,2690,41428,164,09-13-2004,Scholastic


In [4]:
gr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11127 entries, 0 to 11126
Data columns (total 12 columns):
bookID                11127 non-null int64
title                 11127 non-null object
authors               11127 non-null object
average_rating        11127 non-null float64
isbn                  11127 non-null object
isbn13                11127 non-null float64
language_code         11127 non-null object
  num_pages           11127 non-null int64
ratings_count         11127 non-null int64
text_reviews_count    11127 non-null int64
publication_date      11127 non-null object
publisher             11127 non-null object
dtypes: float64(2), int64(4), object(6)
memory usage: 1.0+ MB


In [5]:
# Check for missing values
gr.isnull().sum(axis = 0)

bookID                0
title                 0
authors               0
average_rating        0
isbn                  0
isbn13                0
language_code         0
  num_pages           0
ratings_count         0
text_reviews_count    0
publication_date      0
publisher             0
dtype: int64

### Popularity Based Recommender

In [6]:
gr.head(3).transpose()

Unnamed: 0,0,1,2
bookID,1,2,4
title,Harry Potter and the Half-Blood Prince (Harry ...,Harry Potter and the Order of the Phoenix (Har...,Harry Potter and the Chamber of Secrets (Harry...
authors,J.K. Rowling/Mary GrandPré,J.K. Rowling/Mary GrandPré,J.K. Rowling
average_rating,4.57,4.49,4.42
isbn,439785960,439358078,439554896
isbn13,9.78044e+12,9.78044e+12,9.78044e+12
language_code,eng,eng,eng
num_pages,652,870,352
ratings_count,2095690,2153167,6333
text_reviews_count,27591,29221,244


In [7]:
# Recommendations based on highest ratings
recommendations = gr.sort_values(by='average_rating',ascending=False).head(5)
recommendations

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
5650,20957,Colossians and Philemon: A Critical and Exeget...,R. McL. Wilson,5.0,567044718,9780570000000.0,eng,512,1,0,12-07-2005,T&T Clark Int'l
1243,4287,Middlesex Borough (Images of America: New Jersey),Middlesex Borough Heritage Committee,5.0,738511676,9780740000000.0,eng,128,2,0,03-17-2003,Arcadia Publishing
5649,20954,Winchester Shotguns,Dennis Adler/R.L. Wilson,5.0,785821082,9780790000000.0,eng,372,2,0,05-15-2008,Chartwell Books
6778,25426,Delwau Duon: Peintiadau Nicholas Evans = Symph...,Nicholas Evans/Rhonda Evans,5.0,862431352,9780860000000.0,wel,150,1,0,06-22-1987,Y Lolfa
4935,17830,Bulgakov's the Master and Margarita: The Text ...,Elena N. Mahlow,5.0,533017424,9780530000000.0,eng,202,4,0,01-01-1975,Vantage Press


In [8]:
# Recommendations based on highest vote count
recommendations = gr.sort_values(by='ratings_count',ascending=False).head(5)
recommendations

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
10340,41865,Twilight (Twilight #1),Stephenie Meyer,3.59,316015849,9780320000000.0,eng,501,4597666,94265,09-06-2006,Little Brown and Company
1697,5907,The Hobbit or There and Back Again,J.R.R. Tolkien,4.27,618260307,9780620000000.0,eng,366,2530894,32871,08-15-2002,Houghton Mifflin
1462,5107,The Catcher in the Rye,J.D. Salinger,3.8,316769177,9780320000000.0,eng,277,2457092,43499,01-30-2001,Back Bay Books
307,960,Angels & Demons (Robert Langdon #1),Dan Brown,3.89,1416524797,9781420000000.0,eng,736,2418736,21303,04-01-2006,Pocket Books
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9780440000000.0,eng,435,2339585,36325,05-01-2004,Scholastic Inc.


In [9]:
# IMDB formula
# IMDB Weighted Rating (WR) = ((v/(v+m))*R)+((m/(v+m))*C)
# v is the number of votes for the movie
# m is the minimum votes to qualify
# R is the average rating of the movie
# C is the mean rating for all movies in the dataset

In [10]:
def Popularity_Based_Recommender():
    # Number of Votes
    v = gr['ratings_count']
    # Minimum Vote Count
    m = 0.75 * gr['ratings_count'].max()
    # Average Rating
    R = gr['average_rating']
    # Mean Rating
    C = gr['average_rating'].mean()
    # Weighted Rating
    gr['weighted_rating'] = (((v/(v+m))*R)+((m/(v+m))*C))
    
    recommendations = gr.sort_values(by='weighted_rating',ascending=False).head(5)
    recommendations
    return recommendations

In [11]:
Popularity_Based_Recommender()

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher,weighted_rating
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9780440000000.0,eng,435,2339585,36325,05-01-2004,Scholastic Inc.,4.186825
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,439785960,9780440000000.0,eng,652,2095690,27591,09-16-2006,Scholastic Inc.,4.174188
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,439358078,9780440000000.0,eng,870,2153167,29221,09-01-2004,Scholastic Inc.,4.147497
4416,15881,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling/Mary GrandPré,4.42,439064864,9780440000000.0,eng,341,2293963,34692,06-02-1999,Arthur A. Levine Books / Scholastic Inc.,4.127931
23,34,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. Tolkien,4.36,618346252,9780620000000.0,eng,398,2128944,13670,09-05-2003,Houghton Mifflin Harcourt,4.096386


### Content Based Recommender

In [12]:
def Content_Based_Recommender(title,indices,distance_matrix):
    id_ = indices[title]
    distances = list(enumerate(distance_matrix[id_]))
    distances = sorted(distances,key=lambda x:x[0],reverse=True)
    distances = distances[1:6]
    recommendations = [distance[0] for distance in distances]
    return gr['authors'].iloc[recommendations]

In [13]:
tfidf = TfidfVectorizer(stop_words='english')
gr['authors'] = gr['authors'].fillna('')
tfidf_matrix = tfidf.fit_transform(gr['authors'])
distance_matrix = cosine_similarity(tfidf_matrix)
indices = pd.Series(gr.index,index=gr['authors']).drop_duplicates()

In [14]:
Content_Based_Recommender('William T. Vollmann',indices,distance_matrix)

3    J.K. Rowling/Mary GrandPré
2                  J.K. Rowling
1    J.K. Rowling/Mary GrandPré
0    J.K. Rowling/Mary GrandPré
Name: authors, dtype: object