# Book Recommender System

In [1]:
import pandas as pd
import numpy as np

In [2]:
books = pd.read_csv('./data/Books.csv')
ratings = pd.read_csv('./data/Ratings.csv')
users = pd.read_csv('./data/Users.csv')
users.head()

  books = pd.read_csv('./data/Books.csv')


Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


## Data Preprocessing

In [3]:
# book ratings with book info
book_ratings = ratings.merge(books, on = 'ISBN')
book_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


Determining a proper list of popular items requires us to consider the number of ratings in tandem with the average rating for a book. If we define popularity only in terms of average book ratings, we run into many instances where a book with 1 10/10 rating would be considered more popular than a book with 100 9.9/10 ratings. This interpretation doesnt exactly match the semantic meaning of popularity, so we want a ranking factor that prioritizes books with *many* high ratings.
The Bayesian average will allow us to make meaningful comparisons between highly rated items. 

In [4]:
num_ratings = book_ratings.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_ratings.rename(columns = {'Book-Rating':'num_ratings'}, inplace = True)

avg_ratings = book_ratings.groupby('Book-Title').mean()['Book-Rating'].reset_index()
avg_ratings.rename(columns = {'Book-Rating':'avg_ratings'}, inplace = True)

popular = num_ratings.merge(avg_ratings, on = 'Book-Title')
popular.head()

  avg_ratings = book_ratings.groupby('Book-Title').mean()['Book-Rating'].reset_index()


Unnamed: 0,Book-Title,num_ratings,avg_ratings
0,A Light in the Storm: The Civil War Diary of ...,4,2.25
1,Always Have Popsicles,1,0.0
2,Apple Magic (The Collector's series),1,0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.0
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.0


In [5]:
# Ranked by Bayesian Average
def score(df):
    R = df['avg_ratings']
    C = R.mean()
    v = df['num_ratings']
    m = v.mean()
    w = v / (v + m)
    S = w * R + (1 - w) * C
    return S

popular['score'] = score(popular)
popular_by_score = popular.sort_values(by = 'score', ascending = False)
popular_by_score.head(10)

Unnamed: 0,Book-Title,num_ratings,avg_ratings,score
70396,Free,56,8.017857,7.653991
178359,The Blue Day Book: A Lesson in Cheering Yourse...,11,9.181818,7.420285
34752,Chobits (Chobits),28,8.035714,7.353835
47298,Death: The High Cost of Living,9,9.444444,7.332961
207326,The Shrinking of Treehorn,8,9.5,7.197179
4462,A Letter to Mrs. Roosevelt,8,9.5,7.197179
176446,The Baby Book: Everything You Need to Know Abo...,13,8.461538,7.082237
196050,The Lord of the Rings (Leatherette Collector's...,6,10.0,7.040947
104554,"Le Combat ordinaire, tome 1",8,9.25,7.034277
58541,El Hobbit,26,7.692308,7.013899


In [6]:
# Ranked Manually
popular_overall = popular[popular['num_ratings'] >= 300].sort_values('avg_ratings', ascending = False)
popular_overall.head(10)

Unnamed: 0,Book-Title,num_ratings,avg_ratings,score
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,5.823489
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,5.792214
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441,5.469644
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453,5.165945
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837,4.924721
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652,4.880844
219741,To Kill a Mockingbird,510,4.7,4.684947
183573,The Da Vinci Code,898,4.642539,4.634232
187880,The Five People You Meet in Heaven,430,4.551163,4.534803
180556,The Catcher in the Rye,449,4.545657,4.530035


In [7]:
popular_df = popular_overall.merge(books, on = 'Book-Title').drop_duplicates('Book-Title')[['Book-Title', 'Book-Author', 'Image-URL-M','num_ratings','avg_ratings']]
popular_df.head(10)

Unnamed: 0,Book-Title,Book-Author,Image-URL-M,num_ratings,avg_ratings
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
3,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
5,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
9,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
12,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
21,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
23,To Kill a Mockingbird,Harper Lee,http://images.amazon.com/images/P/0446310786.0...,510,4.7
31,The Da Vinci Code,Dan Brown,http://images.amazon.com/images/P/0385504209.0...,898,4.642539
37,The Five People You Meet in Heaven,Mitch Albom,http://images.amazon.com/images/P/0786868716.0...,430,4.551163
39,The Catcher in the Rye,J.D. Salinger,http://images.amazon.com/images/P/0316769487.0...,449,4.545657


## Collaborative Filtering-Based RecSys

In [8]:
# Users that gave >200 ratings
reviewers = book_ratings.groupby('User-ID').count()['Book-Rating']>200
reviewers_index = reviewers[reviewers].index
reviewers_df = book_ratings[book_ratings['User-ID'].isin(reviewers_index)]

# Books with >=50 ratings
rated_books = reviewers_df.groupby('Book-Title').count()['Book-Rating']>=50
rated_books_index = rated_books[rated_books].index

# Ratings with >=50 ratings only from users who gave >200 ratings
overall_ratings = reviewers_df[reviewers_df['Book-Title'].isin(rated_books_index)]
overall_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
63,278418,446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
65,3363,446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
66,7158,446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
69,11676,446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
74,23768,446520802,6,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...


In [9]:
from sklearn.metrics.pairwise import cosine_similarity

pt = overall_ratings.pivot_table(index = 'Book-Title', columns = 'User-ID', values = 'Book-Rating')
pt.fillna(0, inplace = True)

sim_score = cosine_similarity(pt)

In [10]:
def recommend(book_name):
    index = np.where(pt.index == book_name)[0][0]
    similar = sorted(enumerate(sim_score[index]), key = lambda x:x[1], reverse = True)[1:11]
    rec_books = []
    for i in similar:
        item = []
        temp = books[books['Book-Title'] == pt.index[i[0]]]
        item.extend(list(temp.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp.drop_duplicates('Book-Title')['Book-Author'].values))
        item.extend(list(temp.drop_duplicates('Book-Title')['Image-URL-M'].values))
        rec_books.append(item)
    return rec_books

book = '1984'
recommendations = recommend(book)
recommendations

[['Animal Farm',
  'George Orwell',
  'http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg'],
 ["The Handmaid's Tale",
  'Margaret Atwood',
  'http://images.amazon.com/images/P/0449212602.01.MZZZZZZZ.jpg'],
 ['Brave New World',
  'Aldous Huxley',
  'http://images.amazon.com/images/P/0060809833.01.MZZZZZZZ.jpg'],
 ['The Vampire Lestat (Vampire Chronicles, Book II)',
  'ANNE RICE',
  'http://images.amazon.com/images/P/0345313860.01.MZZZZZZZ.jpg'],
 ['The Hours : A Novel',
  'Michael Cunningham',
  'http://images.amazon.com/images/P/0312243022.01.MZZZZZZZ.jpg'],
 ['Fahrenheit 451',
  'Ray Bradbury',
  'http://images.amazon.com/images/P/3257208626.01.MZZZZZZZ.jpg'],
 ['The Catcher in the Rye',
  'J.D. Salinger',
  'http://images.amazon.com/images/P/0316769487.01.MZZZZZZZ.jpg'],
 ['Naked',
  'David Sedaris',
  'http://images.amazon.com/images/P/0316777730.01.MZZZZZZZ.jpg'],
 ['The Hundred Secret Senses',
  'Amy Tan',
  'http://images.amazon.com/images/P/0399141146.01.MZZZZZZZ.jpg']

## Create GUI

In [11]:
from tkinter import *
from PIL import ImageTk, Image

In [12]:

root = Tk()
frame = Frame(root)
frame.pack()

# Entry Frame ---------------------------------
prompt_label = Label(frame, text = 'Book Title')
prompt_label.pack(side = LEFT)

book_title = StringVar()
user_entry = Entry(frame, textvariable = book_title)
user_entry.pack(side = LEFT)

def showResults(book_title):
    frame = Frame(root)
    label = Label(frame, text = 'TODO')
    frame.pack()

go_btn = Button(frame, text = 'Get Recs', command = showResults(book_title))
go_btn.pack(side = LEFT)

# Result Frame ---------------------------------

    

root.title('Book RecSys')
root.mainloop()

In [13]:
book = '1984'
recommendations = recommend(book)
recommendations[0]

def createBookFrame(parent, rec):
    book = Frame(parent)
    title = Label(book, text = rec[0])
    title.pack()
    author = Label(book, text = rec[1])
    author.pack()
    img = ImageTk.PhotoImage(Image.open(rec[2]))
    viewer = Label(root, image = img)
    viewer.pack()
    return book

def createResultFrame(parent,recs):
    result = Frame(parent)
    result.pack()
    for rec in recs:
        book = createBookFrame(result, rec)
        book.pack()
    return result

def showResults(parent, book_title):
    if book_title == '':
        pass
    recs = recommend(book_title)
    results = createResultFrame(parent, recs)
    results.pack()
    



root = Tk()
main = Frame(root)
main.pack()

frame1 = Frame(main)
frame1.pack()

frame2 = Frame(main)
frame2.pack()

prompt_label = Label(frame1, text = 'Book Title')
prompt_label.pack(side = LEFT)

book_title = StringVar()
user_entry = Entry(frame1, textvariable = book_title)
user_entry.pack(side = LEFT)

go_btn = Button(frame1, text = 'Get Recs', command = showResults(frame2, book_title))
go_btn.pack(side = LEFT)

# Result Frame ---------------------------------

    

root.title('Book RecSys')
root.mainloop()

IndexError: index 0 is out of bounds for axis 0 with size 0

In [14]:
import tkinter as tk
from tkinter import ttk

class BookRecSys:
    def __init__(self, root):
        main_frame = Frame(root)
        main_frame.pack()

        prompt_label = Label(frame, text = 'Book Title')
        prompt_label.pack(side = LEFT)

        book_title = StringVar()
        user_entry = Entry(frame, textvariable = book_title)
        user_entry.pack(side = LEFT)

        go_btn = Button(frame, text = 'Get Recs', command = showResults(book_title))
        go_btn.pack(side = LEFT)

        result_frame = Frame(root)
        result_frame.pack()

        Label(result_frame, text = 'To Implement').pack()
        

root = Tk()
app = BookRecSys(root)
root.mainloop()

TclError: can't invoke "label" command: application has been destroyed