In [1]:
import numpy as np
import pandas as pd

books = pd.read_csv('Books.csv')
ratings = pd.read_csv('Ratings.csv')
users = pd.read_csv('Users.csv')

  interactivity=interactivity, compiler=compiler, result=result)


## Popularity-based rating system

In [2]:
ratings_with_books = ratings.merge(books, on='ISBN')

In [3]:
num_ratings_df = ratings_with_books.groupby('Book-Title').count()['Book-Rating'].reset_index().rename(columns={'Book-Rating':"num_ratings"})

In [4]:
avg_rating_df = ratings_with_books.groupby('Book-Title').mean()['Book-Rating'].reset_index().rename(columns={'Book-Rating':"avg_rating"})

In [5]:
popularity_df = num_ratings_df.merge(avg_rating_df, on='Book-Title')
popularity_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
5,Clifford Visita El Hospital (Clifford El Gran...,1,0.000000
6,Dark Justice,1,10.000000
7,Deceived,2,0.000000
8,Earth Prayers From around the World: 365 Pray...,10,5.000000
9,Final Fantasy Anthology: Official Strategy Gu...,4,5.000000


In [6]:
# keep only books with more than 250 ratings received
popularity_df = popularity_df[popularity_df['num_ratings'] > 250].sort_values('avg_rating', ascending=False)
popularity_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.737410
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
191612,The Hobbit : The Enchanting Prelude to The Lor...,281,5.007117
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.948370
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
211384,"The Two Towers (The Lord of the Rings, Part 2)",260,4.880769
219741,To Kill a Mockingbird,510,4.700000


In [7]:
# merge on books to get other info, dedup, take top 50 and select wanted columns
popularity_df = popularity_df.merge(books, on='Book-Title').drop_duplicates('Book-Title').head(50)[['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]

## Collaborative-filtering

In [8]:
filter_user_indexing = ratings_with_books.groupby('User-ID').count()['Book-Rating'] > 200
filtered_users = filter_user_indexing[filter_user_indexing].index

In [9]:
ratings_by_filtered_users = ratings_with_books[ratings_with_books['User-ID'].isin(filtered_users)]

In [10]:
filter_book_indexing = ratings_by_filtered_users.groupby('Book-Title').count()['Book-Rating'] >= 50
filtered_books = filter_book_indexing[filter_book_indexing].index

In [11]:
processed_ratings = ratings_by_filtered_users[ratings_by_filtered_users['Book-Title'].isin(filtered_books)].drop_duplicates()

In [12]:
# Ratings only by users with > 250 ratings given and books with >= 50 ratings received
pt = processed_ratings.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating').fillna(0)

In [13]:
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
similarity_scores = cosine_similarity(pt)

In [15]:
similarity_scores.shape

(706, 706)

In [16]:
pt.index[545]

"The Handmaid's Tale"

In [17]:
import pickle
pickle.dump(popularity_df, open('popularity.pkl', 'wb'))

In [29]:
pickle.dump(pt, open('pt.pkl', 'wb'))
pickle.dump(books, open('books.pkl', 'wb'))
pickle.dump(similarity_scores, open('similarity_scores.pkl', 'wb'))
pickle.dump(list(filtered_books), open('filtered_books.pkl', 'wb'))

In [21]:
pickle.dumbook_titles

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
5,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...
6,0425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...
7,0671870432,PLEADING GUILTY,Scott Turow,1993,Audioworks,http://images.amazon.com/images/P/0671870432.0...,http://images.amazon.com/images/P/0671870432.0...,http://images.amazon.com/images/P/0671870432.0...
8,0679425608,Under the Black Flag: The Romance and the Real...,David Cordingly,1996,Random House,http://images.amazon.com/images/P/0679425608.0...,http://images.amazon.com/images/P/0679425608.0...,http://images.amazon.com/images/P/0679425608.0...
9,074322678X,Where You'll Find Me: And Other Stories,Ann Beattie,2002,Scribner,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...


In [35]:
popularity_df.rename(columns={"Book-Title": "title", "Book-Author": "author", "Image-URL-M": "url"}).to_dict('records')

[{'title': 'Harry Potter and the Prisoner of Azkaban (Book 3)',
  'author': 'J. K. Rowling',
  'url': 'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg',
  'num_ratings': 428,
  'avg_rating': 5.852803738317757},
 {'title': 'Harry Potter and the Goblet of Fire (Book 4)',
  'author': 'J. K. Rowling',
  'url': 'http://images.amazon.com/images/P/0439139597.01.MZZZZZZZ.jpg',
  'num_ratings': 387,
  'avg_rating': 5.8242894056847545},
 {'title': "Harry Potter and the Sorcerer's Stone (Book 1)",
  'author': 'J. K. Rowling',
  'url': 'http://images.amazon.com/images/P/0590353403.01.MZZZZZZZ.jpg',
  'num_ratings': 278,
  'avg_rating': 5.737410071942446},
 {'title': 'Harry Potter and the Order of the Phoenix (Book 5)',
  'author': 'J. K. Rowling',
  'url': 'http://images.amazon.com/images/P/043935806X.01.MZZZZZZZ.jpg',
  'num_ratings': 347,
  'avg_rating': 5.501440922190202},
 {'title': 'Harry Potter and the Chamber of Secrets (Book 2)',
  'author': 'J. K. Rowling',
  'url': 'http://i

In [36]:
user_input = '1984'

In [38]:
index = np.where(pt.index == user_input)[0][0]
similar_items = sorted(list(enumerate(similarity_scores[index])), key=lambda x: x[1], reverse=True)[1:5]

In [39]:
similar_items

[(47, 0.2702651417103732),
 (545, 0.26396193711234966),
 (82, 0.2366937434740099),
 (634, 0.23299389358170397)]

In [42]:
item = []
temp_df = books[books['Book-Title'] == pt.index[similar_items[0][0]]]

In [44]:
temp_df.drop_duplicates('Book-Title')['Book-Title'].values

array(['Animal Farm'], dtype=object)

In [45]:
temp_df

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
285,451526341,Animal Farm,George Orwell,2004,Signet,http://images.amazon.com/images/P/0451526341.0...,http://images.amazon.com/images/P/0451526341.0...,http://images.amazon.com/images/P/0451526341.0...
4878,451522303,Animal Farm,George Orwell,1956,Signet Book,http://images.amazon.com/images/P/0451522303.0...,http://images.amazon.com/images/P/0451522303.0...,http://images.amazon.com/images/P/0451522303.0...
5912,451524667,Animal Farm,George Orwell,1990,New Amer Library Classics,http://images.amazon.com/images/P/0451524667.0...,http://images.amazon.com/images/P/0451524667.0...,http://images.amazon.com/images/P/0451524667.0...
8985,736605673,Animal Farm,George Orwell,1981,Books on Tape,http://images.amazon.com/images/P/0736605673.0...,http://images.amazon.com/images/P/0736605673.0...,http://images.amazon.com/images/P/0736605673.0...
52635,451518012,Animal Farm,George Orwell,1956,Signet Book,http://images.amazon.com/images/P/0451518012.0...,http://images.amazon.com/images/P/0451518012.0...,http://images.amazon.com/images/P/0451518012.0...
53668,451521560,Animal Farm,George Orwell,1986,New Amer Library Classics (Mm),http://images.amazon.com/images/P/0451521560.0...,http://images.amazon.com/images/P/0451521560.0...,http://images.amazon.com/images/P/0451521560.0...
115681,451519000,Animal Farm,George Orwell,1956,Penguin Putnam~mass,http://images.amazon.com/images/P/0451519000.0...,http://images.amazon.com/images/P/0451519000.0...,http://images.amazon.com/images/P/0451519000.0...
116165,151072558,Animal Farm,George Orwell,1990,Harcourt,http://images.amazon.com/images/P/0151072558.0...,http://images.amazon.com/images/P/0151072558.0...,http://images.amazon.com/images/P/0151072558.0...
131735,451520874,Animal Farm,George Orwell,1956,Signet Book,http://images.amazon.com/images/P/0451520874.0...,http://images.amazon.com/images/P/0451520874.0...,http://images.amazon.com/images/P/0451520874.0...
209114,1854597892,Animal Farm,George Orwell,2004,Nick Hern Books,http://images.amazon.com/images/P/1854597892.0...,http://images.amazon.com/images/P/1854597892.0...,http://images.amazon.com/images/P/1854597892.0...
