In [93]:
import numpy as np
import pandas as pd

In [94]:
import warnings
warnings.filterwarnings('ignore')

In [95]:
books = pd.read_csv('Books.csv')
users = pd.read_csv('Users.csv')
ratings = pd.read_csv('Ratings.csv')

In [96]:
books.head(1)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...


In [97]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [98]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [99]:
print(books.shape)
print(users.shape)
print(ratings.shape)

(271360, 8)
(278858, 3)
(1149780, 3)


### Checking Null Values

In [100]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [101]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [102]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

### Checking Duplicated Values

In [103]:
books.duplicated().sum()

0

In [104]:
users.duplicated().sum()

0

In [105]:
ratings.duplicated().sum()

0

### Popularity Based Recommender System

In [123]:
ratings_with_name = ratings.merge(books,on='ISBN')

In [124]:
ratings_with_name.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [125]:
# Number of Ratings per Book

num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()

num_rating_df.rename(columns={'Book-Rating':'Num_Ratings'}, inplace=True)

num_rating_df.head()

Unnamed: 0,Book-Title,Num_Ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [126]:
# Average Rating per Book

avg_rating_df = ratings_with_name.groupby('Book-Title')['Book-Rating'].mean().to_frame()

avg_rating_df.rename(columns={'Book-Rating':'Avg_Ratings'}, inplace=True)

avg_rating_df.head()

Unnamed: 0_level_0,Avg_Ratings
Book-Title,Unnamed: 1_level_1
"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)",2.25
Always Have Popsicles,0.0
Apple Magic (The Collector's series),0.0
"Ask Lily (Young Women of Faith: Lily Series, Book 5)",8.0
Beyond IBM: Leadership Marketing and Finance for the 1990s,0.0


In [127]:
# Merge both of them into a single Dataframe

popular_df = num_rating_df.merge(avg_rating_df,on='Book-Title')

popular_df.head()

Unnamed: 0,Book-Title,Num_Ratings,Avg_Ratings
0,A Light in the Storm: The Civil War Diary of ...,4,2.25
1,Always Have Popsicles,1,0.0
2,Apple Magic (The Collector's series),1,0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.0
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.0


In [128]:
# Merging with the main books dataframe to have all related data

popular_df = popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Image-URL-S','Image-URL-M','Image-URL-L','Num_Ratings','Avg_Ratings']]

In [129]:
popular_df.head(10)

Unnamed: 0,Book-Title,Book-Author,Image-URL-S,Image-URL-M,Image-URL-L,Num_Ratings,Avg_Ratings
0,A Light in the Storm: The Civil War Diary of ...,Karen Hesse,http://images.amazon.com/images/P/0590567330.0...,http://images.amazon.com/images/P/0590567330.0...,http://images.amazon.com/images/P/0590567330.0...,4,2.25
1,Always Have Popsicles,Rebecca Harvin,http://images.amazon.com/images/P/0964147726.0...,http://images.amazon.com/images/P/0964147726.0...,http://images.amazon.com/images/P/0964147726.0...,1,0.0
2,Apple Magic (The Collector's series),Martina Boudreau,http://images.amazon.com/images/P/0942320093.0...,http://images.amazon.com/images/P/0942320093.0...,http://images.amazon.com/images/P/0942320093.0...,1,0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",Nancy N. Rue,http://images.amazon.com/images/P/0310232546.0...,http://images.amazon.com/images/P/0310232546.0...,http://images.amazon.com/images/P/0310232546.0...,1,8.0
4,Beyond IBM: Leadership Marketing and Finance ...,Lou Mobley,http://images.amazon.com/images/P/0962295701.0...,http://images.amazon.com/images/P/0962295701.0...,http://images.amazon.com/images/P/0962295701.0...,1,0.0
5,Clifford Visita El Hospital (Clifford El Gran...,Norman Bridwell,http://images.amazon.com/images/P/0439188970.0...,http://images.amazon.com/images/P/0439188970.0...,http://images.amazon.com/images/P/0439188970.0...,1,0.0
6,Dark Justice,Jack Higgins,http://images.amazon.com/images/P/0399151788.0...,http://images.amazon.com/images/P/0399151788.0...,http://images.amazon.com/images/P/0399151788.0...,1,10.0
7,Deceived,Carla Simpson,http://images.amazon.com/images/P/0786000015.0...,http://images.amazon.com/images/P/0786000015.0...,http://images.amazon.com/images/P/0786000015.0...,2,0.0
8,Earth Prayers From around the World: 365 Pray...,Elizabeth Roberts,http://images.amazon.com/images/P/006250746X.0...,http://images.amazon.com/images/P/006250746X.0...,http://images.amazon.com/images/P/006250746X.0...,10,5.0
9,Final Fantasy Anthology: Official Strategy Gu...,David Cassady,http://images.amazon.com/images/P/1566869250.0...,http://images.amazon.com/images/P/1566869250.0...,http://images.amazon.com/images/P/1566869250.0...,4,5.0


In [130]:
popular_df = popular_df[popular_df['Num_Ratings']>=250].sort_values('Avg_Ratings',ascending=False)

In [131]:
popular_df.head(10)

Unnamed: 0,Book-Title,Book-Author,Image-URL-S,Image-URL-M,Image-URL-L,Num_Ratings,Avg_Ratings
89972,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
89954,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
89982,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,278,5.73741
89959,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
89944,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
214749,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339681.0...,http://images.amazon.com/images/P/0345339681.0...,http://images.amazon.com/images/P/0345339681.0...,281,5.007117
209880,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339703.0...,http://images.amazon.com/images/P/0345339703.0...,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
89989,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
237361,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339711.0...,http://images.amazon.com/images/P/0345339711.0...,http://images.amazon.com/images/P/0345339711.0...,260,4.880769
246689,To Kill a Mockingbird,Harper Lee,http://images.amazon.com/images/P/0446310786.0...,http://images.amazon.com/images/P/0446310786.0...,http://images.amazon.com/images/P/0446310786.0...,510,4.7


In [133]:
# Convert DataFrame to a list of dictionaries for pickel export

popular_df_dict = popular_df.to_dict(orient='records')

print(popular_df_dict)

[{'Book-Title': 'Harry Potter and the Prisoner of Azkaban (Book 3)', 'Book-Author': 'J. K. Rowling', 'Image-URL-S': 'http://images.amazon.com/images/P/0439136350.01.THUMBZZZ.jpg', 'Image-URL-M': 'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg', 'Image-URL-L': 'http://images.amazon.com/images/P/0439136350.01.LZZZZZZZ.jpg', 'Num_Ratings': 428, 'Avg_Ratings': 5.852803738317757}, {'Book-Title': 'Harry Potter and the Goblet of Fire (Book 4)', 'Book-Author': 'J. K. Rowling', 'Image-URL-S': 'http://images.amazon.com/images/P/0439139597.01.THUMBZZZ.jpg', 'Image-URL-M': 'http://images.amazon.com/images/P/0439139597.01.MZZZZZZZ.jpg', 'Image-URL-L': 'http://images.amazon.com/images/P/0439139597.01.LZZZZZZZ.jpg', 'Num_Ratings': 387, 'Avg_Ratings': 5.8242894056847545}, {'Book-Title': "Harry Potter and the Sorcerer's Stone (Book 1)", 'Book-Author': 'J. K. Rowling', 'Image-URL-S': 'http://images.amazon.com/images/P/0590353403.01.THUMBZZZ.jpg', 'Image-URL-M': 'http://images.amazon.com/im

In [136]:
popular_df_dict[0]

{'Book-Title': 'Harry Potter and the Prisoner of Azkaban (Book 3)',
 'Book-Author': 'J. K. Rowling',
 'Image-URL-S': 'http://images.amazon.com/images/P/0439136350.01.THUMBZZZ.jpg',
 'Image-URL-M': 'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg',
 'Image-URL-L': 'http://images.amazon.com/images/P/0439136350.01.LZZZZZZZ.jpg',
 'Num_Ratings': 428,
 'Avg_Ratings': 5.852803738317757}