In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
dtype = {
    'ISBN': str,
    'Book-Title': str,
    'Book-Author': str,
    'Year-Of-Publication': str,
    'Publisher': str,
    'Image-URL-S': str,
    'Image-URL-M': str,
    'Image-URL-L': str
}
books = pd.read_csv('dataset/Books.csv', sep=',',
    encoding='utf-8',
    dtype=dtype)
# books = pd.read_csv("./dataset/Books.csv")
users = pd.read_csv("./dataset/Users.csv")
ratings = pd.read_csv("./dataset/Ratings.csv")

In [3]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [4]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [5]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [6]:
books.shape[0]

271360

In [7]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [8]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [9]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [10]:
books.duplicated().sum()

0

In [11]:
ratings.duplicated().sum()

0

In [12]:
users.duplicated().sum()

0

## Popularity based recommended system

Render books with top 50 books with highest average ratings with minimum 250 users rated those books.

In [13]:
# firstly merge rating and books on ISBN column

ratings_with_book_name = ratings.merge(books, on="ISBN")

In [14]:
ratings_with_book_name.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [15]:
# firstly put Group by on "Book title" and then find the number of votes rated on each book.
number_of_rating_dataframe = ratings_with_book_name.groupby("Book-Title").count()['Book-Rating']

In [16]:
number_of_rating_dataframe.head()

Book-Title
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    4
 Always Have Popsicles                                                                                        1
 Apple Magic (The Collector's series)                                                                         1
 Ask Lily (Young Women of Faith: Lily Series, Book 5)                                                         1
 Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   1
Name: Book-Rating, dtype: int64

In [55]:
book_rating_df = number_of_rating_dataframe.reset_index()
book_rating_df.head()

Unnamed: 0,Book-Title,Book-Rating
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [56]:
# rename column from Book-Rating to total number of ratings 
book_rating_df.rename(columns={'Book-Rating':'Number of Ratings'}, inplace=True)
book_rating_df.head()

Unnamed: 0,Book-Title,Number of Ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [19]:
ratings_with_book_name['Book-Title']
np.where(ratings_with_book_name['Book-Title'] == '0590567330059056733005905673300590567330')[0]

array([], dtype=int64)

In [20]:
ratings_with_book_name['Book-Rating']
np.where(ratings_with_book_name['Book-Rating'] == '0590567330059056733005905673300590567330')[0]

array([], dtype=int64)

In [21]:
# for the same, instead of count, we will use mean
average_of_rating_dataframe = ratings_with_book_name.groupby("Book-Title")['Book-Rating'].agg('mean')

In [22]:
average_of_rating_dataframe.head()

Book-Title
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    2.25
 Always Have Popsicles                                                                                        0.00
 Apple Magic (The Collector's series)                                                                         0.00
 Ask Lily (Young Women of Faith: Lily Series, Book 5)                                                         8.00
 Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   0.00
Name: Book-Rating, dtype: float64

In [57]:
avg_book_rating_df = average_of_rating_dataframe.reset_index()
avg_book_rating_df.rename(columns={'Book-Rating':'Average Ratings'}, inplace=True)
avg_book_rating_df.head()

Unnamed: 0,Book-Title,Average Ratings
0,A Light in the Storm: The Civil War Diary of ...,2.25
1,Always Have Popsicles,0.0
2,Apple Magic (The Collector's series),0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.0
4,Beyond IBM: Leadership Marketing and Finance ...,0.0


In [24]:
# firstly we will merge avg_book_rating_df and total book_rating_df.
merged_books = book_rating_df.merge(avg_book_rating_df, on="Book-Title")
merged_books

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [25]:
# then we will filter only those books, which has more than 250 ratings
popular_books= merged_books.loc[merged_books['Number of Ratings']>=250]
popular_books

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings
764,1984,284,4.454225
818,1st to Die: A Novel,509,3.575639
1048,2nd Chance,356,3.269663
1760,A Bend in the Road,346,3.364162
2281,"A Child Called \It\"": One Child's Courage to S...",265,4.086792
...,...,...,...
233850,White Oleander : A Novel,387,3.506460
233851,White Oleander : A Novel (Oprah's Book Club),356,3.772472
234740,Wicked: The Life and Times of the Wicked Witch...,326,3.766871
234951,Wild Animus,2502,1.019584


In [26]:
popular_books = popular_books.sort_values(by="Average Ratings", ascending=False)
popular_books

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.737410
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
...,...,...,...
227701,Vinegar Hill (Oprah's Book Club (Paperback)),265,2.245283
233635,Whispers,286,2.199301
143377,Presumed Innocent,294,2.139456
94382,Isle of Dogs,288,2.000000


In [27]:
# get only top 50 books
popular_books = popular_books.head(50)
popular_books

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
191612,The Hobbit : The Enchanting Prelude to The Lor...,281,5.007117
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
211384,"The Two Towers (The Lord of the Rings, Part 2)",260,4.880769
219741,To Kill a Mockingbird,510,4.7


In [28]:
# now we want Author name, publisher and image
# so we will merge it with main data of books
popular_books_with_authorname_and_image = popular_books.merge(books, on='Book-Title')
popular_books_with_authorname_and_image.head()

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
1,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,439136369,J. K. Rowling,2001,Scholastic,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...,http://images.amazon.com/images/P/0439136369.0...
2,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,786222743,J. K. Rowling,2000,Thorndike Press,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...,http://images.amazon.com/images/P/0786222743.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,439139597,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
4,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,439139600,J. K. Rowling,2002,Scholastic Paperbacks,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...,http://images.amazon.com/images/P/0439139600.0...


In [29]:
# here we are having duplicate book names with different ISBN.
# so we will drop duplicated records with respect to Book name
popular_books_with_authorname_and_image = popular_books_with_authorname_and_image.drop_duplicates('Book-Title')
popular_books_with_authorname_and_image.head()

Unnamed: 0,Book-Title,Number of Ratings,Average Ratings,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,0439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,0439139597,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
5,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741,0590353403,J. K. Rowling,1998,Scholastic,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...
9,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441,043935806X,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...
13,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453,0439064872,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...


In [30]:
# now we will only needed specific columns, like Book Title, Image-URL-S, Image-URL-M, Book-Author, Average Rating and Number of Ratings
final_books = popular_books_with_authorname_and_image[['Book-Title', 'Image-URL-S', 'Image-URL-M', 'Book-Author', 'Average Ratings', 'Number of Ratings']]
final_books.head()

Unnamed: 0,Book-Title,Image-URL-S,Image-URL-M,Book-Author,Average Ratings,Number of Ratings
0,Harry Potter and the Prisoner of Azkaban (Book 3),http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,J. K. Rowling,5.852804,428
3,Harry Potter and the Goblet of Fire (Book 4),http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,J. K. Rowling,5.824289,387
5,Harry Potter and the Sorcerer's Stone (Book 1),http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,J. K. Rowling,5.73741,278
9,Harry Potter and the Order of the Phoenix (Boo...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,J. K. Rowling,5.501441,347
13,Harry Potter and the Chamber of Secrets (Book 2),http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,J. K. Rowling,5.183453,556


## Collaborative Filtering based recommended system

| Books  | User 1  | User 2  |
| ------ | ------- | ------- |
| Book1  | Rating1 | Rating4 |
| Book2  | Rating2 | Rating5 |
| Book3  | Rating3 | Rating6 |

In [31]:
# now we will pick those users who have voted 200+ ratings and only those books which have atleast rated 50 times.



In [32]:
ratings_with_book_name

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
...,...,...,...,...,...,...,...,...,...,...
1031131,276688,0517145553,0,Mostly Harmless,Douglas Adams,1995,Random House Value Pub,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...
1031132,276688,1575660792,7,Gray Matter,Shirley Kennett,1996,Kensington Publishing Corporation,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...
1031133,276690,0590907301,0,Triplet Trouble and the Class Trip (Triplet Tr...,Debbie Dadey,1997,Apple,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...
1031134,276704,0679752714,0,A Desert of Pure Feeling (Vintage Contemporaries),Judith Freeman,1997,Vintage Books USA,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...


In [33]:
users_with_rating_count = ratings_with_book_name.groupby(by="User-ID").count()["Book-Rating"]
user_id_with_rating_count = users_with_rating_count.reset_index()
user_id_with_rating_count

Unnamed: 0,User-ID,Book-Rating
0,2,1
1,8,17
2,9,3
3,10,1
4,12,1
...,...,...
92101,278846,1
92102,278849,4
92103,278851,23
92104,278852,1


In [34]:
#filter only those users, which has rated more than 200 times
users_with_high_rating_count = user_id_with_rating_count.loc[user_id_with_rating_count['Book-Rating']>=200]
users_with_high_rating_count

Unnamed: 0,User-ID,Book-Rating
87,254,300
698,2276,456
862,2766,269
919,2977,227
1033,3363,890
...,...,...
90587,274308,1293
91112,275970,1325
91564,277427,490
91639,277639,265


In [35]:
# now filter books where UserID matches with users with high rating count. 
filtered_by_book_rating = ratings_with_book_name.loc[ratings_with_book_name['User-ID'].isin(users_with_high_rating_count['User-ID'])]
filtered_by_book_rating.sort_values(by="User-ID")

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
192935,254,0141301066,0,Matilda,Roald Dahl,1998,Puffin Books,http://images.amazon.com/images/P/0141301066.0...,http://images.amazon.com/images/P/0141301066.0...,http://images.amazon.com/images/P/0141301066.0...
195091,254,0441003257,0,Good Omens,Neil Gaiman,1996,Ace Books,http://images.amazon.com/images/P/0441003257.0...,http://images.amazon.com/images/P/0441003257.0...,http://images.amazon.com/images/P/0441003257.0...
195035,254,0440403278,0,Number the Stars (Yearling Newbery),Lois Lowry,1990,Yearling Books,http://images.amazon.com/images/P/0440403278.0...,http://images.amazon.com/images/P/0440403278.0...,http://images.amazon.com/images/P/0440403278.0...
100360,254,051512317X,0,Rising Tides,Nora Roberts,2001,Jove Books,http://images.amazon.com/images/P/051512317X.0...,http://images.amazon.com/images/P/051512317X.0...,http://images.amazon.com/images/P/051512317X.0...
194953,254,0440241413,0,Confessions of a Shopaholic,SOPHIE KINSELLA,2003,Dell,http://images.amazon.com/images/P/0440241413.0...,http://images.amazon.com/images/P/0440241413.0...,http://images.amazon.com/images/P/0440241413.0...
...,...,...,...,...,...,...,...,...,...,...
156019,278418,0515130168,0,Active Measures,Alexander Court,2001,Jove Books,http://images.amazon.com/images/P/0515130168.0...,http://images.amazon.com/images/P/0515130168.0...,http://images.amazon.com/images/P/0515130168.0...
156022,278418,0515130524,0,The Fighting Agents (Men at War (Paperback Jove)),W. E. B. Griffin,2001,Jove Books,http://images.amazon.com/images/P/0515130524.0...,http://images.amazon.com/images/P/0515130524.0...,http://images.amazon.com/images/P/0515130524.0...
156035,278418,0515131229,8,Dance upon the Air (Three Sisters Island Trilogy),Nora Roberts,2003,Jove Books,http://images.amazon.com/images/P/0515131229.0...,http://images.amazon.com/images/P/0515131229.0...,http://images.amazon.com/images/P/0515131229.0...
155694,278418,0515127833,0,River's End,Nora Roberts,2003,Jove Books,http://images.amazon.com/images/P/0515127833.0...,http://images.amazon.com/images/P/0515127833.0...,http://images.amazon.com/images/P/0515127833.0...


In [36]:
#check if there is any duplicate, then drop that.
filtered_by_book_rating = filtered_by_book_rating.drop_duplicates()


In [37]:
# now group filtered_by_book_rating by book title
books_with_rating = filtered_by_book_rating.groupby(by="Book-Title").count()['Book-Rating']
books_with_rating

Book-Title
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    2
 Always Have Popsicles                                                                                        1
 Apple Magic (The Collector's series)                                                                         1
 Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   1
 Clifford Visita El Hospital (Clifford El Gran Perro Colorado)                                                1
                                                                                                             ..
Ã?Â?ber das Fernsehen.                                                                                        2
Ã?Â?ber die Pflicht zum Ungehorsam gegen den Staat.                                                           3
Ã?Â?lpiraten.                                                                                

In [38]:
books_with_rating = books_with_rating.reset_index()
books_with_high_rating = books_with_rating.loc[books_with_rating['Book-Rating'] > 50]
books_with_high_rating

Unnamed: 0,Book-Title,Book-Rating
484,1984,71
518,1st to Die: A Novel,160
653,2nd Chance,123
797,4 Blondes,70
1118,A Bend in the Road,114
...,...,...
154964,Year of Wonders,57
155186,You Belong To Me,55
155811,Zen and the Art of Motorcycle Maintenance: An ...,62
155952,Zoya,59


In [39]:
filtered_by_high_book_rating = filtered_by_book_rating.loc[filtered_by_book_rating["Book-Title"].isin(books_with_high_rating["Book-Title"])]
filtered_by_high_book_rating

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
63,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
65,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
66,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
69,11676,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
74,23768,0446520802,6,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
...,...,...,...,...,...,...,...,...,...,...
1026724,266865,0531001725,10,The Catcher in the Rye,Jerome David Salinger,1973,Scholastic Library Pub,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...
1027923,269566,0670809381,0,Echoes,Maeve Binchy,1986,Penguin USA,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...
1028777,271284,0440910927,0,The Rainmaker,John Grisham,1995,Island,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...
1029070,271705,B0001PIOX4,0,Fahrenheit 451,Ray Bradbury,1993,Simon &amp; Schuster,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...


In [40]:
final_books_with_high_rating_and_famous_users = filtered_by_high_book_rating
final_books_with_high_rating_and_famous_users

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
63,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
65,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
66,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
69,11676,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
74,23768,0446520802,6,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
...,...,...,...,...,...,...,...,...,...,...
1026724,266865,0531001725,10,The Catcher in the Rye,Jerome David Salinger,1973,Scholastic Library Pub,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...,http://images.amazon.com/images/P/0531001725.0...
1027923,269566,0670809381,0,Echoes,Maeve Binchy,1986,Penguin USA,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...,http://images.amazon.com/images/P/0670809381.0...
1028777,271284,0440910927,0,The Rainmaker,John Grisham,1995,Island,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...,http://images.amazon.com/images/P/0440910927.0...
1029070,271705,B0001PIOX4,0,Fahrenheit 451,Ray Bradbury,1993,Simon &amp; Schuster,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...,http://images.amazon.com/images/P/B0001PIOX4.0...


In [41]:
pt = final_books_with_high_rating_and_famous_users.pivot_table(index="Book-Title", columns="User-ID", values="Book-Rating")
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,10.0,,,,,,0.0,,,
1st to Die: A Novel,,,,,,,,,,9.0,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,0.0,...,,,,,,0.0,,,0.0,
4 Blondes,,,,,,,,0.0,,,...,,,,,,,,,,
A Bend in the Road,0.0,,7.0,,,,,,,,...,,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,7.0,,,,,,0.0,...,,9.0,,,,,0.0,,,
You Belong To Me,,,,,,,,,0.0,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,0.0,,,...,,,,,,,0.0,,,
Zoya,,,,,,,,,,,...,,0.0,,,,,,,,


In [42]:
pt = pt.fillna(value=0.)

Now this is equivalent to required structure:



| Books  | User 1  | User 2  |
| ------ | ------- | ------- |
| Book1  | Rating1 | Rating4 |
| Book2  | Rating2 | Rating5 |
| Book3  | Rating3 | Rating6 |

In [43]:
similarity_scores = cosine_similarity(pt)

In [44]:
similarity_scores

array([[1.        , 0.0999137 , 0.01189468, ..., 0.11799012, 0.07158663,
        0.04205081],
       [0.0999137 , 1.        , 0.2364573 , ..., 0.07446129, 0.16773875,
        0.14263397],
       [0.01189468, 0.2364573 , 1.        , ..., 0.04558758, 0.04938579,
        0.10796119],
       ...,
       [0.11799012, 0.07446129, 0.04558758, ..., 1.        , 0.07085128,
        0.0196177 ],
       [0.07158663, 0.16773875, 0.04938579, ..., 0.07085128, 1.        ,
        0.10602962],
       [0.04205081, 0.14263397, 0.10796119, ..., 0.0196177 , 0.10602962,
        1.        ]])

In [45]:
similarity_scores.shape

(683, 683)

In [46]:
pt.index[0]

'1984'

In [47]:
#find index of the book in pt.
index_1 = np.where(pt.index =='1984')[0][0]
index_1

0

In [48]:
index_2 = np.where(pt.index.map(lambda x: 'Is for Outlaw' in x))[0][0]
index_2

682

In [49]:
distance = similarity_scores[index_1]
print(distance)
enumerate(distance)

[1.         0.0999137  0.01189468 0.         0.05229234 0.02703559
 0.08005247 0.133798   0.03177829 0.03573298 0.0226271  0.06592522
 0.020304   0.09425025 0.08089641 0.10866788 0.0496928  0.02453052
 0.11405415 0.         0.13965274 0.07645776 0.05992325 0.08499677
 0.         0.06829594 0.13307519 0.07404925 0.11854319 0.007483
 0.01435345 0.         0.07761015 0.04406341 0.0151626  0.09251799
 0.01776199 0.02543351 0.07778745 0.11379679 0.0554492  0.08139372
 0.08254087 0.08560054 0.05350252 0.0534403  0.26331669 0.09527704
 0.05861375 0.08728505 0.06575169 0.         0.04353225 0.01871487
 0.         0.05484345 0.00531237 0.07674541 0.05085288 0.18422052
 0.         0.01208759 0.0290784  0.04169477 0.12354122 0.16140808
 0.         0.1301383  0.06445396 0.         0.         0.
 0.10686088 0.02734449 0.04405541 0.0254609  0.06699347 0.01297391
 0.10248694 0.03167165 0.01328403 0.2306084  0.         0.10130612
 0.06009182 0.13811804 0.11361279 0.08116824 0.00829019 0.0654278
 0.   

<enumerate at 0x2913a8780>

In [50]:
list(enumerate(distance))

[(0, 0.9999999999999998),
 (1, 0.09991370443681998),
 (2, 0.011894676421575648),
 (3, 0.0),
 (4, 0.0522923428754616),
 (5, 0.027035592129264218),
 (6, 0.08005246987594641),
 (7, 0.13379800079660079),
 (8, 0.03177829126649559),
 (9, 0.03573297811768626),
 (10, 0.022627095695491876),
 (11, 0.06592521898256122),
 (12, 0.02030399594727408),
 (13, 0.09425025422049896),
 (14, 0.08089640554206895),
 (15, 0.10866787678155022),
 (16, 0.049692801008612855),
 (17, 0.024530524976685255),
 (18, 0.11405414701564792),
 (19, 0.0),
 (20, 0.13965274219894008),
 (21, 0.07645775981397283),
 (22, 0.05992324616320607),
 (23, 0.08499677059744355),
 (24, 0.0),
 (25, 0.06829593609316079),
 (26, 0.13307519236960055),
 (27, 0.0740492482078533),
 (28, 0.11854319325391062),
 (29, 0.007482996232171423),
 (30, 0.01435345067390976),
 (31, 0.0),
 (32, 0.07761014992531964),
 (33, 0.04406341116244771),
 (34, 0.015162595382829198),
 (35, 0.09251799313259884),
 (36, 0.017761989688713762),
 (37, 0.02543350839956968),
 (38,

In [51]:
sorted(list(enumerate(distance)), key=lambda x:x[1], reverse=True)

[(0, 0.9999999999999998),
 (46, 0.26331668558969296),
 (527, 0.25717553496685785),
 (81, 0.2306084004654351),
 (613, 0.22700367288326906),
 (533, 0.2204467783292418),
 (176, 0.21650925243379174),
 (484, 0.2142249863657188),
 (300, 0.20469829119944727),
 (335, 0.20143976888534182),
 (536, 0.19510223814372807),
 (504, 0.1945913301271719),
 (510, 0.1915092894769818),
 (472, 0.18790707816989383),
 (534, 0.18715039796581023),
 (309, 0.18515418624826524),
 (603, 0.1843874814079204),
 (577, 0.1843490366438097),
 (59, 0.18422051694704247),
 (601, 0.18094225216441853),
 (528, 0.1793562146567134),
 (626, 0.17633623834710418),
 (197, 0.17585166214330838),
 (523, 0.17528356134820602),
 (568, 0.1693494650818239),
 (519, 0.16571429613724833),
 (526, 0.16298683907968622),
 (65, 0.16140807538784901),
 (115, 0.16102364040924586),
 (261, 0.16078996341819543),
 (345, 0.1591780299676662),
 (461, 0.15690035419158116),
 (396, 0.15628767003519692),
 (570, 0.15588904638387238),
 (644, 0.15485243550124778),
 (

In [52]:
new_index = np.where(pt.index == '2nd Chance')[0][0]
print(new_index)
ecludian_distance = similarity_scores[new_index]
print(ecludian_distance)
enum_value = enumerate(ecludian_distance)
print(enum_value)
item_list = list(enum_value)
item_list

2
[0.01189468 0.2364573  1.         0.         0.06909024 0.10559126
 0.         0.10774375 0.06702185 0.04168195 0.04772156 0.
 0.04758005 0.04445515 0.09877662 0.06505525 0.16998812 0.11381916
 0.11713888 0.03424097 0.08780144 0.19734903 0.         0.10296667
 0.         0.05862648 0.03167738 0.02665072 0.0927721  0.01578198
 0.11200668 0.07976511 0.09795366 0.11151806 0.13297767 0.07759342
 0.09490079 0.10728083 0.098855   0.05052686 0.04079472 0.
 0.02486892 0.10177107 0.04624558 0.16514854 0.0387812  0.04734877
 0.05537103 0.05389246 0.         0.         0.17478183 0.16630236
 0.         0.03372225 0.09187298 0.20224734 0.14105132 0.1393888
 0.09040688 0.02549327 0.1202022  0.03908273 0.04271377 0.04004905
 0.11285355 0.02240551 0.01410128 0.03325543 0.11062089 0.05800659
 0.15450668 0.07858063 0.0997612  0.18042591 0.07849569 0.16762106
 0.08990292 0.21947562 0.24169011 0.04156953 0.07649081 0.01463419
 0.         0.04239252 0.1509391  0.01258731 0.15069871 0.06006632
 0.       

[(0, 0.011894676421575648),
 (1, 0.236457295254443),
 (2, 1.0),
 (3, 0.0),
 (4, 0.06909024214591328),
 (5, 0.10559126382470128),
 (6, 0.0),
 (7, 0.10774375110550646),
 (8, 0.0670218487291342),
 (9, 0.0416819540485443),
 (10, 0.04772156476776279),
 (11, 0.0),
 (12, 0.047580047424453376),
 (13, 0.044455151509031496),
 (14, 0.09877662230505671),
 (15, 0.06505524505788216),
 (16, 0.16998812090738538),
 (17, 0.11381916242700778),
 (18, 0.11713887938926706),
 (19, 0.034240966437880764),
 (20, 0.0878014420489393),
 (21, 0.1973490309636965),
 (22, 0.0),
 (23, 0.1029666684450606),
 (24, 0.0),
 (25, 0.058626484557583125),
 (26, 0.03167737914741039),
 (27, 0.026650717122388885),
 (28, 0.09277209651363727),
 (29, 0.015781976359504266),
 (30, 0.11200667554588858),
 (31, 0.07976510802634351),
 (32, 0.09795365542039074),
 (33, 0.11151806442907786),
 (34, 0.1329776664248242),
 (35, 0.07759341529983549),
 (36, 0.0949007868093896),
 (37, 0.10728083132678148),
 (38, 0.09885500199920219),
 (39, 0.05052685

In [53]:
def recommendBook(book_name):
    # firstly get index
    index = np.where(pt.index == book_name)[0][0]
    ecludian_distance = similarity_scores[index]
    enum_value = enumerate(ecludian_distance)
    print(enum_value)
    item_list = list(enum_value)
    top_list = sorted(item_list, key=lambda x:x[1], reverse=False)
    top_five_recommended = top_list[1:6]
    return top_five_recommended

In [54]:
booksList = recommendBook('2nd Chance')
for item in booksList:
    print(pt.index[item[0]])

<enumerate object at 0x29146dbc0>
A Child Called \It\": One Child's Courage to Survive"
A Heartbreaking Work of Staggering Genius
A Virtuous Woman (Oprah's Book Club (Paperback))
A Widow for One Year
Angela's Ashes: A Memoir


In [58]:
booksList = recommendBook('1984')
for item in booksList:
    print(pt.index[item[0]])

<enumerate object at 0x2b7654800>
A Thin Dark Line (Mysteries &amp; Horror)
A Widow for One Year
Acceptable Risk
Artemis Fowl (Artemis Fowl, Book 1)
Atonement : A Novel
