In [4]:
# Import libraries
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
# Ignore printing warnings for general readability
import warnings 
warnings.filterwarnings("ignore")

In [5]:
# Path of the raw books table, without the ".csv" extension (it is appended when the file is opened)
book_path = "./dataset/BX-Books"

In [6]:
# Fix the &amp; and other HTML escape sequences.
# A cleaned copy of the books CSV is written once; later runs reuse it.
import html
import os
print(os.getcwd())

raw_book_path = book_path
book_path = "./dataset/BX-Books-HTMLfixed"
if not os.path.isfile(f'{book_path}.csv'): # If doesn't exist
    # Read the raw file completely before creating any output, so that a
    # failed read cannot leave an empty "fixed" file behind.
    with open(f'{raw_book_path}.csv', 'r', encoding='latin-1') as f:
        content = html.unescape(f.read())
    # Write with the same encoding that loaddata() reads with (latin-1) rather
    # than the platform default. Characters that latin-1 cannot represent are
    # kept as numeric character references instead of raising UnicodeEncodeError.
    tmp_path = f'{book_path}.csv.tmp'
    with open(tmp_path, 'w', encoding='latin-1', errors='xmlcharrefreplace') as g:
        g.write(content)
    # Publish the file only once it has been written in full.
    os.replace(tmp_path, f'{book_path}.csv')
    print("Fixed file {0}.csv".format(raw_book_path))

C:\Users\Legion\Downloads\b\1INTRO2AI\GithubDesktop\book-rec


In [7]:
# Loading the dataset
def loaddata(filename):
    """Read a ';'-separated, latin-1 encoded CSV file into a DataFrame.

    Parameters
    ----------
    filename : str
        Path of the file WITHOUT the ".csv" extension.

    Returns
    -------
    pandas.DataFrame
        The parsed table. Malformed rows are skipped silently.
    """
    path = f'{filename}.csv'
    try:
        # pandas >= 1.3 spelling; the older keywords were removed in pandas 2.0.
        return pd.read_csv(path, sep=';', on_bad_lines='skip', encoding='latin-1')
    except TypeError:
        # Older pandas that does not know `on_bad_lines` yet.
        return pd.read_csv(path, sep=';', error_bad_lines=False,
                           warn_bad_lines=False, encoding='latin-1')

# Read the three tables in one go: books (from book_path), users and ratings.
book, user, rating = (
    loaddata(table_path)
    for table_path in (book_path, "./dataset/BX-Users", "./dataset/BX-Book-Ratings")
)

In [8]:
# Preprocessing data: give every table short snake_case column names and keep
# only the book columns used later on.
# rename() returns a new frame and ignores labels that are not present, so
# renaming BEFORE selecting keeps this cell safe to re-run and avoids an
# in-place rename on a column slice.
book = book.rename(columns={'Book-Title': 'title', 'Book-Author': 'author',
                            'Year-Of-Publication': 'year', 'Publisher': 'publisher'})
book = book[['ISBN', 'title', 'author', 'year', 'publisher']]
user = user.rename(columns={'User-ID': 'user_id', 'Location': 'location', 'Age': 'age'})
rating = rating.rename(columns={'User-ID': 'user_id', 'Book-Rating': 'rating'})

In [9]:
# Preview the ratings table with its renamed columns
rating

Unnamed: 0,user_id,ISBN,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [10]:
# Let's visualize ratings given by users:
# one row per user with the number of ratings they made, most active first.
# The column names are set explicitly because the labels that
# value_counts().reset_index() produces differ between pandas 1.x and 2.x.
rating_users = (
    rating['user_id']
    .value_counts()
    .rename_axis('user_id')
    .reset_index(name='rating')
)
rating_users

Unnamed: 0,user_id,rating
0,11676,13602
1,198711,7550
2,153662,6109
3,98391,5891
4,35859,5850
...,...,...
105278,116180,1
105279,116166,1
105280,116154,1
105281,116137,1


In [11]:
# Let's visualize ratings received by books:
# one row per ISBN with the number of ratings it received, most rated first.
# The column names are set explicitly because the labels that
# value_counts().reset_index() produces differ between pandas 1.x and 2.x.
rating_books = (
    rating['ISBN']
    .value_counts()
    .rename_axis('ISBN')
    .reset_index(name='rating')
)
rating_books

Unnamed: 0,ISBN,rating
0,0971880107,2502
1,0316666343,1295
2,0385504209,883
3,0060928336,732
4,0312195516,723
...,...,...
340551,1568656386,1
340552,1568656408,1
340553,1569551553,1
340554,1570081808,1


In [12]:
# In order to avoid rating bias & for making good recommendations, limit the dataset to only those
# users that have made more than 200 ratings & books that have received more than 50 ratings.
MIN_USER_RATINGS = 200  # exclusive lower bound on ratings made by a user
MIN_BOOK_RATINGS = 50   # exclusive lower bound on ratings received by a book

active_user_ids = rating_users.loc[rating_users['rating'] > MIN_USER_RATINGS, 'user_id']
popular_isbns = rating_books.loc[rating_books['rating'] > MIN_BOOK_RATINGS, 'ISBN']

rating = rating[rating['user_id'].isin(active_user_ids)]
rating = rating[rating['ISBN'].isin(popular_isbns)]

rating


Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1468,277427,006092988X,0
1469,277427,0060930535,0
1470,277427,0060932139,0
1471,277427,0060934417,0
...,...,...,...
1147440,275970,1400031354,0
1147441,275970,1400031362,0
1147470,275970,1558744606,0
1147517,275970,1573229725,0


In [13]:
# Book titles are easier to interpret than ISBNs, so join the book table onto
# the ratings via 'ISBN' and keep only the columns needed from here on.
rating = pd.merge(rating, book, on="ISBN").loc[:, ['user_id', 'title', 'rating', 'ISBN']]
rating

Unnamed: 0,user_id,title,rating,ISBN
0,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
2,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
3,12538,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
4,13552,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
...,...,...,...,...
89924,234828,Ringworld,8,0345333926
89925,236283,Ringworld,0,0345333926
89926,249628,Ringworld,0,0345333926
89927,261829,Ringworld,0,0345333926


In [14]:
# Count rows that are exact repeats of an earlier row
n_duplicates = rating.duplicated().sum()
print(f'Duplicate entries: {n_duplicates}')

Duplicate entries: 0


In [15]:
# Remove exact duplicate rows, keeping the first occurrence of each
rating = rating.drop_duplicates()
rating

Unnamed: 0,user_id,title,rating,ISBN
0,277427,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
1,3363,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
2,11676,Politically Correct Bedtime Stories: Modern Ta...,6,002542730X
3,12538,Politically Correct Bedtime Stories: Modern Ta...,10,002542730X
4,13552,Politically Correct Bedtime Stories: Modern Ta...,0,002542730X
...,...,...,...,...
89924,234828,Ringworld,8,0345333926
89925,236283,Ringworld,0,0345333926
89926,249628,Ringworld,0,0345333926
89927,261829,Ringworld,0,0345333926


## User-Based Collaborative Filtering Recommendation Algorithm    

In [16]:
# Build the user-item matrix: one row per user, one column per ISBN,
# NaN wherever the user has not rated the book.
book_pivot = pd.pivot_table(rating, values="rating", index='user_id', columns='ISBN')
book_pivot

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,0.0,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,0.0,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
2977,,,,,,,,,,,...,,,,,,,,,,
3363,,,,0.0,0.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,10.0,,,,,,,...,,,,,,,,,,
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


    We normalize the ratings by subtracting each user's average rating. After normalization, an item rated below the user's average gets a negative value (the user doesn't like the item), and an item rated above the user's average gets a positive value (the user likes the item).

In [17]:
# Normalize the user-item matrix by centring every row on that user's mean rating.
#   mean(axis=1)     -> averages across the columns, giving one value per user
#   sub(..., axis=0) -> matches those values against the row index (user_id)
user_mean_rating = book_pivot.mean(axis=1)
matrix_norm = book_pivot.sub(user_mean_rating, axis=0)
matrix_norm

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-1.909091,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-4.035714,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
2977,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-0.890411,-0.890411,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.054795,,,,,,,...,,,,,,,,,,
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


In [18]:
# Independent copy of the normalized matrix that keeps NaN for unrated books
matrix_norm_nan = matrix_norm.copy()
matrix_norm_nan

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,,,,,,-1.909091,,,,,...,,,,,,,,,,
2276,,,,,,,,,,,...,,,,,,-4.035714,,,,
2766,,,,,,,,,,,...,,,,,,,,,,
2977,,,,,,,,,,,...,,,,,,,,,,
3363,,,,-0.890411,-0.890411,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275970,,,,,,,,,,,...,,,,,,,,,,
277427,,,,8.054795,,,,,,,...,,,,,,,,,,
277478,,,,,,,,,,,...,,,,,,,,,,
277639,,,,,,,,,,,...,,,,,,,,,,


Identify Similar Users

In [19]:
# Replace the missing (unrated) entries with 0 before computing similarities
matrix_norm = matrix_norm.fillna(0)
matrix_norm

ISBN,000649840X,002026478X,0020442203,002542730X,0028604199,006000438X,0060008032,0060008776,006001203X,0060085444,...,1860492592,1878424319,1885171080,1931561648,3257228007,3257229534,3404148665,3423202327,3442541751,3492045170
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,0.0,0.0,0.00000,0.000000,0.000000,-1.909091,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2276,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-4.035714,0.0,0.0,0.0,0.0
2766,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2977,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3363,0.0,0.0,0.00000,-0.890411,-0.890411,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275970,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
277427,0.0,0.0,0.00000,8.054795,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
277478,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
277639,0.0,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [20]:
# user_similarity = matrix_norm.T.corr()
# user_similarity

In [21]:
# Pairwise cosine similarity between the users' normalized rating rows,
# labelled by user_id on both axes.
user_ids = matrix_norm.index
user_similarity_matrix = pd.DataFrame(cosine_similarity(matrix_norm), index=user_ids, columns=user_ids)
user_similarity_matrix

user_id,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254,1.000000,-0.016016,-0.002798,0.033965,0.004848,0.0,-0.082784,0.000000,-0.016394,0.063623,...,0.002531,-0.045615,0.009170,0.032551,-0.028994,-0.027925,-0.034256,0.000000,0.000000,-0.001477
2276,-0.016016,1.000000,-0.018555,0.012576,-0.006342,0.0,-0.011696,0.089512,0.000000,0.000203,...,0.005960,-0.017207,-0.025529,-0.006704,0.000000,0.001925,0.010673,0.006842,-0.006306,0.003567
2766,-0.002798,-0.018555,1.000000,-0.019180,0.012346,0.0,0.045174,0.000000,-0.018767,-0.041767,...,-0.002803,0.004021,-0.029962,-0.021462,0.006168,0.033103,-0.047899,0.098643,-0.052885,-0.009247
2977,0.033965,0.012576,-0.019180,1.000000,-0.009183,0.0,0.020483,0.000000,0.030649,0.035695,...,-0.021983,-0.023345,0.048257,0.009690,-0.032235,-0.049652,-0.005767,0.011726,-0.023445,0.002900
3363,0.004848,-0.006342,0.012346,-0.009183,1.000000,0.0,0.034151,0.000000,0.004847,-0.019875,...,0.003188,0.008531,0.002046,-0.008537,0.086984,-0.011058,-0.011186,0.007560,-0.007326,-0.011965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275970,-0.027925,0.001925,0.033103,-0.049652,-0.011058,0.0,0.001582,-0.015220,0.016535,0.009892,...,-0.045960,-0.008647,-0.027877,0.001048,-0.043595,1.000000,-0.050134,0.000000,-0.005014,-0.006475
277427,-0.034256,0.010673,-0.047899,-0.005767,-0.011186,0.0,0.022116,0.000000,0.016516,0.011634,...,-0.006760,0.003114,0.019559,0.069339,-0.009832,-0.050134,1.000000,0.004976,0.014397,0.000618
277478,0.000000,0.006842,0.098643,0.011726,0.007560,0.0,0.004288,0.028239,0.000000,0.007142,...,0.004716,-0.039116,0.009791,0.008749,0.000000,0.000000,0.004976,1.000000,-0.006603,0.002177
277639,0.000000,-0.006306,-0.052885,-0.023445,-0.007326,0.0,0.000000,0.000000,0.000000,-0.010763,...,0.009096,-0.003556,-0.038485,-0.041328,0.019449,-0.005014,0.014397,-0.006603,1.000000,-0.004950


In the user similarity matrix, the values range from -1 to 1, where -1 means opposite book preference and 1 means same book preference

In [22]:
# number of similar users
k = 40
# Pick a user ID
picked_user = 13552
# Get top k similar users.
# The picked user is removed by label instead of skipping the first sorted
# entry: self-similarity is not guaranteed to sort first (ties, or a user whose
# normalized row is all zeros), in which case slicing off position 0 would
# discard a real neighbour and keep the picked user in the list.
similar_user = (
    user_similarity_matrix[picked_user]
    .drop(picked_user)
    .sort_values(ascending=False)
    .head(k)
)
similar_user

user_id
180586    0.107859
129358    0.102007
68555     0.097953
107453    0.097501
101209    0.088199
113817    0.081777
213150    0.080601
271195    0.079018
187520    0.078697
105979    0.077819
106225    0.077592
217375    0.076589
271284    0.076341
153662    0.076307
40943     0.075915
56360     0.075541
95193     0.074987
56856     0.074119
135265    0.073210
214786    0.070132
170742    0.061260
168064    0.058958
242083    0.058480
124487    0.058383
104636    0.057688
246513    0.056877
107784    0.056053
40889     0.056037
218608    0.055725
207499    0.055689
164828    0.055245
116599    0.054416
155014    0.054096
85993     0.053978
6563      0.053390
235935    0.053167
20115     0.052604
104429    0.052486
7915      0.052335
149934    0.050912
Name: 13552, dtype: float64

We narrow down the candidate books by doing the following:
<ul><li> Remove the books that have already been read by the target user</li>
    <li> Keep only the books that similar users have read</li></ul>

In [23]:
# Ratings of the similar users only; books that none of them has rated are dropped
is_similar_user = matrix_norm_nan.index.isin(similar_user.index)
similar_user_book = matrix_norm_nan.loc[is_similar_user].dropna(axis='columns', how='all')
similar_user_book

ISBN,002026478X,006000438X,0060008032,0060008776,0060085444,0060090367,0060096195,0060168013,006016848X,0060173289,...,1573229083,157322930X,1573229326,1573229571,1576737330,1592400876,1857022424,1860492592,1885171080,1931561648
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6563,,,,,3.321429,,,,,,...,,,,,,,,,,
7915,,,,,,,,,-2.197674,,...,,,,,,,,,,
20115,,,,,,,,,,,...,,,,,,,,,,
40889,,,,,,,,,,,...,,,,,-2.563218,,,,,
40943,,,,,-1.186441,,,,,,...,,,,-1.186441,,,-1.186441,,,-1.186441
56360,,,,,,,,,,,...,,,,,,,,,,
56856,,,,,,,,,,,...,,,,,,,,,,
68555,,,,,,,,,-1.845188,,...,,,,,,,,,,
85993,,,,,,,,,,,...,,,,,,,,,,
95193,,,,,,,,,,,...,,,,,,,,,,


In [24]:
# Books that the target user has already rated (columns with a value in that user's row)
is_picked_user = matrix_norm_nan.index == picked_user
picked_userid_read = matrix_norm_nan.loc[is_picked_user].dropna(axis='columns', how='all')
picked_userid_read

ISBN,002542730X,006001203X,0060085444,0060096195,0060557257,0060928336,006092988X,0060976845,0060987529,006098824X,...,0786868619,0786868716,0786890436,080213825X,080410526X,080411868X,1551668653,1558745157,157322930X,1573229385
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13552,-2.731707,2.268293,4.268293,4.268293,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707,...,-2.731707,6.268293,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707,-2.731707


In [25]:
# Exclude the books the target user has already rated from the candidate list;
# ISBNs that are not among the candidates are simply ignored.
similar_user_book = similar_user_book.drop(columns=picked_userid_read.columns, errors='ignore')
similar_user_book

ISBN,002026478X,006000438X,0060008032,0060008776,0060090367,0060168013,006016848X,0060173289,0060175400,006019491X,...,1573228737,1573229083,1573229326,1573229571,1576737330,1592400876,1857022424,1860492592,1885171080,1931561648
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6563,,,,,,,,,,,...,,,,,,,,,,
7915,,,,,,,-2.197674,,,,...,,,,,,,,,,
20115,,,,,,,,,-0.57971,,...,,,,,,,,,,
40889,,,,,,,,,,,...,,,,,-2.563218,,,,,
40943,,,,,,,,,,,...,,,,-1.186441,,,-1.186441,,,-1.186441
56360,,,,,,,,,,,...,,,,,,,,,,
56856,,,,,,,,,,,...,,,,,,,,,,
68555,,,,,,,-1.845188,,,-1.845188,...,,,,,,,,,,
85993,,,,,,,,,,,...,,,,,,,,,,
95193,,,,,,,,,,,...,,,,,,,,,,


In [26]:
# Score every candidate book as the average, over the similar users who rated
# it, of (user similarity * that user's normalized rating).

# number of books to recommend
m = 10

# Weight each similar user's ratings by that user's similarity (rows align on user_id).
weighted_rating = similar_user_book.mul(similar_user, axis=0)
# How many similar users actually rated each book (NaN = not rated).
rater_count = similar_user_book.notna().sum(axis=0)
# The small epsilon keeps the division defined for a book without any rater.
book_score = weighted_rating.sum(axis=0) / (rater_count + 1e-8)

item_score = book_score.rename_axis('ISBN').reset_index(name='book_score')
ranked_item_score = item_score.sort_values(by='book_score', ascending=False)

# Use m here so that changing it above really changes the list length.
ranked_book = ranked_item_score.head(m)
ranked_book

Unnamed: 0,ISBN,book_score
723,446530522,0.720361
1331,767902521,0.700837
1288,684874350,0.687132
1246,679446486,0.687132
87,99771519,0.680165
1121,618002219,0.669081
242,345362861,0.669081
85,64472795,0.669081
1315,743418190,0.643771
869,451191145,0.613013


In [27]:
# Attach the book details to the top-scored ISBNs and order by score, best first
scored_books = book.merge(ranked_book, on="ISBN")
book_recommend = scored_books.sort_values(by='book_score', ascending=False).reset_index(drop=True)
# Number the recommendations from 1 instead of 0
book_recommend.index = book_recommend.index + 1
print("Book recommendation for user {0}:".format(picked_user))
book_recommend

Book recommendation for user 13552:


Unnamed: 0,ISBN,title,author,year,publisher,book_score
1,446530522,Cane River,Lalita Tademy,2001,Warner Books,0.720361
2,767902521,A Walk in the Woods: Rediscovering America on ...,Bill Bryson,1999,Broadway Books,0.700837
3,679446486,Airframe,Michael Crichton,1996,Alfred A. Knopf,0.687132
4,684874350,ANGELA'S ASHES,Frank McCourt,1996,Scribner,0.687132
5,99771519,Memoirs of a Geisha Uk,Arthur Golden,0,Trafalgar Square,0.680165
6,618002219,The Hobbit: or There and Back Again,J.R.R. Tolkien,1999,Houghton Mifflin Company,0.669081
7,64472795,Princess in the Spotlight (The Princess Diarie...,Meg Cabot,2002,HarperTrophy,0.669081
8,345362861,Dragonsdawn,Anne McCaffrey,1989,Del Rey Books,0.669081
9,743418190,In Her Shoes : A Novel,Jennifer Weiner,2002,Atria Books,0.643771
10,451191145,Atlas Shrugged,Ayn Rand,1996,Signet Book,0.613013
