In [1]:
# imports
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import operator

In [2]:
# Load the datasets: the per-user book ratings and the book metadata catalogue.
df = pd.read_csv("ratings.csv")
bk = pd.read_csv("books.csv")

In [3]:
# Preview the first two rows of the ratings table.
df.head(2)

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3


In [4]:
# Preview the first two rows of the book metadata table.
bk.head(2)

Unnamed: 0,id,book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...


In [5]:
# Attach each rated book's title by joining the ratings with the catalogue on
# book_id (inner join: ratings whose book_id has no catalogue row are dropped).
# NOTE(review): the catalogue has both an `id` and a `book_id` column - confirm
# that ratings.book_id is meant to match bk.book_id rather than bk.id.
df = df.merge(bk[['book_id', 'title']], on='book_id')

In [6]:
# Check that the title column is now present on the ratings.
df.head()

Unnamed: 0,book_id,user_id,rating,title
0,1,314,5,Harry Potter and the Half-Blood Prince (Harry ...
1,1,439,3,Harry Potter and the Half-Blood Prince (Harry ...
2,1,588,5,Harry Potter and the Half-Blood Prince (Harry ...
3,1,1169,4,Harry Potter and the Half-Blood Prince (Harry ...
4,1,1185,4,Harry Potter and the Half-Blood Prince (Harry ...


In [8]:
# Column dtypes and non-null counts of the merged ratings table.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 79701 entries, 0 to 79700
Data columns (total 4 columns):
book_id    79701 non-null int64
user_id    79701 non-null int64
rating     79701 non-null int64
title      79701 non-null object
dtypes: int64(3), object(1)
memory usage: 3.0+ MB


In [9]:
# Number of distinct users in the merged ratings.
df['user_id'].nunique()

28906

In [10]:
# Number of distinct books in the merged ratings.
df['book_id'].nunique()

812

In [11]:
# Build the rating matrix for user-item collaborative filtering:
# one row per book_id, one column per user_id, each cell holding the rating
# (pivot_table averages duplicate pairs; unrated pairs are NaN at this point).
mat = df.pivot_table(index='book_id', columns='user_id', values='rating')

In [12]:
# Matrix dimensions: (number of books, number of users).
mat.shape

(812, 28906)

In [13]:
# Replace the NaN of every unrated (book, user) pair with 0 so that complete
# rating columns can be correlated against each other.
# NOTE(review): this makes "not rated" indistinguishable from a rating of 0
# in the correlation - confirm that is the intended treatment.
mat = mat.fillna(0)

In [14]:
# Preview the first two book rows of the filled rating matrix.
mat.head(2)

user_id,2,3,4,7,9,10,11,14,15,19,...,53404,53406,53408,53409,53416,53419,53420,53422,53423,53424
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Keep references to the matrix axes: user ids are the columns, book ids the index.
all_user_ids = mat.columns
book_ids = mat.index

In [16]:
def similarities(user):
    '''
    For user-item filtering, finds the Pearson correlation between `user`
    and every other user in the rating matrix `mat`.

    Args : user : id of the target user; must be one of `all_user_ids`
    Return : list of (user_id, pearson correlation) tuples sorted from the
             most to the least similar user. Users whose correlation is
             undefined (NaN, e.g. a constant rating column) are kept but
             placed at the end of the list.
    Raises : ValueError if `user` is not one of `all_user_ids`
    '''
    if user not in all_user_ids:
        raise ValueError("user %r is not present in the rating matrix" % (user,))

    # Loop-invariant: look the target user's rating column up only once.
    target_ratings = mat[user]

    scored = []
    undefined = []
    for uid in all_user_ids:
        if uid == user:
            continue
        score = pearsonr(target_ratings, mat[uid])[0]
        # NaN is neither smaller nor larger than any number, so letting it
        # take part in the sort would leave the ranking in arbitrary order.
        if np.isnan(score):
            undefined.append((uid, score))
        else:
            scored.append((uid, score))

    # The sort is stable, so equally similar users keep their column order.
    scored.sort(key=operator.itemgetter(1), reverse=True)
    return scored + undefined

In [17]:
def books_rated(user):
    '''
    Finds the books rated by the user.

    Args : user : user_id to look up in the ratings dataframe `df`
    Return : list of ids of books rated by the user, in dataframe row order
             (empty list when the user has no ratings)
    '''
    return df.loc[df['user_id'] == user, 'book_id'].tolist()

In [58]:
def recommendations(similarities , book_list , min_rating=3 , no_of_recommendations=3):
    '''
    Based on user-item filtering, creates a list of recommended book ids and
    a parallel list of the similar users each recommendation came from.

    Users are visited in the order given by `similarities`; for each one,
    their rows in the ratings dataframe `df` are scanned in order.

    Args : 1. similarities : sorted list of (user_id, pearson correlation) tuples
           2. book_list : list of ids of books already rated by the target user
           3. min_rating : a book is only recommended when the similar user
                           rated it strictly higher than this value
           4. no_of_recommendations : maximum number of books to recommend;
                                      zero or less yields no recommendations
    Return : tuple (reco_books, similar_user)
             reco_books   : list of recommended book ids, without duplicates
             similar_user : list of the same length; entry i is the user whose
                            rating produced reco_books[i], so one user may
                            appear several times
    '''
    reco_books = []
    similar_user = []
    if no_of_recommendations <= 0:
        return reco_books, similar_user

    # Books that must not be recommended: already rated by the target user,
    # or already picked in an earlier iteration.
    excluded = set(book_list)

    for u_id, _score in similarities:
        user_rows = df[df['user_id'] == u_id]
        for row in user_rows.itertuples(index=False):
            if row.rating > min_rating and row.book_id not in excluded:
                excluded.add(row.book_id)
                reco_books.append(row.book_id)
                similar_user.append(row.user_id)
                if len(reco_books) >= no_of_recommendations:
                    return reco_books, similar_user

    return reco_books, similar_user

In [19]:
def titles(book_ids):
    '''
    Fetches book titles.

    Args : book_ids : iterable of book_id values to look up in the catalogue `bk`
    Return : list of titles in the order of `book_ids`; an id with no matching
             row in `bk` contributes nothing, an id matching several rows
             contributes one title per row
    '''
    title_list = []
    for book_id in book_ids:
        # Select the column by name rather than by tuple position so the
        # lookup does not depend on the column order of the catalogue.
        matches = bk.loc[bk['book_id'] == book_id, 'title']
        title_list.extend(matches.tolist())
    return title_list

In [48]:
def recommend(user):
    '''
    Prints similar users ids and titles of recommended books.

    Args : user : id of the user to recommend for
    Return : None; prints "Not a Valid User" when `user` is not one of
             `all_user_ids`, otherwise prints the ids of the similar users
             followed by one recommended title per line
    '''
    if user not in all_user_ids:
        print("Not a Valid User")
        return

    similarity = similarities(user)
    user_rated_books = books_rated(user)
    recommended_books, similar_user_ids = recommendations(similarity, user_rated_books)
    print("\n")
    # Arguments are wrapped in a one-element tuple so that %-formatting always
    # treats them as a single value.
    print("Similar users are : %s" % (similar_user_ids,))
    print("\n")
    print("Recommended books for the user %s are :" % (user,))
    for title in titles(recommended_books):
        print("\t%s" % (title,))

    

In [42]:
def test():
    user = int(input("Enter User Id : "))
    recommend(user)


In [67]:
# Interactive demo: asks for a user id, then prints that user's recommendations.
test()

Enter User Id : 439


Similar users are : [12381, 12381, 12381]


Recommended books for the user 439 are :
	Harry Potter and the Prisoner of Azkaban (Harry Potter, #3)}
	Harry Potter Collection (Harry Potter, #1-6)}
	The Ultimate Hitchhiker's Guide to the Galaxy}
