In [124]:
import numpy as np
import pandas as pd

from data_loader import BooksDataLoader
from book_recommender import BookRecommender
from book_recommender import partition
from book_recommender import hit_rate

In [125]:
books_data_loader = BooksDataLoader()

readers_data = books_data_loader.get_readers_data()
genre_similarity = books_data_loader.get_genre_similarity_matrix()
books_data = books_data_loader.get_books_data()
books_meta_data = books_data_loader.get_books_meta_data()

In [126]:
readers_data.head()

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [127]:
books_meta_data = books_meta_data.set_index('book_id').join(books_data.set_index('book_id'))

In [146]:
selected_user_list = readers_data.user_id.unique()[:200].tolist()

In [147]:
books_available = set(genre_similarity.index)

In [152]:
books_reviewed_by_users = []
rating_threshold = 3

for user in selected_user_list:

    reviewed_all =  set(readers_data[readers_data.user_id == user].book_id.values)
    reviewed = reviewed_all.intersection(books_available)

    # filter out 
    reviewed = set(book_id for book_id in reviewed
                    if (readers_data[(readers_data.user_id == user) 
                                     & (readers_data.book_id == book_id)].rating >= rating_threshold).any())

    if len(reviewed) > 1:
        books_reviewed_by_users.append([user, reviewed])

In [153]:
books_reviewed_by_users_df = pd.DataFrame(books_reviewed_by_users, columns=['User', 'Books_Reviewed'])

In [155]:
books_reviewed_by_users_df.head()

Unnamed: 0,User,Books_Reviewed
0,1,"{1796, 6665, 3638, 54, 1176}"
1,2,"{193, 8034, 264, 3305, 211, 54}"
2,4,"{7, 264, 492, 401, 54, 344, 1210, 575}"
3,6,"{2661, 2279, 4079, 9295, 2321, 374, 247, 1976, 3933}"
4,8,"{264, 2781, 54}"


In [156]:
selected_user_index = 7
reviewed_books = books_reviewed_by_users_df.iloc[selected_user_index, 1]
reviewed_books

{3, 52, 310, 344, 795, 1042, 1381, 1645, 1993, 2661, 7260}

In [9]:
selected_user_id = books_reviewed_by_users_df.iloc[selected_user_index, 0]
selected_user_id

15

In [10]:
books_reviewed_by_users_df.shape

(165, 2)

In [11]:
reviewed_books_sub = books_data.loc[books_data.book_id.isin(reviewed_books), 
                                    ['book_id','original_title', 'authors', 'average_rating']]
reviewed_books_sub = reviewed_books_sub.set_index('book_id')

In [12]:
reviewed_books_sub

Unnamed: 0_level_0,original_title,authors,average_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Twilight,Stephenie Meyer,3.57
52,Eclipse,Stephenie Meyer,3.69
310,Good in Bed,Jennifer Weiner,3.7
344,Naked,David Sedaris,4.08
795,About a Boy,Nick Hornby,3.79
1042,Dirk Gently's Holistic Detective Agency,Douglas Adams,3.97
1381,The Eyre Affair,Jasper Fforde,3.92
1645,Rant: An Oral Biography of Buster Casey,Chuck Palahniuk,3.82
1993,The Vampire Armand,Anne Rice,3.75
2661,Bonk: The Curious Coupling of Science and Sex,Mary Roach,3.83


In [13]:
pd.set_option('max_colwidth', 200)

In [14]:
display(books_meta_data.loc[books_meta_data.book_id.isin(reviewed_books), 
                            ['book_id', 'genre']].set_index('book_id').join(reviewed_books_sub))

Unnamed: 0_level_0,genre,original_title,authors,average_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993,Fiction / Fantasy / Paranormal Fiction / Horror Fiction / Thrillers / Supernatural,The Vampire Armand,Anne Rice,3.75
7260,Fiction / Erotica / General Fiction / Romance / General,The Feast of All Saints,Anne Rice,3.83
344,Humor / Form / Essays,Naked,David Sedaris,4.08
1042,Fiction / Media Tie-In Fiction / Mystery & Detective / Private Investigators Fiction / Science Fiction / Action & Adventure,Dirk Gently's Holistic Detective Agency,Douglas Adams,3.97
310,Fiction / Family Life / General Fiction / Literary Fiction / Women,Good in Bed,Jennifer Weiner,3.7
795,Fiction / Coming of Age Fiction / Humorous / General Fiction / Literary,About a Boy,Nick Hornby,3.79
3,"Young Adult Fiction / Fantasy / Contemporary Young Adult Fiction / Paranormal, Occult & Supernatural Young Adult Fiction / Romance / Paranormal Young Adult Fiction / Social Themes / Dating & Sex Y...",Twilight,Stephenie Meyer,3.57
52,"Young Adult Fiction / Fantasy / Contemporary Young Adult Fiction / Paranormal, Occult & Supernatural Young Adult Fiction / Romance / Paranormal Young Adult Fiction / Social Themes / Dating & Sex Y...",Eclipse,Stephenie Meyer,3.69
1645,Fiction / Literary Fiction / Satire Fiction / Thrillers / General,Rant: An Oral Biography of Buster Casey,Chuck Palahniuk,3.82
1381,Fiction / Alternative History Fiction / Fantasy / Historical Fiction / Mystery & Detective / Women Sleuths,The Eyre Affair,Jasper Fforde,3.92


**#TODO Consolidate genres where applicable**

Notice that book '7379' has 'Humorous' listed as one of it's genre yet, similarity score is 0.0 with the book '534' even though one of it's genre is 'Humor'. That's for user 28

In [15]:
genre_similarity[genre_similarity.index == 7379]['1923']

book_id
7379    0.0
Name: 1923, dtype: float64

# Partition The Data

In [16]:
books_reviewed_by_users_df.head()

Unnamed: 0,User,Books_Reviewed
0,1,"{1796, 6665, 3638, 54, 1176}"
1,2,"{193, 8034, 264, 3305, 211, 54}"
2,4,"{7, 264, 492, 401, 54, 344, 1210, 575}"
3,6,"{2661, 2279, 4079, 9295, 2321, 374, 247, 1976, 3933}"
4,8,"{264, 2781, 54}"


In [17]:
part_list, left_out_list = partition(books_reviewed_by_users_df.Books_Reviewed.values)

In [18]:
books_reviewed_by_users_df.loc[:, 'Books_Reviewed'] = part_list
books_reviewed_by_users_df.loc[:, 'Left_Out'] = left_out_list

In [19]:
books_reviewed_by_users_df.head(n=5)

Unnamed: 0,User,Books_Reviewed,Left_Out
0,1,"{1796, 3638, 54, 1176}",6665
1,2,"{193, 8034, 264, 3305, 54}",211
2,4,"{7, 264, 492, 401, 54, 1210, 575}",344
3,6,"{2661, 2279, 4079, 9295, 2321, 374, 247, 3933}",1976
4,8,"{2781, 54}",264


# Make Recommendations

In [20]:
book_recommender = BookRecommender()

In [21]:
%%time
recommended_books = book_recommender.recommend(books_reviewed_by_users_df.User.values,
                                               books_reviewed_by_users_df.Books_Reviewed.values)

CPU times: user 1min 50s, sys: 1 s, total: 1min 51s
Wall time: 1min 51s


## Metrics

In [22]:
hrate = hit_rate(recommended_books, books_reviewed_by_users_df.Left_Out.values)
hrate

0.49696969696969695

In [23]:
books_reviewed_by_users_df.loc[:, 'Recommended_Books'] = recommended_books

In [55]:
def topn(user_id, n=10, df=books_reviewed_by_users_df):

    book_list = list(df.loc[df.User == user_id, 'Recommended_Books'].values[0])
    books_with_rating = books_data.loc[books_data.index.isin(book_list), ['book_id', 'average_rating']]
    return books_with_rating.nlargest(n, 'average_rating').book_id.tolist()

In [56]:
top_list = list(map(str, [topn(user_id, 10) for user_id in books_reviewed_by_users_df.User]))

In [47]:
hit_rate(top_list, books_reviewed_by_users_df.Left_Out.values)

0.12727272727272726

In [122]:
def top_recommendation_by_user(user_id):
    
    top_10 = topn(user_id)
    print(top_10)
    top_10_md = books_data.loc[books_data.book_id.isin(top_10), ['book_id', 'title', 'authors', 'average_rating']]

    #print(books_meta_data[books_meta_data.index == top_10])
    #meta_data = books_meta_data.reset_index()
    #print(meta_data.head(n=1))
    #print(meta_data[meta_data.isin(top_10)])
    #top_10_md = meta_data.loc[meta_data.index.isin(top_10), 
    #                                ['genre', 'original_title', 'average_rating', 'authors']]
    return top_10_md

In [123]:
#books_meta_data.loc[books_meta_data.book_id.isin(selected_user_top_n.index.values), ['genre', 'book_id']].set_index('book_id').join(selected_user_top_n)
top_recommendation_by_user(4)
#books_meta_data.head()
#books_meta_data.index

[5919, 4868, 8172, 8918, 1340, 3759, 4224, 1760, 5523, 7269]


Unnamed: 0,book_id,title,authors,average_rating
1339,1340,"Dead Beat (The Dresden Files, #7)",Jim Butcher,4.43
1759,1760,"Keys to the Demon Prison (Fablehaven, #5)",Brandon Mull,4.42
3758,3759,"Sookie Stackhouse 7-copy Boxed Set (Sookie Stackhouse, #1-7)",Charlaine Harris,4.43
4223,4224,"Motorcycle Man (Dream Man, #4)",Kristen Ashley,4.43
4867,4868,Jesus the Christ,James E. Talmage,4.63
5522,5523,"Bleach, Volume 15",Tite Kubo,4.41
5918,5919,Life Application Study Bible: NIV,"Anonymous, Ronald A. Beers, Ronald A. Beers",4.67
7268,7269,Arch of Triumph: A Novel of a Man Without a Country,Erich Maria Remarque,4.4
8171,8172,"Ouran High School Host Club, Vol. 2 (Ouran High School Host Club, #2)",Bisco Hatori,4.46
8917,8918,"Rock Chick Revolution (Rock Chick, #8)",Kristen Ashley,4.46
