# Recommender System

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
import sqlite3

In [2]:
explicit_ratings = pd.read_csv('../data/explicit_ratings.csv')

In [3]:
explicit_ratings.drop(columns=['Unnamed: 0', 'publisher', 'year_of_publication', 'book_author'], inplace=True)

In [4]:
explicit_ratings.head()

Unnamed: 0,book_title,isbn,book_rating,user_id
0,Clara Callan,2005018,5,8
1,Clara Callan,2005018,8,11676
2,Clara Callan,2005018,8,67544
3,Clara Callan,2005018,9,116866
4,Clara Callan,2005018,8,200273


In [5]:
explicit_ratings.shape[0]

251535

In [6]:
total_ratings = pd.read_csv('../data/ratings_and_books.csv')

In [8]:
total_ratings.drop(columns='Unnamed: 0', inplace=True)

## Pivot Table

In [None]:
exp_list = explicit_ratings[['isbn', 'user_id', 'book_rating']].to_dict('list')
dict(zip(exp_list['isbn'], dict(zip(exp_list['user_id'], exp_list['book_rating']))))

In [9]:
explicit_pivot = pd.pivot_table(data=explicit_ratings, 
                            index='book_title', 
                            columns='user_id',
                            values='book_rating')

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
scaler = StandardScaler()

In [25]:
explicit_pivot.head()

user_id,8,9,14,17,32,39,44,53,56,75,...,278694,278723,278732,278773,278798,278832,278843,278849,278851,278854
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)",,,,,,,,,,,...,,,,,,,,,,
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",,,,,,,,,,,...,,,,,,,,,,
Final Fantasy Anthology: Official Strategy Guide (Brady Games),,,,,,,,,,,...,,,,,,,,,,
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",,,,,,,,,,,...,,,,,,,,,,
"Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!",,,,,,,,,,,...,,,,,,,,,,


In [28]:
explicit_pivot.head()

user_id,8,9,14,17,32,39,44,53,56,75,...,278723,278732,278773,278798,278832,278843,278849,278851,278854,mean
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)",,,,,,,,,,,...,,,,,,,,,,9.0
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",,,,,,,,,,,...,,,,,,,,,,7.142857
Final Fantasy Anthology: Official Strategy Guide (Brady Games),,,,,,,,,,,...,,,,,,,,,,10.0
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",,,,,,,,,,,...,,,,,,,,,,8.0
"Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!",,,,,,,,,,,...,,,,,,,,,,6.666667


In [27]:
explicit_pivot['mean'] = explicit_pivot.apply(np.nanmean, 1)

In [None]:
explicit_pivot['std'] = explicit_pivot[explicit_pivot.columns[:-1]].apply(np.nanstd, 1)

In [None]:
np.nanmean()

In [None]:
np.nanstd()

In [20]:
item_debiased explicit_pivot

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [15]:
explicit_pivot.loc['A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)', :].max()

KeyError: 'the label [A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)] is not in the [index]'

## Sparse Matrix

In [15]:
explicit_pivot_sparse = sparse.csr_matrix(explicit_pivot.fillna(0))

## Cosine Similarity

In [20]:
explicit_recommender = pairwise_distances(explicit_pivot_sparse, metric='cosine')

In [21]:
book_recommender.shape

(53861, 53861)

In [23]:
book_recommender

array([[0., 1., 1., ..., 1., 1., 1.],
       [1., 0., 1., ..., 1., 1., 1.],
       [1., 1., 0., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 0., 1., 1.],
       [1., 1., 1., ..., 1., 0., 1.],
       [1., 1., 1., ..., 1., 1., 0.]])

## Distances DataFrame

In [24]:
book_recommender_df = pd.DataFrame(book_recommender, index=book_pivot.index, columns=book_pivot.index)

In [26]:
book_recommender_df.head()

book_title,"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)","Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",Final Fantasy Anthology: Official Strategy Guide (Brady Games),"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750","Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!","Little Comic Shop of Horrors (Give Yourself Goosebumps, Book 17)",Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),"Q-Space (Star Trek The Next Generation, Book 47)","Q-Zone (Star Trek The Next Generation, Book 48)","The Secret of the Old Clock (Nancy Drew, Book 1)",...,stardust,"street bible, the",together by christmas,why I'm like this : True Stories,Ã?ngeles fugaces (Falling Angels),Ã?Â?. Kolumnen.,Ã?Â?ber das Fernsehen.,Ã?Â?ber die Pflicht zum Ungehorsam gegen den Staat.,Ã?Â?rger mit Produkt X. Roman.,Ã?Â?stlich der Berge.
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)",0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Final Fantasy Anthology: Official Strategy Guide (Brady Games),1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!",1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Evaluate Recommender Performance

In [30]:
search = 'Final Fantasy'
for t in ratings_and_books[ratings_and_books['book_title'].str.contains(search)]['book_title'].values:
    print(t)
    print('Average Rating', ratings_and_books[ratings_and_books['book_title'] == t]['book_rating'].mean())
    print('Number of ratings', ratings_and_books[ratings_and_books['book_title'] == t].shape[0])
    print('')
    print('Recommendations')
    print(book_recommender_df[t].sort_values()[1:11])
    print('')
    print('**********************************************************************')
    print('')

The Sorcerer's Companion: A Guide to the Magical World of Harry Potter
Average Rating 8.0
Number of ratings 10

Recommendations
book_title
My Girl 2: A Novel                                                                               0.566312
The Curry Book: A Celebration of Memorable Flavors and Irresistible Recipes                      0.580302
Pickup on Noon Street                                                                            0.585249
Presidential (Mis)Speak: The Very Curious Language of George W. Bush                             0.593328
The Big Dig (Carlotta Carlyle)                                                                   0.615083
Cat in a Golden Garland : A Midnight Louie Mystery (A Midnight Louie Mystery)                    0.625894
Castle (The Seventh Tower, Book 2)                                                               0.660490
Howliday Inn                                                                                     0.663162
A Cat Under t

KeyboardInterrupt: 