In [2]:
#import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.neighbors import KNeighborsClassifier
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds

In [3]:
#get data
books_filename = './data/BX-Books.csv'
users_filename = './data/BX-Users.csv'
ratings_filename = './data/BX-Book-Ratings.csv'

# import csv data into dataframes
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_users = pd.read_csv(users_filename,
    sep=';',
    encoding="latin-1",
    error_bad_lines=False)

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})



  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
#rename df_users columns
df_users.rename(columns = {'User-ID':'user', 'Location':'location', 'Age':'age'}, inplace=True)
df_users.head()

Unnamed: 0,user,location,age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [5]:
#We only want users with more than 200 ratings
x = df_ratings['user'].value_counts() > 200
y = x[x].index  #users
df_ratings = df_ratings[df_ratings['user'].isin(y)]
df_ratings

Unnamed: 0,user,isbn,rating
1456,277427,002542730X,10.0
1457,277427,0026217457,0.0
1458,277427,003008685X,8.0
1459,277427,0030615321,0.0
1460,277427,0060002050,0.0
...,...,...,...
1147612,275970,3829021860,0.0
1147613,275970,4770019572,0.0
1147614,275970,896086097,0.0
1147615,275970,9626340762,8.0


In [6]:
book_ratings = df_ratings.merge(df_books, on = 'isbn')
book_ratings

Unnamed: 0,user,isbn,rating,title,author
0,277427,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
1,3363,002542730X,0.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
2,11676,002542730X,6.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
3,12538,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
4,13552,002542730X,0.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
...,...,...,...,...,...
487680,275970,1892145022,0.0,Here Is New York,E. B. White
487681,275970,1931868123,0.0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias
487682,275970,3411086211,10.0,Die Biene.,Sybil GrÃ?Â¤fin SchÃ?Â¶nfeldt
487683,275970,3829021860,0.0,The Penis Book,Joseph Cohen


In [7]:
#Now we are going to ensure that every book has at least 50 ratings
ratings_num = book_ratings.groupby('title')['rating'].count().reset_index()
ratings_num.rename(columns= {'rating':'num_ratings'}, inplace=True)
ratings = book_ratings.merge(ratings_num, on='title')
ratings = ratings[ratings['num_ratings'] >= 50]
ratings.drop_duplicates(['user','title'], inplace=True)
ratings.head()

Unnamed: 0,user,isbn,rating,title,author,num_ratings
0,277427,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,82
1,3363,002542730X,0.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,82
2,11676,002542730X,6.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,82
3,12538,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,82
4,13552,002542730X,0.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,82


In [33]:
#creating of pivot table for modeling and set na values to zero for books a user did not rate
pivot = ratings.pivot_table(index = 'title', values = 'rating',aggfunc= len, columns = 'user')
pivot = pivot.fillna(0)

In [34]:
book_sparse = csr_matrix(pivot)

In [35]:
book_model = NearestNeighbors(algorithm='brute')
book_model.fit(book_sparse)

NearestNeighbors(algorithm='brute')

In [36]:
print(pivot)

user                                                254     2276    2766    \
title                                                                        
1984                                                   1.0     0.0     0.0   
1st to Die: A Novel                                    0.0     0.0     0.0   
2nd Chance                                             0.0     1.0     0.0   
4 Blondes                                              0.0     0.0     0.0   
84 Charing Cross Road                                  0.0     0.0     0.0   
...                                                    ...     ...     ...   
Year of Wonders                                        0.0     0.0     0.0   
You Belong To Me                                       0.0     0.0     0.0   
Zen and the Art of Motorcycle Maintenance: An I...     0.0     0.0     0.0   
Zoya                                                   0.0     0.0     0.0   
\O\" Is for Outlaw"                                    0.0     0

In [55]:
distances, suggestions = book_model.kneighbors(pivot.loc['Harry Potter and the Chamber of Secrets (Book 2)', :].values.reshape(1, -1))
for i in range(len(suggestions)):
    print(pivot.index[suggestions[i]])
print(distances[0][4])

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Order of the Phoenix (Book 5)',
       'Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))'],
      dtype='object', name='title')
12.083045973594572


In [56]:
#Recconmend a book function
def What_to_Read_Next(book):
    distances, suggestions = book_model.kneighbors(pivot.loc[book, :].values.reshape(1, -1))
    print("Hi User! Thank you for using 'What to Read Next?'. Check out the following books and the rating of how close they are to your preferences!")
    for i in range(len(suggestions)):
           print(pivot.index[suggestions[i]])

In [57]:
What_to_Read_Next('Politically Correct Bedtime Stories: Modern Tales for Our Life and Times')

Hi User! Thank you for using 'What to Read Next?'. Check out the following books and the rating of how close they are to your preferences!
Index(['Politically Correct Bedtime Stories: Modern Tales for Our Life and Times',
       'The Bourne Supremacy', 'Tis: A Memoir',
       'All Creatures Great and Small',
       'A Man Named Dave: A Story of Triumph and Forgiveness'],
      dtype='object', name='title')
