# Using NMF for existing user recommendations

In [1]:
# Import libraries

import pandas as pd
from surprise import NMF
from surprise import Dataset, Reader
from collections import defaultdict
from surprise import dump

In [2]:
# Load the sampled book reviews into a DataFrame

sample = pd.read_csv('../data/books_reviews_sample.csv')

In [3]:
# Keep only the three columns Surprise needs, ordered as user, item, rating
# (the column order Dataset.load_from_df expects)

ratings = sample[['user_id', 'book_id', 'rating']]

In [4]:
# Preview the first five rows of the ratings frame
ratings.head(5)

Unnamed: 0,user_id,book_id,rating
0,84f866eb6dae54d7ac52d45a4c9b4d1f,5,4
1,f1b86bf7c103c46fcb854e1fb711b1ec,5,5
2,832c59bc39f8c9a2bc79459ae302b517,5,5
3,f8bf8e54d6de45b52d2286e733271e34,5,5
4,1f7257e13807ad631d90386772e857fa,5,5


In [5]:
# (number of ratings, number of columns)
ratings.shape

(91567, 3)

In [6]:
# Create the Surprise reader; ratings are on a 1-5 scale

reader = Reader(rating_scale=(1,5))

In [7]:
# Wrap the ratings DataFrame in a Surprise Dataset, parsed with the reader defined above

dataset = Dataset.load_from_df(ratings,reader)

In [8]:
# Fit on every known rating: the whole dataset is used as the trainset
trainset = dataset.build_full_trainset()

# Biased NMF with fixed hyperparameters; random_state pins the seed for the run
algo_nmf = NMF(
    n_factors=8,
    n_epochs=40,
    biased=True,
    reg_pu=0.8,
    reg_qi=2,
    reg_bu=0.03,
    reg_bi=0.3,
    random_state=123,
)

# Last expression of the cell: fit the model (the fitted algorithm is displayed)
algo_nmf.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x136e3df70>

In [9]:
# Build the anti-testset: the (user, book) pairs that have no known rating in the trainset
testset = trainset.build_anti_testset()

# Estimate a rating for each of those unrated pairs
preds_nmf = algo_nmf.test(testset)

In [11]:
# Number of predictions made on the anti-testset
len(preds_nmf) 

# Expected: 26_894_641 = (unique users * unique books) - number of known ratings

26894641

In [12]:
# Load the book metadata, keeping only the id and the columns shown in the recommendations

book_info = pd.read_csv('../data/books_info.csv', index_col=None, 
                     usecols=['book_id', 'Title', 'Author', 'Publication Year'])

In [15]:
def get_book_recoms_df(user_id, n=10, predictions=None, books=None):
    """
    Return the top-``n`` book recommendations for an existing user.

    Args:
        user_id: Raw user id to look up in the predictions.
        n: Maximum number of recommendations to return (default 10).
        predictions: Iterable of 5-item predictions that unpack as
            ``(uid, iid, true_r, est, details)``. Defaults to the
            notebook-level ``preds_nmf``.
        books: DataFrame with a ``book_id`` column plus the descriptive
            columns to display. Defaults to the notebook-level ``book_info``.

    Returns:
        DataFrame of book details (without ``book_id``), ordered from the
        highest to the lowest estimated rating and indexed from 1. It has
        fewer than ``n`` rows when the user has fewer predictions, and is
        empty when the user has none (e.g. an unknown user id).
    """
    if predictions is None:
        predictions = preds_nmf
    if books is None:
        books = book_info

    # Keep only this user's (book id, estimated rating) pairs
    user_estimates = [
        (iid, est)
        for uid, iid, _, est, _ in predictions
        if uid == user_id
    ]

    # sorted() is stable, so tied estimates keep their original prediction order
    ranked = sorted(user_estimates, key=lambda pair: pair[1], reverse=True)
    top_ids = [iid for iid, _ in ranked[:max(n, 0)]]

    # Nothing to recommend: return an empty frame with the usual columns
    # instead of failing on a length mismatch
    if not top_ids:
        return books.drop(columns='book_id').head(0)

    # A left merge keeps the ranking order of the recommended ids
    recoms = pd.DataFrame({'book_id': top_ids})
    merged_df = pd.merge(left=recoms, right=books, how='left', on='book_id')
    merged_df = merged_df.drop(columns='book_id')

    # Number the recommendations from 1 instead of 0
    merged_df.index = merged_df.index + 1

    return merged_df

In [16]:
# Top recommendations for an existing user
get_book_recoms_df('84f866eb6dae54d7ac52d45a4c9b4d1f')

Unnamed: 0,Title,Author,Publication Year
1,Armstrong: The Adventurous Journey of a Mouse ...,Torben Kuhlmann,2016
2,The Animal Book,Steve Jenkins,2013
3,If I Built a House,Chris Van Dusen,2012
4,Too Many Moose,Lisa Bakos,2016
5,You Belong Here,M.H. Clark,2016
6,Our Animal Friends at Maple Hill Farm,Alice Provensen,2001
7,Ethan's Story; My Life With Autism,Ethan Rice,2012
8,Christmas Day in the Morning,Pearl S. Buck,2002
9,The Monster at the End of This Book (Sesame Bo...,Jon Stone,No year listed
10,"Sex is a Funny Word: A Book about Bodies, Feel...",Cory Silverberg,2015


In [17]:
# Top recommendations for a second existing user
# NOTE(review): this returns the same ten titles as the previous user, only in a
# different order - worth checking how personalised the estimates actually are
get_book_recoms_df('2c55431540a1d77217734bf959dc6291')

Unnamed: 0,Title,Author,Publication Year
1,Our Animal Friends at Maple Hill Farm,Alice Provensen,2001
2,If I Built a House,Chris Van Dusen,2012
3,Ethan's Story; My Life With Autism,Ethan Rice,2012
4,The Animal Book,Steve Jenkins,2013
5,Too Many Moose,Lisa Bakos,2016
6,Armstrong: The Adventurous Journey of a Mouse ...,Torben Kuhlmann,2016
7,You Belong Here,M.H. Clark,2016
8,Christmas Day in the Morning,Pearl S. Buck,2002
9,The Monster at the End of This Book (Sesame Bo...,Jon Stone,No year listed
10,"Sex is a Funny Word: A Book about Bodies, Feel...",Cory Silverberg,2015


### For Streamlit app demonstration

- Due to file size limitations, only a subset of the predictions is saved and uploaded to Streamlit for the demonstration.

In [20]:
# Keep only the first 25,000 predictions so the saved file stays small.
# NOTE(review): if the predictions are grouped by user, this slice presumably
# covers only the first few users - confirm which user ids the demo can serve

small_preds_nmf = preds_nmf[0:25000]

In [22]:
# Uncomment to save the reduced predictions together with the fitted model for the Streamlit app
#dump.dump('../streamlit_app/dump_NMF_small', small_preds_nmf, algo_nmf)

In [23]:
# Uncomment to reload the saved predictions and model (only load dump files you trust).
# NOTE(review): this loads into predictions_nmf, whereas get_book_recoms_df reads preds_nmf
#predictions_nmf, algo_nmf = dump.load('../streamlit_app/dump_NMF_small')