In [None]:
from surprise import SVD
from surprise import BaselineOnly, Dataset, Reader
from surprise.model_selection import cross_validate
import pandas as pd
from surprise.prediction_algorithms.knns import KNNBasic

In [None]:
# Load the cleaned ratings table from disk, then show the overall mean rating
# as the cell's output.
df = pd.read_csv('movie-ratings.clean.csv')
df['rating'].mean()

In [None]:
# Ratings are declared to lie on a 1-to-5 scale.
reader = Reader(rating_scale=(1, 5))
# Column order matters to surprise: user id first, then item id, then rating.
data = Dataset.load_from_df(df.loc[:, ['user', 'movie', 'rating']], reader)

In [None]:
# Build a trainset from *all* ratings purely to inspect the dataset's size.
# NOTE: performance must not be measured on this trainset directly; the
# cross-validation cells split `data` themselves.
trainset = data.build_full_trainset()
summary_counts = (trainset.n_items, trainset.n_users, trainset.n_ratings)
print('Dataset has %d items, %d users, and %d ratings' % summary_counts)

In [None]:
# Pearson-correlation similarity configurations for the neighbourhood models.
# `user_based` selects whether similarities are computed between users (True)
# or between items (False).
item_item_sim_options = {'name': 'pearson',
                         'user_based': False}  # <-- turns into item-item
user_user_sim_options = {'name': 'pearson',
                         'user_based': True}

# This baseline uses the mean rating + bias for the user + bias for the item
user_item_biases_baseline = BaselineOnly()

# The two options we discussed in class, each predicting from the k=2 nearest
# neighbours.
# NOTE: the keyword has to be spelled exactly `sim_options`. The KNN
# constructors accept arbitrary extra keyword arguments, so a misspelled name
# raises no error and the model silently falls back to its default similarity
# settings instead of the Pearson configuration above.
user_user = KNNBasic(k=2, sim_options=user_user_sim_options)
item_item = KNNBasic(k=2, sim_options=item_item_sim_options)

In [None]:
# 5-fold cross-validation of the `user_user` model; RMSE and MAE are printed
# for every fold because verbose=True.
cv = cross_validate(
    user_user,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

In [None]:
# 5-fold cross-validation of the `item_item` model; RMSE and MAE are printed
# for every fold because verbose=True.
cv = cross_validate(
    item_item,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

In [None]:
# What if we used cosine instead?
# Same two neighbourhood models as before, but with cosine similarity.
cosine_item_item_sim_options = {'name': 'cosine',
                                'user_based': False}  # <-- turns into item-item
cosine_user_user_sim_options = {'name': 'cosine',
                                'user_based': True}
# The two options we discussed in class.
# Each model must receive its own configuration: the user-user model takes the
# `user_based: True` options, the item-item model the `user_based: False` ones.
cosine_user_user = KNNBasic(k=2, sim_options=cosine_user_user_sim_options)
cosine_item_item = KNNBasic(k=2, sim_options=cosine_item_item_sim_options)

In [None]:
# 5-fold cross-validation of the `cosine_user_user` model; RMSE and MAE are
# printed for every fold because verbose=True.
cv = cross_validate(
    cosine_user_user,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

In [None]:
# 5-fold cross-validation of the `cosine_item_item` model; RMSE and MAE are
# printed for every fold because verbose=True.
cv = cross_validate(
    cosine_item_item,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# Try learning with user factors

In [None]:
# Matrix-factorization model (surprise's SVD) configured with 20 latent factors.
# NOTE(review): no random_state is passed, so results presumably vary between
# runs — confirm whether reproducibility matters here.
user_factors = SVD(n_factors=20)

In [None]:
# 5-fold cross-validation of the `user_factors` (SVD) model; RMSE and MAE are
# printed for every fold because verbose=True.
cv = cross_validate(
    user_factors,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)