In [18]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import KNNWithMeans
from surprise import SVD
from surprise.model_selection import GridSearchCV
from surprise.model_selection import train_test_split
from surprise import accuracy

### Surprise comes loaded with Movielens data set

In [30]:
movielens = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(movielens, test_size=0.25)

## Memory based KNN approach

In [31]:
sim_options = {
    'name': 'cosine',
    'user_based': True,
}

knn = KNNWithMeans(sim_options=sim_options)
knn.fit(trainset)
predictions = knn.test(testset)
print(f'Accuracy of KNN approach {accuracy.rmse(predictions)}')

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9559
Accuracy of KNN approach 0.9559333024898639


In [36]:
# Prediction on single user and item
uid = str(196)  
iid = str(302)  
pred = knn.predict(uid, iid, r_ui=4, verbose=False)
print(f'Rating of User {uid} for item {iid} is {pred.est}')

Rating of User 196 for item 302 is 4.454878722917435


## Mode based approach - SVD

In [37]:
svd = SVD()
predictions = svd.fit(trainset).test(testset)
print(f'Accuracy of SVD approach {accuracy.rmse(predictions)}')

RMSE: 0.9390
Accuracy of SVD approach 0.938988201323034


In [38]:
uid = str(196)  
iid = str(302)
pred = svd.predict(uid, iid, r_ui=4, verbose=False)
print(f'Rating of User {uid} for item {iid} is {pred.est}')

Rating of User 196 for item 302 is 4.223163301790177


## Grid Search

In [45]:
sim_options = {
    "name": ["msd", "cosine"],
    "user_based": [False, True],
}
param_grid = {'sim_options': sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3, refit=True)

gs.fit(movielens)

print(f'Best score for KNN model is {gs.best_score["rmse"]}')
print(f'Best parameters for KNN model are {gs.best_params["rmse"]}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Best score for KNN model is 0.941

In [47]:
print(f'Rating of User {uid} for item {iid} is {gs.predict(uid, iid).est}')

Rating of User 196 for item 302 is 4.126951272890875


### Gridsearch model suggest that we use msd similarity metric and should be item based rather than user based