In [30]:
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

In [31]:
# Load the built-in MovieLens-100k dataset; each rating record holds
# user id, item id, rating and timestamp.
data = Dataset.load_builtin('ml-100k')
# Hold out 15% of the ratings for evaluation. The seed is fixed so that the
# split -- and every metric derived from it -- is reproducible after
# "Restart Kernel -> Run All".
trainset, testset = train_test_split(data, test_size=.15, random_state=42)

In [32]:
# `user_based` in sim_options switches the neighbourhood type:
#   user_based=True  -> user-based collaborative filtering
#                       (similarities are computed between users)
#   user_based=False -> item-based collaborative filtering
#                       (similarities are computed between items)
algo = KNNWithMeans(
    sim_options={'user_based': True, 'name': 'pearson_baseline'},
    k=50,
)
# Left as the last expression so the cell still displays the fitted object.
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x127fac560>

In [33]:
# We can now query for specific predictions.
# Raw ids are passed as strings.
uid = "196"  # raw user id
iid = "302"  # raw item id

In [34]:
# Estimate the rating for one (user, item) pair; verbose=True prints the
# prediction details as well as returning them.
pred = algo.predict(uid=uid, iid=iid, verbose=True)

user: 196        item: 302        r_ui = None   est = 3.81   {'actual_k': 15, 'was_impossible': False}


In [35]:
# Score every held-out rating in the test set with the fitted model.
test_pred = algo.test(testset=testset)

In [36]:
# Show only the first few predictions; displaying the whole list floods the
# notebook output with one line per held-out rating.
test_pred[:10]

[Prediction(uid='435', iid='161', r_ui=3.0, est=3.2619071182336357, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='591', iid='283', r_ui=4.0, est=3.887861642135863, details={'actual_k': 32, 'was_impossible': False}),
 Prediction(uid='327', iid='1170', r_ui=4.0, est=2.608023146961393, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='351', iid='245', r_ui=3.0, est=4.197516971804881, details={'actual_k': 25, 'was_impossible': False}),
 Prediction(uid='328', iid='29', r_ui=3.0, est=2.6741633973193966, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='72', iid='427', r_ui=5.0, est=4.551650894738311, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='708', iid='508', r_ui=4.0, est=3.0003749625294023, details={'actual_k': 34, 'was_impossible': False}),
 Prediction(uid='190', iid='245', r_ui=4.0, est=3.0095167112554058, details={'actual_k': 27, 'was_impossible': False}),
 Prediction(uid='59', iid='675', r_ui=5.0, es

In [37]:
# Root-mean-square error of the predictions on the held-out test set.
# verbose=True prints the score; the returned value is also shown as the
# cell result.
print("User-based Model : Test Set")
accuracy.rmse(predictions=test_pred, verbose=True)

User-based Model : Test Set
RMSE: 0.9115


0.9115454336311192