In [None]:
# %pip install scikit-surprise==1.1.0 # pickle

In [None]:
from surprise import accuracy, Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [None]:
import pandas as pd
import pathlib
# The data folder is a sibling of the current working directory:
# <cwd>/../data, expressed as an absolute path.
DATA_DIR = pathlib.Path(".").resolve().parent.joinpath("data")
print(DATA_DIR)
DATA_DIR.exists()

In [None]:
# Path to the ratings CSV inside the data folder; the cell shows whether it is present.
dataset = DATA_DIR.joinpath('ratings_small.csv')
dataset.exists()

In [None]:
# Load the ratings and discard rows whose rating is missing.
# The drop has to be applied to the frame itself: calling dropna(inplace=True)
# on the selected 'rating' column never removes any rows from df.
df = pd.read_csv(dataset).dropna(subset=['rating'])
df.head()

In [None]:
# Largest and smallest rating present in the data, shown as a (max, min) pair.
df['rating'].max(), df['rating'].min()

In [None]:
# Declare the rating scale (0.5 to 5), then hand the user / item / rating
# columns, in that order, to Surprise.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(
    df.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)

In [None]:
# SVD model trained for 20 epochs with verbose output switched on.
algo = SVD(n_epochs=20, verbose=True)
# 4-fold cross-validation reporting RMSE and MAE.
cross_validate(
    algo,
    data,
    measures=["RMSE", "MAE"],
    cv=4,
    verbose=True,
)

In [None]:
# Build a trainset from the full `data` object (no hold-out split) and fit the
# model on it. The fit call is the cell's last expression, so its return value
# is what the notebook displays.
trainset = data.build_full_trainset()
algo.fit(trainset)

In [None]:
# Turn the training ratings back into a test set and score the fitted model on them.
testset = trainset.build_testset()
predictions = algo.test(testset)
# The model is evaluated on the very ratings it was fitted on, so this RMSE is
# optimistically biased (expected to be low) and is not a generalisation estimate.
accuracy.rmse(predictions, verbose=True)
# accuracy.mae(predictions, verbose=True)

In [None]:
# Draw one random row from the ratings and pull out its user / movie identifiers.
sample_row = df.sample(n=1)
userId = sample_row['userId'].iloc[0]
movieId = sample_row['movieId'].iloc[0]
print(userId, movieId)

In [None]:
# Ask the fitted model for a prediction on the sampled (userId, movieId) pair
# and display the estimated rating.
pred = algo.predict(uid=userId, iid=movieId)
pred.est

In [None]:
import pickle

In [None]:
# Persist the fitted model to model.pkl, wrapped in a dict under the "model" key.
algo_data = dict(model=algo)
with open('model.pkl', mode='wb') as f:
    pickle.dump(algo_data, f)

In [None]:
# Restore the model from model.pkl. Start from None so that a failed load
# cannot leave a stale object bound to `model_algo`.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
model_algo = None
with open('model.pkl', 'rb') as f:
    model_data_loaded = pickle.load(f)
# Index the key directly: a payload without "model" fails right here with a
# KeyError instead of silently yielding None and breaking at the first .predict call.
model_algo = model_data_loaded['model']

In [None]:
# Query the model reloaded from disk for the same (userId, movieId) pair;
# presumably a round-trip check against the estimate from the in-memory model.
model_algo.predict(uid=userId, iid=movieId).est

In [None]:
# Spot-check the reloaded model on ten randomly drawn rating rows,
# printing user id, movie id and the estimated rating for each.
sample_rows = df.sample(n=10).to_dict('records')
for row in sample_rows:
    userId, movieId = row['userId'], row['movieId']
    pred = model_algo.predict(uid=userId, iid=movieId).est
    print(userId, movieId, pred)