In [5]:
import pandas as pd

In [6]:
#Load the ratings CSV into a DataFrame

ratings_csv = '~/Movie_Recommender/ratings.csv'
ratings = pd.read_csv(ratings_csv)

In [7]:
#Hyperparameter search for SVD recommender

import random

import numpy as np
from surprise import accuracy, Dataset, SVD, KNNBasic, Reader
from surprise.model_selection import train_test_split, GridSearchCV

#Seed the global RNGs so the fold shuffling and the SVD factor initialisation
#(both fall back to numpy's global RNG when no random_state is given) are
#repeatable, making the reported score and chosen parameters reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

#Format the data for surprise library: only the (user, item, rating) columns
#are passed, with ratings declared to lie on a 0.5-5 scale.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader=reader)

#Grid search cross-validation for optimal hyperparameters
#(4 * 3 * 2 * 3 = 72 parameter combinations, each scored by 3-fold CV on RMSE).
#NOTE(review): the previously recorded run selected the edge value on every
#axis (largest n_factors/n_epochs/lr_all, smallest reg_all), so the optimum
#may lie outside this grid - consider widening it.
param_grid = {
    "n_factors": [20, 60, 100, 140],
    "n_epochs": [5, 10, 20],
    "lr_all": [0.005, 0.01],
    "reg_all": [0.1, 0.3, 0.6],
}
gscv = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=3)
gscv.fit(data)

#Best CV score and optimal hyperparameters
print(gscv.best_score)
print(gscv.best_params)

{'rmse': 0.8682200430401187}
{'rmse': {'n_factors': 140, 'n_epochs': 20, 'lr_all': 0.01, 'reg_all': 0.1}}


In [8]:
#Save best model

import joblib
algo = gscv.best_estimator["rmse"]

#The grid search above was created without refit=True, so best_estimator only
#holds an SVD instance configured with the best parameters - it has not been
#trained. Fit it on the complete ratings set before persisting; otherwise the
#saved file contains a model that cannot make predictions.
algo.fit(data.build_full_trainset())

joblib.dump(algo, 'SVD.joblib')

['SVD.joblib']