In [None]:
import pandas as pd
import surprise
from dotenv import load_dotenv
from surprise import SVD, Dataset, Reader
from surprise.model_selection import GridSearchCV

load_dotenv()

In [3]:
MIN_RATING = 1
MAX_RATING = 5

train_data: pd.DataFrame = pd.read_parquet("data/ratings_train.pq")
reader = Reader(rating_scale=(MIN_RATING, MAX_RATING))
surprise_train_dataset = Dataset.load_from_df(train_data[["userId", "movieId", "rating"]], reader)
trainset: surprise.Trainset = surprise_train_dataset.build_full_trainset()

In [None]:
param_grid = {"n_factors": [75, 100, 125], "n_epochs": [25, 50, 75]}

grid_search = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=2)
grid_search.fit(surprise_train_dataset)

In [None]:
print(grid_search.best_score)
print(grid_search.best_params)

{'rmse': 0.8260481067360091, 'mae': 0.6250276877525159}
{'rmse': {'n_factors': 75, 'n_epochs': 25}, 'mae': {'n_factors': 75, 'n_epochs': 25}}


In [None]:
param_grid = {"n_factors": [25, 50, 75], "n_epochs": [10, 15, 25]}

grid_search_2 = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=2)
grid_search_2.fit(surprise_train_dataset)

In [None]:
print(grid_search_2.best_score)
print(grid_search_2.best_params)

{'rmse': 0.823731995095111, 'mae': 0.6224853277777231}
{'rmse': {'n_factors': 25, 'n_epochs': 25}, 'mae': {'n_factors': 25, 'n_epochs': 25}}


In [None]:
param_grid = {"n_factors": [10, 17, 25], "n_epochs": [25, 30, 35]}

grid_search_3 = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=2)
grid_search_3.fit(surprise_train_dataset)

In [None]:
print(grid_search_3.best_score)
print(grid_search_3.best_params)

{'rmse': 0.8218143757251857, 'mae': 0.6202771955576911}
{'rmse': {'n_factors': 17, 'n_epochs': 30}, 'mae': {'n_factors': 17, 'n_epochs': 30}}
