In [1]:
import pandas as pd
import numpy as np
import pickle

from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV

In [2]:
# Load the interim review records from the gzip-compressed JSON file.
df_reviews = pd.read_json(
    '../../../data/interim/final/reviews.json.gz',
    orient="records",
    compression="gzip",
)

In [3]:
# Tell Surprise the bounds of the rating scale as (lowest, highest).
reader = Reader(
    rating_scale=(1, 5),
)

In [4]:
# Build the Surprise dataset from three columns of the review frame,
# passed in user / item / rating order.
data = Dataset.load_from_df(
    df_reviews[['user_id', 'product_id', 'ratings']],
    reader=reader,
)

In [5]:
# Split off 20% of the ratings, using a fixed seed for the split.
# NOTE(review): no later cell visible here uses `train` or `test`; the grid
# search is fitted on `data` -- confirm whether this hold-out is still needed.
train, test = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Candidate SVD hyper-parameters; the search tries every combination.
param_grid = dict(
    n_epochs=[5, 10],
    lr_all=[0.002, 0.005],
    reg_all=[0.4, 0.6],
)

# 3-fold cross-validated grid search scored on RMSE; refit=True asks for the
# best configuration to be refitted once the search completes.
gs = GridSearchCV(
    SVD,
    param_grid,
    measures=["rmse"],
    cv=3,
    refit=True,
)

In [7]:
# Seed NumPy's global RNG immediately before fitting. Surprise falls back to
# this generator when no random_state is given, so (with the default
# single-process n_jobs) the fold shuffling and SVD initialisation -- and
# therefore the reported score and the saved model -- become reproducible,
# including when this cell is re-run on its own.
np.random.seed(42)
gs.fit(data)

In [8]:
# Report the best RMSE, the parameters that achieved it, and the
# corresponding estimator object, one per line.
print(
    gs.best_score["rmse"],
    gs.best_params["rmse"],
    gs.best_estimator["rmse"],
    sep="\n",
)

1.164849674461313
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}
<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x000002290B83DA00>


In [9]:
# Persist the best estimator selected on RMSE. The context manager ensures the
# file handle is flushed and closed even if pickling raises, instead of
# leaving an open handle behind.
with open('../../../models/item_based_collaborative_filtering/svd.pkl', 'wb') as model_file:
    pickle.dump(gs.best_estimator["rmse"], model_file)