In [10]:
import pandas as pd
import numpy as np
import pickle

from surprise import KNNBasic, Dataset, Reader, accuracy
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV

In [11]:
df_reviews = pd.read_json('../../../data/interim/final/reviews.json.gz', orient="records", compression="gzip")

In [12]:
reader = Reader(rating_scale=(1, 5))

In [13]:
data = Dataset.load_from_df(df_reviews[['user_id', 'product_id', 'ratings']], reader=reader)

In [14]:
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [15]:
param_grid = {
    'bsl_options': {
        'method': ['als', 'sgd'],
        'reg': [1, 2],
    },
    'k': [2, 3],
    'sim_options': {
        'name': ['msd', 'cosine'],
        'min_support': [1, 5],
        'user_based': [False],
    },
}

gs = GridSearchCV(KNNBasic, param_grid, measures=["rmse"], cv=3, refit=True)

In [16]:
gs.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matr

In [17]:
print(gs.best_score["rmse"])
print(gs.best_params["rmse"])
print(gs.best_estimator["rmse"])

1.1957884568041246
{'bsl_options': {'method': 'als', 'reg': 1}, 'k': 2, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}}
<surprise.prediction_algorithms.knns.KNNBasic object at 0x000001E1D788C250>


In [18]:
pickle.dump(gs.best_estimator["rmse"], open('../../../models/item_based_collaborative_filtering/knn.pkl', 'wb'))