In [1]:
import pandas as pd 
import numpy as np
from surprise import Dataset, Reader
from surprise import SVD, NMF, KNNBasic
from surprise.accuracy import rmse, mse 
from surprise.model_selection import GridSearchCV, train_test_split
from typing import List

In [2]:
filePathReviewsF: str = "reviews_filtered.csv"

In [3]:
from surprise.dataset import DatasetAutoFolds, Dataset
from surprise import Trainset
authorId: str ="AuthorId"
recipeId: str ="RecipeId"

df_food_reviews = pd.read_csv(filePathReviewsF, usecols=["RecipeId", "AuthorId", "Rating"])
df_food_reviews = df_food_reviews[["AuthorId", "RecipeId", "Rating"]]

reader = Reader(rating_scale=(1,5))
rd : DatasetAutoFolds = Dataset.load_from_df(df_food_reviews,reader)

In [4]:
param_grid_svd = {
    "lr_all": [0.005, 0.01, 0.02],
    "reg_all": [0.02, 0.1, 0.4],
    "n_epochs": [20, 50, 80],
    "n_factors": [20, 50, 100]
}

param_grid_nmf = {
    "n_factors": [50, 100, 200],
    "reg_pu": [0.02, 0.1, 0.5],
    "reg_qi": [0.02, 0.1, 0.5],
    "reg_bu": [0.005, 0.02, 0.1],
    "reg_bi": [0.005, 0.02, 0.1],
    "lr_bu": [0.002, 0.005, 0.01],
    "lr_bi": [0.002, 0.005, 0.01]
}

param_grid_knn = {
    "k": [10, 20, 40],
    "min_k": [2, 3],
    "sim_options": {
        "name": ["msd", "pearson"],
        "user_based": [False, True]
    }
}

In [11]:
# Perform grid search for SVD
gs_svd = GridSearchCV(SVD, param_grid=param_grid_svd,
                  measures=["rmse", "mae"], cv=5)
gs_svd.fit(rd)

# Get best score and parameters for SVD
print("Best RMSE score for SVD:", gs_svd.best_score["rmse"])
print("Best RMSE parameters for SVD:", gs_svd.best_params["rmse"])
print("Best MAE score for SVD:", gs_svd.best_score["mae"])
print("Best MAE parameters for SVD:", gs_svd.best_params["mae"])

Best RMSE score for SVD: 1.4789196245536333
Best RMSE parameters for SVD: {'lr_all': 0.005, 'reg_all': 0.02, 'n_epochs': 50, 'n_factors': 20}
Best MAE score for SVD: 1.0431158266950842
Best MAE parameters for SVD: {'lr_all': 0.01, 'reg_all': 0.02, 'n_epochs': 80, 'n_factors': 20}


In [12]:

# Perform grid search for KNN
gs_knn = GridSearchCV(KNNBasic, param_grid=param_grid_knn,
                      measures=["rmse", "mae"], cv=5)
gs_knn.fit(rd)
print("Best RMSE score for KNN:", gs_knn.best_score["rmse"])
print("Best RMSE parameters for KNN:", gs_knn.best_params["rmse"])
print("Best MAE score for KNN:", gs_knn.best_score["mae"])
print("Best MAE parameters for KNN:", gs_knn.best_params["mae"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix

In [5]:
# Perform grid search for NMF
gs_nmf = GridSearchCV(NMF, param_grid=param_grid_nmf, measures=["rmse", "mae"], cv=5)
gs_nmf.fit(rd)

# Get best score and parameters for NMF
print("Best RMSE score for NMF:", gs_nmf.best_score["rmse"])
print("Best RMSE parameters for NMF:", gs_nmf.best_params["rmse"])
print("Best MAE score for NMF:", gs_nmf.best_score["mae"])
print("Best MAE parameters for NMF:", gs_nmf.best_params["mae"])

Best RMSE score for NMF: 1.6088902722586753
Best RMSE parameters for NMF: {'n_factors': 200, 'reg_pu': 0.1, 'reg_qi': 0.1, 'reg_bu': 0.1, 'reg_bi': 0.1, 'lr_bu': 0.002, 'lr_bi': 0.005}
Best MAE score for NMF: 1.087558210420603
Best MAE parameters for NMF: {'n_factors': 50, 'reg_pu': 0.02, 'reg_qi': 0.02, 'reg_bu': 0.005, 'reg_bi': 0.005, 'lr_bu': 0.002, 'lr_bi': 0.005}
