In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import pickle
from source.utils import train_test_split_feature
from surprise import Reader, Dataset
from surprise import BaselineOnly
from surprise.model_selection import GridSearchCV
from source.baseline import BaseLineRecommender

In [3]:
feature = pd.read_csv("data/feature.csv")
cols = [
        "user_id",
        'business_id',
        "review_stars",
        "review_date"
        ]
selected_feature = feature[cols]
train_set, test_set = train_test_split_feature(selected_feature.copy())
reader = Reader(rating_scale=(1, 5))
trainset = Dataset.load_from_df(train_set[['user_id', 'business_id', 'review_stars']], reader)
testset = Dataset.load_from_df(test_set[['user_id', 'business_id', 'review_stars']], reader)
fullset = Dataset.load_from_df(selected_feature[['user_id', 'business_id', 'review_stars']], reader)
trainset = trainset.build_full_trainset()
testset = testset.build_full_trainset().build_testset()

In [36]:
# Tune the hyperparameter for ALS
param_grid = {'bsl_options': {'method': ['als'],
                              'n_epochs': [30], 
                              'reg_u': np.arange(10, 31, 5).tolist(),
                              'reg_i': np.arange(10, 31, 5).tolist()}
             }
grid_search = GridSearchCV(BaselineOnly, param_grid, measures=['rmse','mae'], cv=3)
grid_search.fit(fullset)
result_pickle = "result/baselinemodel_hyperparameter_tuning_ALS.pkl"
cv_result = pd.DataFrame(grid_search.cv_results)
for hyperparameter in ['method', 'n_epochs', 'reg_u', 'reg_i']:
    cv_result[hyperparameter] = cv_result.apply(lambda x:x['param_bsl_options'][hyperparameter], axis=1)
with open(result_pickle, 'wb') as handle:
    pickle.dump(cv_result, handle)
print(f"The best RMSE is: {grid_search.best_score['rmse']}")
print(f"The best hyperparameters for this RMSE is: {grid_search.best_params['rmse']}")
print(f"The best MAE is: {grid_search.best_score['mae']}")
print(f"The best hyperparameters for this MAE is: {grid_search.best_params['mae']}")

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimati

In [38]:
# Tune the hyperparameter for SGD
param_grid = {'bsl_options': {'method': ['sgd'],
                              'n_epochs': [30], 
                              'reg': [0.01, 0.05, 0.1, 0.5, 1, 5],
                              'learning_rate': [0.001, 0.005, 0.01, 0.05]}
             }
grid_search = GridSearchCV(BaselineOnly, param_grid, measures=['rmse','mae'], cv=3)
grid_search.fit(fullset)
result_pickle = "result/baselinemodel_hyperparameter_tuning_SGD.pkl"
cv_result = pd.DataFrame(grid_search.cv_results)
for hyperparameter in ['method', 'n_epochs', 'reg', 'learning_rate']:
    cv_result[hyperparameter] = cv_result.apply(lambda x:x['param_bsl_options'][hyperparameter], axis=1)
with open(result_pickle, 'wb') as handle:
    pickle.dump(cv_result, handle)
print(f"The best RMSE is: {grid_search.best_score['rmse']}")
print(f"The best hyperparameters for this RMSE is: {grid_search.best_params['rmse']}")
print(f"The best MAE is: {grid_search.best_score['mae']}")
print(f"The best hyperparameters for this MAE is: {grid_search.best_params['mae']}")

Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimati

KeyError: ('reg_u', 'occurred at index 0')

In [4]:
blr = BaseLineRecommender()

In [5]:
blr.fit(trainset)

Estimating biases using sgd...


In [6]:
blr.rmse(testset)

RMSE: 1.2717


1.2717075435379266

In [7]:
blr.mae(testset)

MAE:  1.0048


1.0047525717485146