In [3]:
from surprise import Reader, Dataset
from surprise import SVD, KNNBasic, AlgoBase, PredictionImpossible
from surprise.accuracy import rmse, mae, mse
import pandas as pd

In [4]:
columns = ["feel_true_rating", "look_true_rating", "smell_true_rating", "taste_true_rating", "overall", "score", "aspect", "date", "text", "true_rating"]
train_df = pd.read_csv("../data/train_wide.csv")
test_df = pd.read_csv("../data/test_wide.csv")
model_evaluation_df = pd.read_csv("../data/model_evaluation_results.csv")
model_evaluation_df = model_evaluation_df.drop_duplicates(subset=['beer_id', 'username'])

In [5]:
# Perform a left join of train_df with model_evaluation_df on 'beer_id' and 'username'
train_merged = pd.merge(train_df, model_evaluation_df, on=['beer_id', 'username'], how='left')

# Perform a left join of test_df with model_evaluation_df on 'beer_id' and 'username'
test_merged = pd.merge(test_df, model_evaluation_df, on=['beer_id', 'username'], how='left')

In [6]:
# create new dataframe dropping columns from columns
train_merged = train_merged.drop(columns=columns)
test_merged = test_merged.drop(columns=columns)

In [7]:
train_merged['score'] = train_merged[['feel_predicted_rating', 'look_predicted_rating', 'smell_predicted_rating', 'taste_predicted_rating', 'predicted_rating']].mean(axis=1)
test_merged['score'] = test_merged[['feel_predicted_rating', 'look_predicted_rating', 'smell_predicted_rating', 'taste_predicted_rating', 'predicted_rating']].mean(axis=1)

In [8]:
X_train1 = Dataset.load_from_df(train_merged[["username", "beer_id", "score"]], reader=Reader(rating_scale=(0, 5)))
X_test1 = Dataset.load_from_df(test_merged[["username", "beer_id", "score"]], reader=Reader(rating_scale=(0, 5)))
trainset = X_train1.build_full_trainset()
test_set = X_test1.build_full_trainset().build_testset()

In [29]:
class HybridRecommender(AlgoBase):
    def __init__(self, k=5, svd_params=None, user_knn_params=None, item_knn_params=None):
        AlgoBase.__init__(self)
        self.k = k
        self.svd_params = svd_params or {}
        self.user_knn_params = user_knn_params or {}
        self.item_knn_params = item_knn_params or {}

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)

        self.user_based = KNNBasic(sim_options={
            'name': self.user_knn_params.get("name", "pearson"),
            'user_based': True,
            'min_support': self.user_knn_params.get("min_support", 1)
        })
        self.user_based.fit(trainset)

        self.item_based = KNNBasic(sim_options={
            'name': self.item_knn_params.get("name", "cosine"),
            'user_based': False,
            'min_support': self.item_knn_params.get("min_support", 1)
        })
        self.item_based.fit(trainset)

        self.svd = SVD(**self.svd_params)
        self.svd.fit(trainset)

        return self

    def estimate(self, u, i):
        if self.trainset.knows_user(u):
            r_user_based = self.user_based.estimate(u, i)
            r_item_based = self.item_based.estimate(u, i)
            r_svd = self.svd.estimate(u, i)
            rating = (r_user_based[0] + r_item_based[0] + r_svd) / 3
        else:
            raise PredictionImpossible("User is unknown.")
        return rating


In [31]:
# First prediction
algo = HybridRecommender()
algo.fit(trainset)
predictions = algo.test(test_set)

mae(predictions)
mse(predictions)
rmse(predictions)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  0.6009
MSE: 0.5014
RMSE: 0.7081


0.7081009973147933

In [32]:
from surprise import accuracy
import itertools

# Define parameter grid
param_grid = {
    'svd_params': [
        {'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02},
        {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
    ],
    'user_knn_params': [
        {'name': 'pearson', 'min_support': 1},
        {'name': 'cosine', 'min_support': 1}
    ],
    'item_knn_params': [
        {'name': 'cosine', 'min_support': 1},
        {'name': 'msd', 'min_support': 1}
    ]
}

# All combinations
combinations = list(itertools.product(
    param_grid['svd_params'],
    param_grid['user_knn_params'],
    param_grid['item_knn_params']
))

best_rmse = float("inf")
best_params = None

for svd_p, user_knn_p, item_knn_p in combinations:
    algo = HybridRecommender(svd_params=svd_p, user_knn_params=user_knn_p, item_knn_params=item_knn_p)
    algo.fit(trainset)
    predictions = algo.test(test_set)
    rmse_val = accuracy.rmse(predictions, verbose=False)

    if rmse_val < best_rmse:
        best_rmse = rmse_val
        best_params = (svd_p, user_knn_p, item_knn_p)

print("Best RMSE:", best_rmse)
print("Best Params:", best_params)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cos

In [41]:
from surprise import AlgoBase, KNNBasic, SVD
from surprise import PredictionImpossible

class HybridRecommenderMonolithic(AlgoBase):
    def __init__(self, w_user=0.3, w_item=0.3, w_svd=0.4, k=5, svd_params=None, sim_options_user=None, sim_options_item=None):
        AlgoBase.__init__(self)
        self.k = k
        self.w_user = w_user
        self.w_item = w_item
        self.w_svd = w_svd
        self.svd_params = svd_params or {}
        self.sim_options_user = sim_options_user or {'name': 'pearson', 'user_based': True, 'min_support': 1}
        self.sim_options_item = sim_options_item or {'name': 'cosine', 'user_based': False, 'min_support': 1}

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)

        # Fit SVD
        self.svd = SVD(**self.svd_params)
        self.svd.fit(trainset)

        # Fit User-based KNN
        self.user_based = KNNBasic(sim_options=self.sim_options_user)
        self.user_based.fit(trainset)

        # Fit Item-based KNN
        self.item_based = KNNBasic(sim_options=self.sim_options_item)
        self.item_based.fit(trainset)

        return self

    def estimate(self, u, i):
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible("User or item is unknown.")

        try:
            r_user = self.user_based.estimate(u, i)
        except PredictionImpossible:
            r_user = (0.0, None)

        try:
            r_item = self.item_based.estimate(u, i)
        except PredictionImpossible:
            r_item = (0.0, None)

        try:
            r_svd = self.svd.estimate(u, i)
        except PredictionImpossible:
            r_svd = 0.0

        # Weighted combination (monolithic logic)
        rating = (self.w_user * r_user[0] +
                  self.w_item * r_item[0] +
                  self.w_svd * r_svd)

        return rating

In [42]:
algo2 = HybridRecommenderMonolithic(svd_params={'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02})
algo2.fit(trainset)
predictions2 = algo2.test(test_set)

mae(predictions2)
mse(predictions2)
rmse(predictions2)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2703
MSE: 2.3462
RMSE: 1.5317


1.5317291861870086

In [43]:
param_grid = {
    'svd_params': [
        {'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02},
        {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
    ],
    'user_knn_params': [
        {'name': 'pearson', 'min_support': 1},
        {'name': 'cosine', 'min_support': 1}
    ],
    'item_knn_params': [
        {'name': 'cosine', 'min_support': 1},
        {'name': 'msd', 'min_support': 1}
    ]
}

# All combinations
combinations = list(itertools.product(
    param_grid['svd_params'],
    param_grid['user_knn_params'],
    param_grid['item_knn_params']
))

best_rmse = float("inf")
best_params = None

for svd_p, user_knn_p, item_knn_p in combinations:
    algo = HybridRecommenderMonolithic(svd_params=svd_p, sim_options_user=user_knn_p, sim_options_item=item_knn_p)
    algo.fit(trainset)
    predictions = algo.test(test_set)
    rmse_val = accuracy.rmse(predictions, verbose=False)

    if rmse_val < best_rmse:
        best_rmse = rmse_val
        best_params = (svd_p, user_knn_p, item_knn_p)

print("Best RMSE:", best_rmse)
print("Best Params:", best_params)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cos