In [40]:
from surprise import Reader, Dataset
from surprise import SVD, KNNBasic, AlgoBase, PredictionImpossible
from surprise.accuracy import rmse, mae, mse
import pandas as pd

In [41]:
def get_true_positives(testset, threshold=4.0):
    '''Return items considered relevant per user from the testset.'''
    relevant = defaultdict(set)
    for uid, iid, true_r in testset:
        if true_r >= threshold:
            relevant[uid].add(iid)
    return relevant

In [42]:
from collections import defaultdict

def get_top_k(predictions, k=10):
    '''Return the top-K recommended items for each user from predictions.'''
    top_k = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_k[uid].append((iid, est))

    # Sort and pick top k
    for uid, user_ratings in top_k.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_k[uid] = [iid for (iid, _) in user_ratings[:k]]

    return top_k

In [43]:
def precision_at_k(top_k_preds, relevant_items, k):
    precisions = []
    for uid in top_k_preds:
        if uid in relevant_items:
            hits = len(set(top_k_preds[uid]) & relevant_items[uid])
            precisions.append(hits / k)

    if len(precisions) == 0:
        print("Warning: No overlap between predicted users and relevant users.")
        return 0.0

    return sum(precisions) / len(precisions)

def hit_rate_at_k(top_k_preds, relevant_items):
    hits = 0
    total = 0
    for uid in relevant_items:
        total += 1
        if set(top_k_preds[uid]) & relevant_items[uid]:
            hits += 1

    if hits == 0:
        print("Warning: No overlap between predicted users and relevant users.")
        return 0.0
    return hits / total

# Reading in Data

In [44]:
columns = ["feel_true_rating", "look_true_rating", "smell_true_rating", "taste_true_rating", "overall", "score", "date", "text"]
train_df = pd.read_csv("../data/RecSys_AspectSentiment_train.csv")
test_df = pd.read_csv("../data/RecSys_AspectSentiment_train.csv")

In [45]:
# create new dataframe dropping columns from columns
train_df = train_df.drop(columns=columns)
test_df = test_df.drop(columns=columns)

In [46]:
train_df['score'] = train_df[['feel_predicted_rating', 'look_predicted_rating', 'smell_predicted_rating', 'taste_predicted_rating']].mean(axis=1)
test_df['score'] = test_df[['feel_predicted_rating', 'look_predicted_rating', 'smell_predicted_rating', 'taste_predicted_rating']].mean(axis=1)

In [47]:
X_train1 = Dataset.load_from_df(train_df[["username", "beer_id", "score"]], reader=Reader(rating_scale=(0, 5)))
X_test1 = Dataset.load_from_df(test_df[["username", "beer_id", "score"]], reader=Reader(rating_scale=(0, 5)))
trainset = X_train1.build_full_trainset()
test_set = X_test1.build_full_trainset().build_testset()

In [48]:
class HybridRecommender(AlgoBase):
    def __init__(self, k=5, svd_params=None, user_knn_params=None, item_knn_params=None):
        AlgoBase.__init__(self)
        self.k = k
        self.svd_params = svd_params or {}
        self.user_knn_params = user_knn_params or {}
        self.item_knn_params = item_knn_params or {}

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)

        self.user_based = KNNBasic(sim_options={
            'name': self.user_knn_params.get("name", "pearson"),
            'user_based': True,
            'min_support': self.user_knn_params.get("min_support", 1)
        })
        self.user_based.fit(trainset)

        self.item_based = KNNBasic(sim_options={
            'name': self.item_knn_params.get("name", "cosine"),
            'user_based': False,
            'min_support': self.item_knn_params.get("min_support", 1)
        })
        self.item_based.fit(trainset)

        self.svd = SVD(**self.svd_params)
        self.svd.fit(trainset)

        return self

    def estimate(self, u, i):
        if self.trainset.knows_user(u):
            r_user_based = self.user_based.estimate(u, i)
            r_item_based = self.item_based.estimate(u, i)
            r_svd = self.svd.estimate(u, i)
            rating = (r_user_based[0] + r_item_based[0] + r_svd) / 3
        else:
            raise PredictionImpossible("User is unknown.")
        return rating


In [49]:
# First prediction
algo = HybridRecommender()
algo.fit(trainset)
predictions = algo.test(test_set)

mae(predictions)
mse(predictions)
rmse(predictions)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  0.2035
MSE: 0.0675
RMSE: 0.2597


0.2597443911417997

In [50]:
K = 10
top_k_preds = get_top_k(predictions, k=K)
relevant_items = get_true_positives(test_set, threshold=4.0)

prec = precision_at_k(top_k_preds, relevant_items, k=K)
hit = hit_rate_at_k(top_k_preds, relevant_items)

print(f'Precision@{K}: {prec:.4f}')
print(f'Hit Rate@{K}: {hit:.4f}')

Precision@10: 0.1947
Hit Rate@10: 1.0000


In [51]:
from surprise import accuracy
import itertools

# Define parameter grid
param_grid = {
    'svd_params': [
        {'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02},
        {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
    ],
    'user_knn_params': [
        {'name': 'pearson', 'min_support': 1},
        {'name': 'cosine', 'min_support': 1}
    ],
    'item_knn_params': [
        {'name': 'cosine', 'min_support': 1},
        {'name': 'msd', 'min_support': 1}
    ]
}

# All combinations
combinations = list(itertools.product(
    param_grid['svd_params'],
    param_grid['user_knn_params'],
    param_grid['item_knn_params']
))

best_rmse = float("inf")
best_params = None

for svd_p, user_knn_p, item_knn_p in combinations:
    algo = HybridRecommender(svd_params=svd_p, user_knn_params=user_knn_p, item_knn_params=item_knn_p)
    algo.fit(trainset)
    predictions = algo.test(test_set)
    rmse_val = accuracy.rmse(predictions, verbose=False)

    if rmse_val < best_rmse:
        best_rmse = rmse_val
        best_params = (svd_p, user_knn_p, item_knn_p)

print("Best RMSE:", best_rmse)
print("Best Params:", best_params)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cos

In [52]:
svd_p = {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
user_knn_p = {'name': 'cosine', 'min_support': 1}
item_knn_p = {'name': 'msd', 'min_support': 1}

In [53]:
algo = HybridRecommender(svd_params=svd_p, user_knn_params=user_knn_p, item_knn_params=item_knn_p)
algo.fit(trainset)
predictions = algo.test(test_set)

mae(predictions)
mse(predictions)
rmse(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.1242
MSE: 0.0268
RMSE: 0.1638


0.16382826919438961

In [54]:
K = 10
top_k_preds = get_top_k(predictions, k=K)
relevant_items = get_true_positives(test_set, threshold=4.0)

prec = precision_at_k(top_k_preds, relevant_items, k=K)
hit = hit_rate_at_k(top_k_preds, relevant_items)

print(f'Precision@{K}: {prec:.4f}')
print(f'Hit Rate@{K}: {hit:.4f}')

Precision@10: 0.1948
Hit Rate@10: 1.0000


In [55]:
from surprise import AlgoBase, KNNBasic, SVD
from surprise import PredictionImpossible

class HybridRecommenderMonolithic(AlgoBase):
    def __init__(self, w_user=0.3, w_item=0.3, w_svd=0.4, k=5, svd_params=None, sim_options_user=None, sim_options_item=None):
        AlgoBase.__init__(self)
        self.k = k
        self.w_user = w_user
        self.w_item = w_item
        self.w_svd = w_svd
        self.svd_params = svd_params or {}
        self.sim_options_user = sim_options_user or {'name': 'pearson', 'user_based': True, 'min_support': 1}
        self.sim_options_item = sim_options_item or {'name': 'cosine', 'user_based': False, 'min_support': 1}

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)

        # Fit SVD
        self.svd = SVD(**self.svd_params)
        self.svd.fit(trainset)

        # Fit User-based KNN
        self.user_based = KNNBasic(sim_options=self.sim_options_user)
        self.user_based.fit(trainset)

        # Fit Item-based KNN
        self.item_based = KNNBasic(sim_options=self.sim_options_item)
        self.item_based.fit(trainset)

        return self

    def estimate(self, u, i):
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible("User or item is unknown.")

        try:
            r_user = self.user_based.estimate(u, i)
        except PredictionImpossible:
            r_user = (0.0, None)

        try:
            r_item = self.item_based.estimate(u, i)
        except PredictionImpossible:
            r_item = (0.0, None)

        try:
            r_svd = self.svd.estimate(u, i)
        except PredictionImpossible:
            r_svd = 0.0

        # Weighted combination (monolithic logic)
        rating = (self.w_user * r_user[0] +
                  self.w_item * r_item[0] +
                  self.w_svd * r_svd)

        return rating

In [56]:
algo2 = HybridRecommenderMonolithic(svd_params={'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02})
algo2.fit(trainset)
predictions2 = algo2.test(test_set)

mae(predictions2)
mse(predictions2)
rmse(predictions2)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  0.2101
MSE: 0.0710
RMSE: 0.2664


0.2664043125371097

In [57]:
K = 10
top_k_preds = get_top_k(predictions2, k=K)
relevant_items = get_true_positives(test_set, threshold=4.0)

prec = precision_at_k(top_k_preds, relevant_items, k=K)
hit = hit_rate_at_k(top_k_preds, relevant_items)

print(f'Precision@{K}: {prec:.4f}')
print(f'Hit Rate@{K}: {hit:.4f}')

Precision@10: 0.1939
Hit Rate@10: 1.0000


In [58]:
param_grid = {
    'svd_params': [
        {'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.02},
        {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
    ],
    'user_knn_params': [
        {'name': 'pearson', 'min_support': 1},
        {'name': 'cosine', 'min_support': 1}
    ],
    'item_knn_params': [
        {'name': 'cosine', 'min_support': 1},
        {'name': 'msd', 'min_support': 1}
    ]
}

# All combinations
combinations = list(itertools.product(
    param_grid['svd_params'],
    param_grid['user_knn_params'],
    param_grid['item_knn_params']
))

best_rmse = float("inf")
best_params = None

for svd_p, user_knn_p, item_knn_p in combinations:
    algo = HybridRecommenderMonolithic(svd_params=svd_p, sim_options_user=user_knn_p, sim_options_item=item_knn_p)
    algo.fit(trainset)
    predictions = algo.test(test_set)
    rmse_val = accuracy.rmse(predictions, verbose=False)

    if rmse_val < best_rmse:
        best_rmse = rmse_val
        best_params = (svd_p, user_knn_p, item_knn_p)

print("Best RMSE:", best_rmse)
print("Best Params:", best_params)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cos

In [59]:
svd_p = {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
user_knn_p = {'name': 'cosine', 'min_support': 1, 'user_based': True}
item_knn_p = {'name': 'msd', 'min_support': 1, 'user_based': True}

In [60]:
algo2 = HybridRecommenderMonolithic(svd_params=svd_p, sim_options_user=user_knn_p, sim_options_item=item_knn_p)
algo2.fit(trainset)
predictions2 = algo2.test(test_set)

mae(predictions2)
mse(predictions2)
rmse(predictions2)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
MAE:  0.0620
MSE: 0.0067
RMSE: 0.0818


0.08184027146981102

In [61]:
K = 10
top_k_preds = get_top_k(predictions2, k=K)
relevant_items = get_true_positives(test_set, threshold=4.0)

prec = precision_at_k(top_k_preds, relevant_items, k=K)
hit = hit_rate_at_k(top_k_preds, relevant_items)

print(f'Precision@{K}: {prec:.4f}')
print(f'Hit Rate@{K}: {hit:.4f}')

Precision@10: 0.1950
Hit Rate@10: 1.0000
