In [1]:
import sys
import os

# Make the project's `src/` directory importable from this notebook.
# The project root is taken to be the parent of the current working directory,
# i.e. this assumes the kernel was started from a direct sub-directory of the project.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))
src_dir = os.path.join(project_root, 'src')

# Guard against duplicates so that re-running this cell does not keep growing sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
import pandas as pd
import numpy as np


from metrics import map_score, mrr_score, ndcg_score, rmse_score, average_precision
from utils import train_test_split, to_user_movie_matrix, make_binary_matrix, RatingMatrix
from models.multi_armed_bandits import EpsilonGreedyBandit, UCBBandit

In [3]:
# Load the three '::'-delimited tables from ../data. Column names are supplied
# explicitly through `names`, so the first line of each file is read as data.
# The multi-character separator is parsed with pandas' Python engine.
ratings = pd.read_csv(
    '../data/ratings.dat',
    sep='::',
    engine='python',
    names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
)
users = pd.read_csv(
    '../data/users.dat',
    sep='::',
    engine='python',
    names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'],
)
# movies.dat is the only file decoded as latin1.
movies = pd.read_csv(
    '../data/movies.dat',
    sep='::',
    engine='python',
    names=['MovieID', 'Title', 'Genres'],
    encoding='latin1',
)

# Denormalised view: each rating joined with its user's and then its movie's columns.
data = pd.merge(
    pd.merge(ratings, users, on='UserID'),
    movies,
    on='MovieID',
)

In [4]:
# Split the raw ratings into train / test partitions by time ('Timestamp' column).
train_ratings, test_ratings = train_test_split(ratings, 'Timestamp')

# Build one matrix per partition with the project's to_user_movie_matrix helper
# (train first, then test).
train_matrix, test_matrix = map(to_user_movie_matrix, (train_ratings, test_ratings))

### Modeling user preferences

In [5]:
# Seed NumPy's global RNG so that repeated "Restart & Run All" executions are comparable.
# NOTE(review): assumes the bandit classes draw from NumPy's global RNG -- confirm in
# models.multi_armed_bandits; if they use `random` or a private Generator, seed that instead.
SEED = 42
np.random.seed(SEED)

# Epsilon-greedy bandit constructed on the training matrix.
# epsilon=0.1 is presumably the exploration probability -- TODO confirm against the class.
bandit = EpsilonGreedyBandit(rating_matrix=train_matrix, epsilon=0.1)

In [6]:
# Fit the epsilon-greedy bandit on the training matrix (the same matrix that was
# passed to its constructor as `rating_matrix`).
bandit.fit(train_matrix)

In [7]:
# Evaluate the fitted bandit against the test matrix built from the later part of the
# time-based split. The result is stored for reporting; judging by the variable name it
# is an average reward -- TODO confirm what `evaluate` returns.
average_reward = bandit.evaluate(test_matrix)

In [8]:
# Report the evaluation result stored in `average_reward`.
print(f'Average Reward: {average_reward}')

Average Reward: 8.803063466086197e-05


In [9]:
# Initialize the UCB bandit on the training matrix (it is fitted in the next cell).
# NOTE(review): this rebinds `bandit`, so the epsilon-greedy model created earlier is
# no longer reachable afterwards. alpha=1 is presumably the exploration weight -- TODO confirm.
bandit = UCBBandit(rating_matrix=train_matrix, alpha=1)  # alternative: EpsilonGreedyBandit with decay


In [10]:
# Fit whatever model is currently bound to `bandit` on the training matrix; when the
# notebook is run top to bottom this is the UCBBandit created in the previous cell.
bandit.fit(train_matrix)

In [11]:

# Evaluate the UCB bandit on the test matrix and report the result.
# NOTE(review): this overwrites `average_reward` from the epsilon-greedy run and prints the
# same label, so the two results can only be told apart by cell order. In the recorded
# outputs the value printed here is lower than the epsilon-greedy one, so describing this
# bandit as "improved" is not supported by these runs.
average_reward = bandit.evaluate(test_matrix)
print(f'Average Reward: {average_reward}')


Average Reward: 1.6005569938338542e-05
