In [1]:
import sys
import os

current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))
sys.path.append(os.path.join(project_root, 'src'))

In [2]:
import pandas as pd
import numpy as np
import pickle


from metrics import map_score, mrr_score, ndcg_score, rmse_score, average_precision
from utils import to_user_movie_matrix, make_binary_matrix, RatingMatrix, train_test_split
from models.multi_armed_bandits_manual import EpsilonGreedyBandit, UCBBandit

In [3]:
from contextualbandits.online import LinUCB
from collections import Counter

In [4]:
def save_numpy_object(obj, filename):
    """
    It saves a numpy object as a binary file
    """
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)

In [5]:
ratings = pd.read_csv('../data/ratings.dat', sep='::', engine='python', names=['UserID', 'MovieID', 'Rating', 'Timestamp'])
users = pd.read_csv('../data/users.dat', sep='::', engine='python', names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])
movies = pd.read_csv('../data/movies.dat', sep='::', engine='python', names=['MovieID', 'Title', 'Genres'], encoding='latin1')

data = ratings.merge(users, on='UserID').merge(movies, on='MovieID')

### Data preprocessing

We need to perform some data preprocessing to prepare context for our multi-armed bandits. The steps are: 

1. Extract the main genre for all films because each film can have sub-genre
2. Create dummy variables for all categorical columns 

In [6]:
data['FirstGenre'] = data['Genres'].apply(lambda x: x.split('|')[0])
data = pd.get_dummies(data, columns=['Gender', 'FirstGenre'])

In [7]:
context_columns = ['Age', 'Occupation'] + [col for col in data.columns if col.startswith('Gender_') or col.startswith('FirstGenre_')]

In [8]:
context = data[context_columns]

In [9]:
data['BinaryRating'] = (data['Rating'] >= 4).astype(int)

In [10]:
rewards = data['BinaryRating']

In [11]:
context.head()

Unnamed: 0,Age,Occupation,Gender_F,Gender_M,FirstGenre_Action,FirstGenre_Adventure,FirstGenre_Animation,FirstGenre_Children's,FirstGenre_Comedy,FirstGenre_Crime,...,FirstGenre_Fantasy,FirstGenre_Film-Noir,FirstGenre_Horror,FirstGenre_Musical,FirstGenre_Mystery,FirstGenre_Romance,FirstGenre_Sci-Fi,FirstGenre_Thriller,FirstGenre_War,FirstGenre_Western
0,1,10,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,56,16,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,25,12,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,25,7,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,50,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
rewards.head()

0    1
1    1
2    1
3    1
4    1
Name: BinaryRating, dtype: int32

### Simple non-contextual bandits test

We prepared a simple implementation of non-contextual bandits (Epsilon Greedy and UCBBandit):

In [13]:
#train / test split by time 
train_ratings, test_ratings = train_test_split(ratings, 'Timestamp')

#train / test matrix creation
train_matrix = to_user_movie_matrix(train_ratings)
test_matrix = to_user_movie_matrix(test_ratings) 

In [14]:
bandit = EpsilonGreedyBandit(rating_matrix=train_matrix, epsilon=0.1)

In [15]:
bandit.fit(train_matrix)

In [16]:
average_reward = bandit.evaluate(test_matrix)

In [17]:
print(f'Average reward for EpsilonGreedyBandit: {average_reward}')

Average reward for EpsilonGreedyBandit: 0.00015205291441421615


In [18]:

bandit = UCBBandit(rating_matrix=train_matrix, alpha=1)

In [19]:
bandit.fit(train_matrix)

In [20]:
average_reward = bandit.evaluate(test_matrix)
print(f'Average reward for UCBBandit: {average_reward}')


Average reward for UCBBandit: 1.6005569938338542e-05


The average reward of both bandits is quite low, so we will stick to the contextual implementation and try to test it.

---

### Contextual bandit train

In [21]:
from sklearn.model_selection import train_test_split

In [23]:
X_train, X_test, y_train, y_test, a_train, a_test = train_test_split(context, data['BinaryRating'], data['MovieID'], test_size=0.25, random_state=42)

In [24]:
model = LinUCB(nchoices=len(np.unique(data['MovieID'])), alpha=1.0)

In [25]:
model.fit(X_train.to_numpy(), a_train.to_numpy(), y_train.to_numpy())

<contextualbandits.online.LinUCB at 0x1f5aa8c54c0>

In [26]:
predictions = model.predict(X_test.to_numpy())

  pred[:] += alpha * np.sqrt(ci)


In [None]:
save_numpy_object(predictions, '../artifacts/multi_armed_bandit_predictions.pickle')

In [69]:
movie_ids = np.unique(data['MovieID'])
movie_id_to_index = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}
index_to_movie_id = {idx: movie_id for movie_id, idx in movie_id_to_index.items()}

In [70]:
a_train_indices = np.array([movie_id_to_index[movie_id] for movie_id in a_train])

In [71]:
predicted_movie_ids = [index_to_movie_id[pred] for pred in predictions]

In [72]:
predictions_df = pd.DataFrame({
    'UserID': X_test.index.map(lambda idx: data.loc[idx, 'UserID']),
    'MovieID': predicted_movie_ids,
    'Rating': 1  #predicted ratings are all 1, so we just recommend movies user want to watch without rating forecasting
})

In [74]:
predictions_df = predictions_df.groupby(['UserID', 'MovieID']).agg({'Rating': 'max'}).reset_index()

In [75]:
predictions_rating_matrix = to_user_movie_matrix(predictions_df)

### Evaluation

In [39]:
#preparing of the test data to be used in otu metrics calculation block

y_test = X_test.copy()
y_test['BinaryRating'] = y_test.index.map(lambda idx: data.loc[idx, 'BinaryRating'])
y_test['UserID'] = y_test.index.map(lambda idx: data.loc[idx, 'UserID'])
y_test['MovieID'] = y_test.index.map(lambda idx: data.loc[idx, 'MovieID'])

In [40]:
test_ratings = y_test[['UserID', 'MovieID', 'BinaryRating']].rename(columns={'BinaryRating': 'Rating'})

In [41]:
rating_matrix = to_user_movie_matrix(test_ratings)

In [81]:
map_score_value = map_score(predict_matrix, rating_matrix, top=20)
mrr_score_value = mrr_score(predict_matrix, rating_matrix, top=20)
ndcg_score_value = ndcg_score(predict_matrix, rating_matrix, top=20)

print(f'LinUCB MAP: {map_score_value}')
print(f'LinUCB MRR: {mrr_score_value}')
print(f'LinUCB NDCG: {ndcg_score_value}')

LinUCB MAP: 0.19589655639717443
LinUCB MRR: 0.2297836
LinUCB NDCG: 0.16184333990828204


A MAP of indicates the mean precision of the algorithm when averaged over all queries. A value of  ~0.195 is poor and suggests that, on average, the precision of the recommended items is about 20%.

An MRR of 0.23 reflects low precision telling us that relevant items are, on average, ranked fairly low in the recommendation list (~8-9 place).

NDCG evaluates the quality of the ranking by considering the position of relevant items. An NDCG of 0.16 suggests that the overall ranking of relevant items is not effective

---

We tried to implement a recommender system using multi-armed bandit (MAB) algorithms, specifically focusing on the LinUCB algorithm. The main goal was to create a recommendation system that balances exploration and exploitation to optimize movie recommendations for users.

The model's choice was done because of the training speed and its performance compared to naive non-contextual Epsilon Greedy and UCBBandit algorithms. LinUCB algorithm implementation is based on the contextualbandits Python library. Overall approach was chosen for its ability to handle contextual information, which is crucial for personalized recommendations.

Despite many experiments conducted, the final metrics are not high and showing results similar to PageRank which works much faster compared to MAB approach. 
