In [6]:
import sys
sys.path.append('../src')
from recommender_system.MAB import LinUCB, KernelUCB, offlineEvaluate
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import rbf_kernel


import pandas as pd
import numpy as np

In [9]:
# Development aid: re-execute recommender_system.MAB so that edits made to the
# module on disk are picked up without restarting the kernel, then re-import
# the names used below so they are bound to the reloaded definitions.
import importlib
import recommender_system.MAB
importlib.reload(recommender_system.MAB)
from recommender_system.MAB import LinUCB, KernelUCB, offlineEvaluate

In [None]:
# Interaction log; later cells read its visitorid / timestamp / itemid / event columns.
events_path = '../../data/events.csv'
events = pd.read_csv(events_path)

# Item properties are stored in two CSV parts: read both, then stack them
# into a single frame with a fresh 0..n-1 index.
property_part_paths = (
    '../../data/item_properties_part1.csv',
    '../../data/item_properties_part2.csv',
)
item_props1, item_props2 = map(pd.read_csv, property_part_paths)
item_properties = pd.concat((item_props1, item_props2), ignore_index=True)

category_tree = pd.read_csv('../../data/category_tree.csv')

In [8]:
# Build the offline bandit dataset. Each output row describes one logged event
# on a top item and is laid out as:
#   [arm, reward, ctx_arm0 (3 values), ctx_arm1 (3 values), ...]
# where the 3-value context is the visitor's interaction history *before*
# the event and is identical for every arm.

# Order each visitor's events chronologically so "past" counts are well defined.
events = events.sort_values(['visitorid', 'timestamp'])

# Assign rewards: view -> 0.1, addtocart -> 0.5, transaction -> 1.0
# (an event type missing from the map gets a NaN reward).
reward_map = {'view': 0.1, 'addtocart': 0.5, 'transaction': 1.0}
events['reward'] = events['event'].map(reward_map)

# Select the most frequently occurring items as arms.
n_top_items = 50
top_items = events['itemid'].value_counts().head(n_top_items).index.tolist()
events_filtered = events[events['itemid'].isin(top_items)].copy()

# The log may contain fewer than n_top_items distinct items, so size the
# context block from the arms actually selected. A hard-coded 50 here would
# produce rows whose width disagrees with the number of arms in the file.
n_selected_arms = len(top_items)

# Map itemid to arm index (index 0 is the most frequent item).
arm_map = {item: idx for idx, item in enumerate(top_items)}
events_filtered['arm'] = events_filtered['itemid'].map(arm_map)
events_filtered = events_filtered.sort_values(['visitorid', 'timestamp'])

# Group by visitorid and compute user state (history of interactions).
# Note: the history only counts events on the selected top items, because it
# is accumulated over events_filtered.
grouped = events_filtered.groupby('visitorid')
data_list = []

for visitor, group in grouped:
    past_views = 0
    past_addtocart = 0
    past_transactions = 0
    # Iterate over just the three columns needed; zipping the columns avoids
    # the per-row Series construction that makes iterrows() slow.
    for event, arm, reward in zip(group['event'], group['arm'], group['reward']):
        # User state before this event: [past_views, past_addtocart, past_transactions]
        user_state = np.array([past_views, past_addtocart, past_transactions], dtype=float)
        # Context: the same user state repeated once per arm, already flat
        # (length = n_selected_arms * 3).
        contexts = np.tile(user_state, n_selected_arms)
        row_data = np.concatenate([[arm, reward], contexts])
        data_list.append(row_data)
        # Update past counts only after the row is emitted, so the context
        # never includes the event it is paired with.
        if event == 'view':
            past_views += 1
        elif event == 'addtocart':
            past_addtocart += 1
        elif event == 'transaction':
            past_transactions += 1

data_array = np.array(data_list)

# Save to dataset.txt
np.savetxt('../../data/dataset.txt', data_array)
print("Dataset saved to ../../data/dataset.txt")

Dataset saved to ../../data/dataset.txt


In [2]:
# Read the saved dataset back: column 0 holds the arm index, column 1 the
# reward, and the remaining columns the flattened per-arm contexts.
data = np.loadtxt('../../data/dataset.txt')
arms = data[:, 0].astype(int)
rewards = data[:, 1].astype(float)
contexts = data[:, 2:].astype(float)

# Infer the problem dimensions from the data itself: the number of arms is the
# number of distinct arm indices, and each arm owns an equal share of the
# flattened context columns.
n_events, flat_width = contexts.shape
n_arms = np.unique(arms).size
n_dims = flat_width // n_arms

# Restore the (event, arm, feature) layout.
contexts = contexts.reshape(n_events, n_arms, n_dims)
print(f"n_arms: {n_arms}, n_dims: {n_dims}, n_events: {n_events}")

n_arms: 50, n_dims: 3, n_events: 64301


In [None]:
# Grid search over the LinUCB alpha parameter.
alpha_list = np.logspace(-3, 0, num=20)   # 20 log-spaced candidates between 1e-3 and 1
n_rounds = 800                            # rounds passed to every offline evaluation
best_alpha, max_reward = None, -np.inf    # best candidate so far and its mean reward

# Mean reward of a freshly constructed LinUCB for each candidate, in alpha_list order.
results = [
    np.mean(offlineEvaluate(LinUCB(n_arms, n_dims, alpha), arms, rewards, contexts, n_rounds))
    for alpha in alpha_list
]

# Keep the first candidate that reaches the highest mean reward
# (the strict '>' means later ties do not replace it).
for alpha, mean_reward in zip(alpha_list, results):
    if mean_reward > max_reward:
        best_alpha, max_reward = alpha, mean_reward

# Mean reward as a function of alpha, with one tick per evaluated candidate.
plt.rcParams["figure.figsize"] = (20,5)
plt.plot(alpha_list, results)
plt.xticks(alpha_list, rotation='vertical')
plt.xlabel('parameter - alpha')
plt.ylabel('Mean Reward')
plt.title('the best alpha')
plt.show()
print("The best alpha for LinUCB is", round(best_alpha, 3), "with reward", max_reward)

In [None]:
# Hyperparameter search for KernelUCB, one parameter at a time:
# first tune gamma with eta held at 0.1, then tune eta using the best gamma.
gamma_list = np.linspace(0, 1, num = 11)[1:]       # 0.1 .. 1.0 (the leading 0.0 is dropped)
eta_list = np.linspace(0, 0.5, num = 11)[1:]       # 0.05 .. 0.5 (the leading 0.0 is dropped)
n_rounds = 800                                     # rounds passed to every offline evaluation
gam_max_reward, eta_max_reward = -np.inf, -np.inf  # best mean reward seen in each sweep


def _kernel_ucb_mean_reward(gam, eta):
    """Build a KernelUCB with the RBF kernel for (gam, eta) and return the mean of its offline-evaluation result."""
    candidate_mab = KernelUCB(n_arms, n_dims, gam, eta, rbf_kernel)
    return np.mean(offlineEvaluate(candidate_mab, arms, rewards, contexts, n_rounds))


# Sweep gamma with the initial eta.
eta = 0.1
for gam in gamma_list:
    mean_reward = _kernel_ucb_mean_reward(gam, eta)
    if mean_reward > gam_max_reward:      # strictly better than every earlier gamma
        best_gamma, gam_max_reward = gam, mean_reward

print("The best gamma for Kernel UCB is {", round(best_gamma, 3), "} with reward", gam_max_reward)

# Sweep eta with gamma fixed at the value found above.
for eta in eta_list:
    mean_reward = _kernel_ucb_mean_reward(best_gamma, eta)
    if mean_reward > eta_max_reward:      # strictly better than every earlier eta
        best_eta, eta_max_reward = eta, mean_reward

print("The best eta for Kernel UCB is {", round(best_eta, 3), "} with reward", eta_max_reward)

In [None]:
# Compare the tuned LinUCB and KernelUCB policies by their running mean reward.
n_rounds = 800   # rounds passed to each offline evaluation (and the most that are plotted)


def _running_mean(per_round_rewards):
    """Return the running mean of a 1-D reward sequence.

    Element k is the mean of the first k+1 rewards. An empty input yields an
    empty array instead of raising.
    """
    rewards_arr = np.asarray(per_round_rewards, dtype=float)
    return np.cumsum(rewards_arr) / np.arange(1, rewards_arr.size + 1)


# Evaluate each algorithm once with its best hyperparameters. n_rounds is used
# for both calls so the setting above is the single source of truth.
best_lin_mab = LinUCB(n_arms, n_dims, best_alpha)
best_results_LinUCB = offlineEvaluate(best_lin_mab, arms, rewards, contexts, n_rounds)

best_kernel_mab = KernelUCB(n_arms, n_dims, best_gamma, best_eta, rbf_kernel)
best_results_KernelUCB = offlineEvaluate(best_kernel_mab, arms, rewards, contexts, n_rounds)

# NOTE(review): offlineEvaluate may return fewer than n_rounds rewards if the
# log runs out before enough rounds are collected — TODO confirm. The running
# mean is therefore computed over whatever was returned (capped at n_rounds),
# and each curve gets its own x-axis, so a short result no longer raises
# IndexError.
best_lin_results = _running_mean(best_results_LinUCB[:n_rounds])
best_ker_results = _running_mean(best_results_KernelUCB[:n_rounds])

# Use a figure-local size rather than mutating the global plt.rcParams.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(np.arange(1, best_lin_results.size + 1), best_lin_results, label = "LinUCB")
ax.plot(np.arange(1, best_ker_results.size + 1), best_ker_results, label = "kernelUCB")

ax.set_title('LinUCB vs KernelUCB')
ax.set_ylabel('Cumulative Reward Per-Round')
ax.set_xlabel('Rounds')
ax.legend()
plt.show()