In [1]:
#configure plotting
%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import display

In [2]:
import numpy as np
import random
from models.simple import SimpleMatrixFactorization
from bandit_algorithms import BanditAlgorithms

In [3]:
# Debugging aid: load IPython's autoreload extension and use mode 2, which
# re-imports modules before each cell executes so that edits to imported
# source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Seed every random number generator this notebook has imported so that
# Restart Kernel -> Run All reproduces the same numbers. Seeding only NumPy
# would leave the standard-library `random` module (imported above)
# unseeded; any code drawing from it would differ from run to run.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

---

# Bandit Demo using Edward Simple Matrix Factorization

In [5]:
# Toy ratings matrix: 3 rows by 5 columns (rows are treated as users and
# columns as items when the shape is unpacked further down).
R = np.array([
    [1, 3, 3, 4, 5],
    [1, 2, 4, 3, 5],
    [5, 3, 2, 5, 1],
])

# Binary mask with the same shape and dtype as R. Every entry is 1 except
# row 1, where only the first column is kept and the rest are zeroed.
# NOTE(review): presumably 1 marks ratings the model may train on - confirm
# against SimpleMatrixFactorization.
mask_trin = np.ones_like(R)
mask_trin[1, 1:] = 0

In [6]:
# Build the matrix-factorization model from the toy ratings and their mask.
# hidden_dim and batch_size are passed straight through to the constructor.
model = SimpleMatrixFactorization(
    ratings_matrix=R,
    mask=mask_trin,
    hidden_dim=2,
    batch_size=5,
)

In [7]:
# Tunable values shared by the bandit experiments below.
# epsilon is handed to get_score() in the e-greedy run; gamma is handed to
# the BanditAlgorithms constructor.
# NOTE(review): presumably epsilon is the exploration rate - confirm against
# bandit_algorithms.
epsilon = 0.75
gamma = 0.9

# Matrix dimensions: rows of R are users, columns are items.
(n_users, n_items) = R.shape

In [8]:
# Epsilon-greedy: accumulate regret (best score minus achieved score) over
# all users and print the running total after each one.
regret = 0
for user in range(n_users):
    bandit_algos = BanditAlgorithms(user, R, mask_trin, gamma, model, retrain=False)

    # A single e-greedy score is random, so average 100 repeated evaluations.
    egreedy_scores = [
        bandit_algos.get_score(epsilon, egreedy=True, ucb=False, thompson=False)
        for _ in range(100)
    ]
    mean_egreedy_score = np.mean(egreedy_scores)

    best_score = bandit_algos.get_best_score()
    regret += best_score - mean_egreedy_score

    print('user:', user, 'regret:', regret)

user: 0 regret: 0.0
user: 1 regret: 0.47444
user: 2 regret: 0.47444


In [9]:
# UCB: accumulate regret (best score minus achieved score) over all users
# and print the running total after each one.
regret = 0
for user in range(n_users):
    bandit_algos = BanditAlgorithms(user, R, mask_trin, gamma, model, retrain=False)

    # Only the UCB strategy is switched on; epsilon is passed as 0.
    ucb_score = bandit_algos.get_score(
        epsilon=0,
        egreedy=False,
        ucb=True,
        thompson=False,
    )

    best_score = bandit_algos.get_best_score()
    regret += best_score - ucb_score

    print('user:', user, 'regret:', regret)

user: 0 regret: 0
user: 1 regret: 0.261
user: 2 regret: 0.261


In [10]:
# Thompson sampling: accumulate regret (best score minus achieved score)
# over all users and print the running total after each one.
regret = 0
for user in range(n_users):
    bandit_algos = BanditAlgorithms(user, R, mask_trin, gamma, model, retrain=False)

    # Only the Thompson strategy is switched on; epsilon is passed as 0.
    thompson_score = bandit_algos.get_score(
        epsilon=0,
        egreedy=False,
        ucb=False,
        thompson=True,
    )

    best_score = bandit_algos.get_best_score()
    regret += best_score - thompson_score

    print('user:', user, 'regret:', regret)

user: 0 regret: 0
user: 1 regret: 0.081
user: 2 regret: 0.081
