<img width="200" style="display:inline" src="https://yt3.ggpht.com/-gdE0kcO30lI/AAAAAAAAAAI/AAAAAAAAAAA/km_-04EMwJI/s900-c-k-no-mo-rj-c0xffffff/photo.jpg"></img>
<img style="display:inline" src="https://blogs.mathworks.com/images/loren/2016/multiarmedbandit.jpg"></img>

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib; matplotlib.rcParams['figure.figsize'] = (15,3)
from IPython.display import display

In [2]:
import numpy as np
import os

In [3]:
# Reload all packages - make debugging easier
%load_ext autoreload
%autoreload 2

---

In [4]:
from exp.utils import load_R, desparsify_2, prepare_test_users, make_binary

In [5]:
R = load_R()

# One reason our model doesn't seem to work well may be that the matrix is too sparse,
# so reduce its sparsity before doing anything else.
R = desparsify_2(R)

# Convert the ratings to binary values.
R = make_binary(R)

# The point of this notebook is to check whether we can fine-tune a model and still
# obtain meaningful uncertainty **updates** per user.
# First, we pick a significant number of users to check: 20 "dense" and 20 "sparse" users.
# NOTE(review): PERC_DROP presumably is the fraction of each test user's ratings held out
# into test_masks — confirm against prepare_test_users.
dense_users, spars_users, train_mask, test_masks = \
    prepare_test_users(R, NUM_USERS_DENSE = 20, NUM_USERS_SPARS = 20, PERC_DROP = 0.3)

R contains 100000 ratings
Before desparsify: % of items:  0.0630466936422
After desparsify: % of items:  0.373593572446
Size:  (354, 353)


In [6]:
# Sanity check: entry count of the training mask, then the entry count of
# all per-user test masks added together.
print(np.sum(train_mask))
held_out_counts = [np.sum(user_mask) for user_mask in test_masks.values()]
print(np.sum(held_out_counts))

33199
1421


---

Choose our model.

In [9]:
from pmf import PMFBern
# NOTE(review): `model` is rebound to a PMF instance in a later cell
# (`model = PMF(ratingMatrix=R)`) before any training call in this notebook,
# so this PMFBern instance is never trained here — confirm which model is intended.
model = PMFBern(ratingMatrix=R)

Some helper functions:

In [10]:
def plot_R(rhats, r=None, title=''):
    """Draw one histogram panel per column of ``rhats`` in a single row.

    Parameters
    ----------
    rhats : 2-D array
        Values to histogram; column ``j`` is drawn in panel ``j``.
    r : sequence, optional
        When given, ``r[j]`` is marked with a red vertical line in panel ``j``.
    title : str
        Title placed above the whole figure.
    """
    _, n_items = rhats.shape
    # squeeze=False keeps `axes` an array even when there is a single column,
    # so `axes[j]` below is valid for any number of panels.
    f, axes = plt.subplots(1, n_items, sharex=True, sharey=True, squeeze=False)
    axes = axes[0]
    for j in range(n_items):
        if r is not None:
            axes[j].axvline(x=r[j], color='r', alpha=0.4)
        # `normed` was removed from Axes.hist; `density` is its replacement.
        axes[j].hist(rhats[:,j], histtype='stepfilled', density=True, bins=100)
    # The x axis is shared, so this limit applies to every panel.
    plt.xlim([0,6])
    f.suptitle(title)
    plt.show()
    
# Empirical Entropy of Ratings
from empirical_entropy import empirical_entropy
def get_entropy_data(model, user_index, intended_mask, num_samples=500):
    """Return two mean per-item empirical entropies for one user.

    Draws ``num_samples`` samples for ``user_index`` from ``model`` and takes
    the per-item output of ``empirical_entropy``.

    Returns
    -------
    (mean_all, mean_intended)
        ``mean_all`` averages over the items where ``R[user_index, :] > 0``;
        ``mean_intended`` averages over the items selected by ``intended_mask``.
    """
    drawn = model.sample_for_user(user_index, num_samples)
    _unused, entropy_by_item = empirical_entropy(drawn)

    rated_idx = np.where(R[user_index, :] > 0)   # every item with a positive entry in R
    intended_idx = np.where(intended_mask)       # only the items we meant to test on

    return np.mean(entropy_by_item[rated_idx]), np.mean(entropy_by_item[intended_idx])

# Latent Variable Entropy
def joint_entropy(vars):
    """Joint differential entropy of independent Gaussians, one value per row.

    Each entry of ``vars`` is treated as the variance of an independent
    Gaussian, whose entropy is ``0.5 * log(2 * pi * e * variance)``. The joint
    entropy of independent variables is the sum of their marginal entropies,
    so the result is that sum taken along axis 1.

    Parameters
    ----------
    vars : 2-D array-like of positive floats
        Variances; one row per entity, one column per latent dimension.

    Returns
    -------
    1-D numpy array with one joint entropy per row of ``vars``.
    """
    marginal_entropy = 0.5 * np.log(2 * np.pi * np.e * np.asarray(vars))
    # Sum the entropies, not the raw variances.
    return np.sum(marginal_entropy, axis=1)
    
# Fetch user latent variables and return their entropy for PMF
def get_user_latent_entropy_PMF(model, user):
    """Return ``joint_entropy`` of two trainable variables at row ``user``.

    Takes the trainable variables at positions 1 and 3 of the TensorFlow
    TRAINABLE_VARIABLES collection, applies softplus, evaluates them in
    ``model.sess`` and passes each result through ``joint_entropy``.

    NOTE(review): positions 1 and 3 are presumably the scale parameters of the
    two user latent matrices (U and U') — this depends on variable creation
    order in the model; confirm against the PMF implementation.
    """
    import tensorflow as tf

    session = model.sess
    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    u_var, up_var = trainables[1], trainables[3]

    def _entropy_of(variable):
        # softplus is applied before the values are handed to joint_entropy
        return joint_entropy(session.run(tf.nn.softplus(variable)))

    entropy_u = _entropy_of(u_var)
    entropy_up = _entropy_of(up_var)

    return entropy_u[user], entropy_up[user]

In [70]:
from pmf import PMF
# Rebinds the global `model`; test() below reads this global when sampling and fine-tuning.
model = PMF(ratingMatrix=R)

In [71]:
# n_iter passed to model.train for the initial fit on train_mask.
N_STEPS_INITIAL_TRAIN = 3000
# n_iter passed to model.train for each fine-tune inside test() (only when train=True).
N_STEPS_FINETUNE = 500

In [72]:
# Checkpoint saved after the initial training run; test() reloads it before every fine-tune.
# NOTE(review): the file name says "1500" but N_STEPS_INITIAL_TRAIN is 3000 — confirm which is intended.
# NOTE(review): "CHECKPOING" is a typo for "CHECKPOINT"; test() uses the same name, so rename both together.
CHECKPOING_FILENAME = 'test_checkpoing_pmf1500.ckpt'

%time loss = model.train(train_mask, n_iter=N_STEPS_INITIAL_TRAIN)
model.save(CHECKPOING_FILENAME)

CPU times: user 13.6 s, sys: 5 s, total: 18.6 s
Wall time: 9.23 s


'test_checkpoing_pmf1500.ckpt'

---

In [112]:
# def plot_R_binary(rhats, r=None, title='', user_test_mask=None):
#     _, n_items = rhats.shape
#     f, axes = plt.subplots(1, n_items, sharex=True, sharey=True)
#     i = 0
#     for j in range(n_items):
#         if r is not None:
#             axes[j].axvline(x=r[j], color='r', alpha=0.4)
#             if r[j] == 0:
#                 axes[j].set_facecolor('#aaaaaa')
#             else:
#                 if user_test_mask is not None and user_test_mask[j]:
#                     right_color = '#88cc88'
#                     wrong_color = '#cc88cc'
#                 else:
#                     right_color = '#ccefcc'
#                     wrong_color = '#efcccc'
                    
#                 mean = np.mean(rhats[:,j])
#                 if np.abs(mean - r[j]) > 0.499:
#                     axes[j].set_facecolor(wrong_color)
#                 else:
#                     axes[j].set_facecolor(right_color)
#                 axes[j].axvline(x=mean, color='b', alpha=0.4)
# #             if r[j] == 1:
# #                 axes[j].set_facecolor('#efcccc')
# #             if r[j] == 2:
# #                 axes[j].set_facecolor('#ccefcc')
#         axes[j].hist(rhats[:,j], histtype='stepfilled', normed=True, bins=100)
#     plt.xlim([0.8,2.2])
#     plt.ylim([0,4])
#     f.suptitle(title)
#     plt.show()
    

In [113]:
# uid = dense_users[0][1]
# print("User {}".format(uid))

# samples = model.sample_for_user(uid, 1000)

# ii = 0
# bch = 10
# while ii < 200: #R.shape[1]:
#     plot_R_binary(samples[:, ii:(ii+bch)], R[uid, ii:(ii+bch)], title="{} to {}".format(ii, ii+bch-1), 
#                   user_test_mask=test_masks[uid][ii:(ii+bch)])
#     ii += bch

---

In [30]:
from banditChoice import BanditChoice
# Single shared policy object; the bandit_algo_* wrappers below call its
# get_egreedy / get_ucb methods.
bandit = BanditChoice()

def bandit_algo_random(samples, mask):
    """Choose an item via ``bandit.get_egreedy`` with ``epsilon=1.0``.

    Only the columns of ``samples`` whose ``mask`` entry is truthy are offered
    to the policy, together with their original indices.
    (epsilon=1.0 presumably means "always explore" — confirm in BanditChoice.)
    """
    candidates = np.where(mask)[0]
    return bandit.get_egreedy(samples[:, candidates], candidates, epsilon=1.0)

def bandit_algo_exploit(samples, mask):
    """Choose an item via ``bandit.get_egreedy`` with ``epsilon=0.0``.

    Only the columns of ``samples`` whose ``mask`` entry is truthy are offered
    to the policy, together with their original indices.
    (epsilon=0.0 presumably means "always exploit" — confirm in BanditChoice.)
    """
    candidates = np.where(mask)[0]
    return bandit.get_egreedy(samples[:, candidates], candidates, epsilon=0.0)

def bandit_algo_egreedy(samples, mask):
    """Choose an item via ``bandit.get_egreedy`` with ``epsilon=0.1``.

    Only the columns of ``samples`` whose ``mask`` entry is truthy are offered
    to the policy, together with their original indices.
    """
    candidates = np.where(mask)[0]
    return bandit.get_egreedy(samples[:, candidates], candidates, epsilon=0.1)

def bandit_algo_ucb(samples, mask):
    """Choose an item via ``bandit.get_ucb``.

    Only the columns of ``samples`` whose ``mask`` entry is truthy are offered
    to the policy, together with their original indices.
    """
    candidates = np.where(mask)[0]
    return bandit.get_ucb(samples[:, candidates], candidates)

def test(users, bandit_algo, train=False, verbose=False):
    """Run a bandit policy over each user's held-out items and record regret.

    For every user, the policy repeatedly picks one of the items still set in
    that user's test mask until none remain. The regret of a step is the best
    rating among the remaining held-out items minus the rating of the chosen
    item.

    Reads the notebook globals ``model``, ``R``, ``train_mask``, ``test_masks``,
    ``CHECKPOING_FILENAME`` and ``N_STEPS_FINETUNE``.

    Parameters
    ----------
    users : iterable of 2-tuples
        Only the second element of each pair (the user index) is used.
    bandit_algo : callable(samples, mask) -> item index
        Selection policy; receives 100 model samples for the user and the
        mask of items still available.
    train : bool
        When True, after each revealed item the model is reloaded from the
        checkpoint and fine-tuned on the training mask plus the items revealed
        so far for this user.
    verbose : bool
        When True, print a header line per user.

    Returns
    -------
    dict mapping user index to the list of per-step regrets.
    """
    regret_per_user = {}
    for _, user in users:
        if verbose: print("---------------------- USER {} ----------------------".format(user))

        if train:
            # Start every user from the shared checkpoint; otherwise the
            # weights fine-tuned for the previous user would drive this
            # user's first recommendation.
            model.load(CHECKPOING_FILENAME)

        regret = []

        mask_ = np.copy(train_mask)
        test_mask_ = np.copy(test_masks[user])
        # int() so range() also accepts a mask whose sum is a numpy float.
        n_items = int(np.sum(test_mask_))

        for step in range(n_items):
            # Bandit
            samples = model.sample_for_user(user, 100)
            item = bandit_algo(samples, test_mask_)

            # Regret
            item_rating = R[user, item]
            best_item_rating = np.max(R[user, :] * test_mask_)
            regret.append(best_item_rating - item_rating)

            # Retrain
            mask_[user,item] = 1 # Showed this item; we can now train on it.
            test_mask_[item] = 0 # Can't show this item anymore.
            # Nothing is sampled after the last item, so fine-tuning there
            # would be wasted work.
            if train and step < n_items - 1:
                model.load(CHECKPOING_FILENAME)
                model.train(mask_, n_iter=N_STEPS_FINETUNE)

        regret_per_user[user] = regret

    if train and regret_per_user:
        # Leave the global model at the checkpoint state so that later calls
        # are not affected by this run's fine-tuning.
        model.load(CHECKPOING_FILENAME)
    return regret_per_user

Evaluation helpers:

In [31]:
def plot_inst_regret(regret_per_user):
    """Plot each user's per-step regret as one labelled line on a shared figure.

    Parameters
    ----------
    regret_per_user : dict
        Maps a user id to that user's sequence of per-step regrets.
    """
    for u_id, regret in regret_per_user.items():
        plt.plot(regret, label="User {}".format(u_id))
    if regret_per_user:
        # All users share one figure, so identify them in a legend rather than
        # in a title that only the last user would win.
        plt.title("Regret over time")
        plt.xlabel("Step")
        plt.ylabel("Regret")
        plt.legend(fontsize='small', ncol=5)
    plt.show()
    
def plot_cum_regret(regret_per_user):
    """Plot each user's cumulative regret as one labelled line on a shared figure.

    Parameters
    ----------
    regret_per_user : dict
        Maps a user id to that user's sequence of per-step regrets.
    """
    for u_id, regret in regret_per_user.items():
        plt.plot(np.cumsum(regret), label="User {}".format(u_id))
    if regret_per_user:
        # All users share one figure, so identify them in a legend rather than
        # in a title that only the last user would win.
        plt.title("Cumulative regret over time")
        plt.xlabel("Step")
        plt.ylabel("Cumulative regret")
        plt.legend(fontsize='small', ncol=5)
    plt.show()
        
def get_regret_stats(regret_per_user):
    """Summarise a ``{user: [regret, ...]}`` mapping.

    Returns ``(total_regret, avg_regret)`` where the total is the sum over all
    users and steps, and the average is that total divided by the number of
    users. An empty mapping yields ``(nan, nan)``.
    """
    n_users = len(regret_per_user)
    if not n_users:
        nan = float('nan')
        return nan, nan

    total = sum(sum(steps) for steps in regret_per_user.values())
    return total, total / n_users

def print_regret_stats(regret_per_user):
    """Print the total and the per-user average regret of ``regret_per_user``."""
    stats = get_regret_stats(regret_per_user)
    for template, value in zip(("Total regret: {}", "Avg: {}"), stats):
        print(template.format(value))

In [32]:
# Baseline runs without fine-tuning (test() defaults to train=False):
# the first 20 dense users and the first 20 sparse users under each policy.
%time dense20egreedy = test(dense_users[:20], bandit_algo_egreedy)
%time dense20random  = test(dense_users[:20], bandit_algo_random)
%time dense20exploit = test(dense_users[:20], bandit_algo_exploit)
%time spars20egreedy = test(spars_users[:20], bandit_algo_egreedy)
%time spars20random  = test(spars_users[:20], bandit_algo_random)
%time spars20exploit = test(spars_users[:20], bandit_algo_exploit)

CPU times: user 5min 35s, sys: 12.2 s, total: 5min 47s
Wall time: 1min 47s
CPU times: user 5min 35s, sys: 11.6 s, total: 5min 46s
Wall time: 1min 38s
CPU times: user 5min 35s, sys: 11.6 s, total: 5min 46s
Wall time: 1min 38s
CPU times: user 1min 31s, sys: 3.18 s, total: 1min 35s
Wall time: 26.8 s
CPU times: user 1min 31s, sys: 3.19 s, total: 1min 34s
Wall time: 26.9 s
CPU times: user 1min 32s, sys: 3.17 s, total: 1min 35s
Wall time: 26.8 s


In [33]:
# The output is unlabelled; the pairs of lines appear in this order:
# dense e-greedy, dense random, dense exploit, sparse e-greedy, sparse random, sparse exploit.
print_regret_stats(dense20egreedy)
print_regret_stats(dense20random)
print_regret_stats(dense20exploit)
print_regret_stats(spars20egreedy)
print_regret_stats(spars20random)
print_regret_stats(spars20exploit)

Total regret: 114.0
Avg: 5.7
Total regret: 125.0
Avg: 6.25
Total regret: 116.0
Avg: 5.8
Total regret: 57.0
Avg: 2.85
Total regret: 63.0
Avg: 3.15
Total regret: 56.0
Avg: 2.8


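The results vary from run to run because both the model samples and the policies are stochastic. To compare the policies more reliably, repeat each experiment several times and look at the distribution of the average regret.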

In [58]:
N_SAMPLES = 25

def _sample_avg_regret(bandit_algo, n_samples=N_SAMPLES):
    """Repeat the sparse-user experiment and collect each run's average regret.

    Runs ``test`` on the first 20 sparse users ``n_samples`` times with the
    given policy and returns the list of per-run average regrets.
    """
    avg_regrets = []
    for _ in range(n_samples):
        _total, avg_regret = get_regret_stats(test(spars_users[:20], bandit_algo))
        avg_regrets.append(avg_regret)
    return avg_regrets

# One list of N_SAMPLES average regrets per policy, in the same order as before.
exploit = _sample_avg_regret(bandit_algo_exploit)
# NOTE: `random` shadows the name of the stdlib module; it is kept because the
# plotting cell below reads this variable.
random = _sample_avg_regret(bandit_algo_random)
egreedy = _sample_avg_regret(bandit_algo_egreedy)

KeyboardInterrupt: 

In [None]:
# Overlay the distribution of per-run average regret for each policy.
for avg_regrets, policy in ((exploit, 'exploit'), (random, 'random'), (egreedy, 'egreedy')):
    plt.hist(avg_regrets, histtype='step', label=policy)
plt.legend()
plt.show()

---