# Run Particle Thompson Sampling with Toy Data

Let's run the model on synthetic data and compare its cumulative regret with the cumulative regret of a random selection strategy.

In [None]:
import numpy as np
import logging
import itertools
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
import itertools
import time
import os
import pickle
import multiprocessing as mp
from collections import defaultdict
from brescal import gen_random_tensor
from seq_brescal import PFBayesianRescal, compute_regret
from sklearn.metrics import roc_auc_score, mean_squared_error

%matplotlib inline
# logger = logging.getLogger()
# logger.setLevel(logging.INFO)

## Generate Toy Dataset & Run

In [None]:
# model parameters
n_test = 10
var_xs = [0.1, 0.01]
mc_moves = [1, 5]
n_particles = [5, 10]
rbps = [True, False]

# toy data generation parameters
n_dims = [5, 10]
n_entities = [5, 10]
n_relations = [5, 10]
var_e = 1.
var_r = 1.
# Variance used when generating the toy tensors. It has its own name so that
# the model's `var_x` loop variable below can never overwrite it.
_var_x = 0.01

# destination of the cached toy tensors and of the per-run log files
dest = '../result/toy/'
os.makedirs(dest, exist_ok=True)


def finalize(nt, n_particle, rbp, var_x, mc_move):
    """Build a success callback for one asynchronous run.

    The returned function appends ``(rval, nt, n_particle, rbp, var_x,
    mc_move)`` to the module-level ``result`` list, where ``rval`` is the
    value handed to the callback by the pool.
    """
    def inner(rval):
        # `result` is resolved when the callback fires, i.e. it is the list
        # belonging to the data configuration that is currently running.
        return result.append((rval, nt, n_particle, rbp, var_x, mc_move))
    return inner


def report_failure(log_file):
    """Build an error callback that prints which run failed and why."""
    def inner(exc):
        print('run failed for %s: %r' % (log_file, exc))
    return inner


for n_dim, n_entity, n_relation in itertools.product(n_dims, n_entities, n_relations):
    # additional parameters
    max_iter = n_relation * n_entity**2
    random_regret = np.zeros(max_iter)
    result = list()

    # The context manager terminates the workers if anything below raises;
    # on the normal path close() + join() wait for every submitted run first.
    with mp.Pool(8) as pool:
        tic = time.time()

        T = np.zeros([n_test, n_relation, n_entity, n_entity])
        for nt in range(n_test):
            # load the cached toy tensor for this test index, or generate and cache it
            test_file = os.path.join(dest, 'toy_data__%d_%d_%d_%d_%.2f_%.2f_%.2f' % (nt, n_dim, n_entity, n_relation, var_e, var_r, _var_x))
            if os.path.exists(test_file):
                # NOTE: pickle can execute arbitrary code while loading; this is
                # only acceptable because the file is a cache written by this cell.
                with open(test_file, 'rb') as f:
                    _T = pickle.load(f)
            else:
                _T = gen_random_tensor(n_dim, n_entity, n_relation, var_e=var_e, var_r=var_r, var_x=_var_x)
                with open(test_file, 'wb') as f:
                    pickle.dump(_T, f)
            T[nt] = _T

            # particle thompson sampling: one asynchronous run per model setting
            for n_particle, rbp, var_x, mc_move in itertools.product(n_particles, rbps, var_xs, mc_moves):
                log_file = os.path.join(dest, 'pThompson_%d_%d_%d_%d_%.2f_%.2f_%.2f_%d_%r_%.2f_%d.txt' % (nt, n_dim, n_entity, n_relation, var_e, var_r, _var_x, n_particle, rbp, var_x, mc_move))
                if os.path.exists(log_file):
                    # a log for this exact setting already exists: do not run it again
                    continue

                print(log_file)
                maskT = np.zeros_like(T[nt])
                model = PFBayesianRescal(n_dim, var_x=var_x, controlled_var=False, n_particles=n_particle,
                                         compute_score=False, parallel=False, sample_prior=False,
                                         gibbs_init=False, rbp=rbp, mc_move=mc_move, log=log_file)
                pool.apply_async(model.fit, args=(T[nt],),
                                 kwds={'obs_mask': maskT, 'max_iter': max_iter},
                                 callback=finalize(nt, n_particle, rbp, var_x, mc_move),
                                 # without this, an exception raised in a worker is silently discarded
                                 error_callback=report_failure(log_file))

        pool.close()
        pool.join()
    print('elapsed time', time.time()-tic)

## Plot Cumulative Regret

### Compare Performance with Random Selection

In [None]:
# Compute the cumulative regret of every model configuration, summed over the
# test tensors. The selection sequences are read back from the per-run log
# files rather than taken from `result`.
summary = dict()
for nt, n_particle, rbp, var_x, mc_move in itertools.product(range(n_test), n_particles, rbps, var_xs, mc_moves):
    log_path = os.path.join(dest, 'pThompson_%d_%d_%d_%d_%.2f_%.2f_%.2f_%d_%r_%.2f_%d.txt' % (nt, n_dim, n_entity, n_relation, var_e, var_r, _var_x, n_particle, rbp, var_x, mc_move))
    # close the log file as soon as it has been parsed
    with open(log_path, 'r') as f:
        seq = [line.split(',') for line in f]

    key = (n_particle, rbp, var_x, mc_move)
    if key not in summary:
        summary[key] = np.zeros(max_iter)

    regret = compute_regret(T[nt], seq)
    summary[key] += np.cumsum(regret)


# Compute the cumulative regret of random selection. The accumulator is reset
# here so that re-running this cell does not add to the totals of a previous run.
random_regret = np.zeros(max_iter)
for nt in range(n_test):
    # every (relation, entity, entity) index exactly once, in random order
    _seq = list(itertools.product(range(n_relation), range(n_entity), range(n_entity)))
    np.random.shuffle(_seq)
    regret = compute_regret(T[nt], _seq)
    random_regret += np.cumsum(regret)

# plot cumulative regrets, averaged over the test tensors
fig = plt.figure(figsize=(12,8))
plt.plot(range(max_iter), random_regret/n_test, label='RANDOM')

for key, total in summary.items():
    n_particle, rbp, var_x, mc_move = key
    plt.plot(range(max_iter), total/n_test, label='pThompson-%d_%r_%.2f_%d' % (n_particle, rbp, var_x, mc_move))

plt.legend(loc=0)
plt.title('Cumulative Regret')
plt.show()

## Which configuration performs best?

In [None]:
# Pair each configuration's last cumulative-regret value with its label and
# list the pairs in ascending order (lowest final regret first).
models = sorted(
    (summary[config][-1], 'pThompson-%d_%r_%.2f_%d' % config)
    for config in summary
)
for entry in models:
    print(entry)

## Cumulative Regret of the First 200 Iterations

In [None]:
# Plot the cumulative regret of the first `print_iter` iterations, one panel
# per value of var_x (left: 0.1, right: everything else, i.e. 0.01 here).
fig = plt.figure(figsize=(18,8))
print_iter = 200
for key in summary:
    n_particle, rbp, var_x, mc_move = key
    # Slicing clips to the recorded length, so derive the x axis from the
    # slice itself; a fixed range(print_iter) would mismatch shorter runs.
    head = summary[key][:print_iter]
    plt.subplot(1, 2, 1 if var_x == 0.1 else 2)
    plt.plot(range(len(head)), head/n_test, label='pThompson-%d_%r_%.2f_%d' % (n_particle, rbp, var_x, mc_move))

plt.subplot(1,2,1)
plt.legend(loc=0)
plt.title('Cumulative Regret, var_x = 0.1')
plt.subplot(1,2,2)
plt.legend(loc=0)
plt.title('Cumulative Regret, var_x = 0.01')
plt.show()

## Cumulative Regret After 200 Iterations

In [None]:
# Plot the cumulative regret from iteration `print_iter` onwards, one panel
# per value of var_x (left: 0.1, right: everything else, i.e. 0.01 here).
fig = plt.figure(figsize=(18,8))
print_iter = 200
for key in summary:
    n_particle, rbp, var_x, mc_move = key
    tail = summary[key][print_iter:]
    # Start the x axis at print_iter so that it shows the real iteration
    # index of each point instead of restarting at 0.
    iterations = range(print_iter, print_iter + len(tail))
    plt.subplot(1, 2, 1 if var_x == 0.1 else 2)
    plt.plot(iterations, tail/n_test, label='pThompson-%d_%r_%.2f_%d' % (n_particle, rbp, var_x, mc_move))

plt.subplot(1,2,1)
plt.legend(loc=0)
plt.title('Cumulative Regret, var_x = 0.1')
plt.subplot(1,2,2)
plt.legend(loc=0)
plt.title('Cumulative Regret, var_x = 0.01')
plt.show()