In [1]:
%matplotlib inline
import os
# Switch the working directory to the parent of the current one.
# NOTE: the path is relative, so re-running this cell moves up one more level
# each time; run it exactly once per kernel session.
# NOTE(review): presumably this is so the project-local modules imported below
# and the relative DATA_FILE path resolve from the repository root - confirm.
os.chdir("..")
import matplotlib.pyplot as plt
import sys
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import TruncatedSVD
import os
from math import log
import random
import time
import pickle
from numpy.linalg import pinv
from arm_class import ArmGaussian
from LinTS_class import PolicyLinTS
from LinUCB_class import PolicyLinUCB
from D_LinUCB_class import DLinUCB
from D_LinTS_class import DLinTS
from D_RandLinUCB_class import DRandLinUCB
from environment_class import Environment
from simulator_class import Simulator
from utils import plot_regret

In [None]:
# Input parameters
#   d: dimension (number of SVD components kept; also handed to every policy)
#   k: number of arms / 2
d, k = 10, 5

In [None]:
# Load the Criteo attribution dataset and reduce it to d dense features
seed = 1
DATA_FILE = 'criteo_attribution_dataset.tsv.gz'
df = pd.read_csv(DATA_FILE, sep='\t', compression='gzip')

# Work on a fixed random subsample (same rows on every run thanks to random_state)
df_sub = df.sample(n=200000, random_state=seed)

# Categorical columns that get one-hot encoded
FEATURES = ['campaign', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat8', 'cat9']

# One-hot encode every categorical feature and join all indicator columns in a
# single concat. The click label is kept OUT of this matrix on purpose: it is
# the regression target later on, so including it here would leak the label
# into the reduced features. (A bare `df_onehot.drop(...)` without assignment
# does not remove anything, because DataFrame.drop returns a new frame.)
df_onehot = pd.concat(
    [pd.get_dummies(df_sub[feature]) for feature in FEATURES],
    axis=1,
)

# TruncatedSVD uses a randomized solver by default; seed it so that `reduced`
# (and everything derived from it) is reproducible across kernel restarts.
svd = TruncatedSVD(d, random_state=seed)
reduced = svd.fit_transform(df_onehot)
print(reduced.shape)

In [None]:
# Manipulating Dataset for Experiment

def _max_row_norm(frame):
    """Return the largest Euclidean norm among the rows of `frame`."""
    return np.max(np.sqrt(np.square(frame).sum(axis=1)))

# Regress the +/-1 encoded click label on the SVD features
df_reduced = pd.DataFrame(data=reduced)
X = df_reduced
y = df_sub.click * 2 - 1
reg = LinearRegression().fit(X, y)
print(reg.intercept_)
print(reg.coef_)

# Fitted coefficients and their Euclidean norm
coef = reg.coef_
s = np.linalg.norm(coef)
print(s)

# Draw 10000 feature rows from the non-clicked rows and 10000 from the clicked rows
not_clicked_mask = (df_sub.click == 0).tolist()
clicked_mask = (df_sub.click == 1).tolist()
df_not_click = df_reduced[not_clicked_mask].sample(n=10000, random_state=seed)
df_click = df_reduced[clicked_mask].sample(n=10000, random_state=seed)

# Largest row norm over both samples
m1 = _max_row_norm(df_not_click)
m2 = _max_row_norm(df_click)
l = np.max([m1, m2])
print(l)

In [None]:
# General parameters

# --- Confidence / regularisation ---
delta = 0.01    # Probability of being outside the confidence interval
lambda_ = 1     # Regularisation parameter
alpha = 1

# --- Noise ---
sigma_noise = np.sqrt(0.15)  # Square root of the variance of the noise

# --- Simulation length and reporting ---
steps = 10000   # Number of steps for the experiment
q = 10          # Displaying the quantile (in %)
n_scat = 1000   # How frequently should we plot the true parameter
t_saved = None
verbose = False

In [None]:
# Simulator Functions
def experiment_policies(n_mc):
    """Build the environment and simulator from the notebook globals and run them.

    Reads the module-level settings (d, k, theta, sigma_noise, verbose,
    df_click, df_not_click, policies, steps, bp, q, n_scat, t_saved), so those
    cells must have been executed first.

    :param n_mc: forwarded to ``Simulator.run`` (presumably the number of
        Monte Carlo repetitions - confirm against simulator_class).
    :return: the three values produced by ``Simulator.run``, as a tuple
        ``(avgRegret, qRegret, QRegret)``.
    """
    environment = Environment(
        d, theta, sigma_noise, verbose, None, df_click, df_not_click
    )
    runner = Simulator(environment, theta, policies, k, d, steps, bp, verbose)
    avg_regret, low_regret, high_regret = runner.run(
        steps, n_mc, q, n_scat, t_saved
    )
    return avg_regret, low_regret, high_regret

def data_from_experiment(n_mc):
    """Run the experiment and regroup its results per policy.

    :param n_mc: forwarded unchanged to ``experiment_policies``.
    :return: a list with one entry per key of the average-regret mapping, each
        entry being ``[policy, avgRegret[policy], qRegret[policy], QRegret[policy]]``.
    """
    avg_regret, low_regret, high_regret = experiment_policies(n_mc)
    rows = []
    for name in avg_regret:
        rows.append(
            [name, avg_regret[name], low_regret[name], high_regret[name]]
        )
    return rows

In [None]:
### Description of the experiment
np.random.seed(seed)
theta = coef # Starting point of the true unknown parameter
print(theta.dtype)  # dtype of the parameter vector (not the bound `astype` method)

# Random sign pattern: each coordinate is +1 with probability 0.4, -1 otherwise
flip = 2*np.random.binomial(1,0.4,d)-1
print(flip.dtype)

# Parameter after the breakpoint: theta with the signs chosen by `flip` reversed.
# (Multiplying coef by theta would just square each coefficient and ignore `flip`.)
theta_flip = np.multiply(flip, theta)

bp = {4001: theta_flip} # Description of the breakpoints
B_T = np.linalg.norm(theta - theta_flip) # Total variation of the parameter (single breakpoint)
print('B_T value:', B_T)
print('Sigma value for the experimenxt:', sigma_noise)

gamma  = 1 - (B_T/(d*steps))**(2/3) # Optimal value to minimize the asymptotic regret
tau = (d*steps/B_T)**(2/3) # Optimal value to minimize the asymptotic regret
print('gamma:', gamma)
print('tau:', tau)

# Policies compared in the experiment: three discounted variants (using gamma)
# followed by two LinTS policies, the second one built with omniscient = True.
policies = [
            DLinUCB(d, delta, alpha, lambda_, s, l, gamma, '', sm = False, sigma_noise = sigma_noise, verbose=verbose),
            DLinTS(d, delta, alpha, lambda_, s, l, gamma, '', sm = False, sigma_noise = sigma_noise, verbose=verbose),
            DRandLinUCB(d, delta, alpha/2.5, lambda_, s, l, gamma, '', sm = False, sigma_noise = sigma_noise, verbose=verbose, option = "couple_opt"),
            #PolicyLinUCB(d, delta, alpha, lambda_, s, l, '', sm = True, sigma_noise = sigma_noise, verbose=verbose),
            #PolicyLinUCB(d, delta, alpha, lambda_, s, l, '-OR', sm = True, sigma_noise = sigma_noise, verbose=verbose, omniscient = True),
            PolicyLinTS(d, delta, alpha, lambda_, s, l, '', sm = True, sigma_noise = sigma_noise, verbose=verbose),
            PolicyLinTS(d, delta, alpha, lambda_, s, l, '-OR', sm = True, sigma_noise = sigma_noise, verbose=verbose, omniscient = True)
           ]

# Unparallelized experiments

In [None]:
# Run the experiment with n_mc=100 and keep the per-policy results
data = data_from_experiment(
    n_mc=100,
)

In [None]:
import pickle
# Persist the experiment results to disk as a pickle
with open('10dim_10arms', 'wb') as results_file:
    pickle.dump(data, results_file)

In [None]:
# Plot the regret results for every policy, passing the breakpoints in `bp`
plot_regret(
    data,
    t_saved,
    filename=None,
    log=False,
    qtl=False,
    loc=2,
    font=13,
    bp=bp,
    bp_2={},
)