In [None]:
#!/usr/bin/env python
# coding: utf-8
# for simulation
import numpy as np
import random
import math
import time
import os
import argparse
from sklearn.linear_model import LogisticRegression

from algorithms.sgd_ts import SGD_TS
from algorithms.UCB import UCB
from algorithms.laplace_ts import LAPLACE_TS
from algorithms.gloc import GLOC
from tune import GridSearch
from algorithms.data_processor.data_generator import * 

import warnings
# Silence RuntimeWarnings: not every step size in the grid-search set leads to convergence.
warnings.filterwarnings(action='ignore', category=RuntimeWarning)
# Ignore ConvergenceWarning: sklearn's logistic regression does not always converge on the data.
# This may be because a logistic model is not a good fit for the data, which is especially likely for real datasets.
from  warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

# ---------------------------------------------------------------------------
# Command-line configuration and bookkeeping for the simulation experiment.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='simulations')
# -t/-d/-k have no meaningful default.  They are marked as required so that a
# missing flag is reported by argparse instead of leaving None behind and
# failing later with an opaque TypeError inside math.sqrt / np.zeros.
parser.add_argument('-t', '--t', type=int, required=True, help='total time')
parser.add_argument('-d', '--d', type=int, required=True, help='dimension')
parser.add_argument('-k', '--k', type=int, required=True, help='number of arms')
parser.add_argument('-rep', '--rep', type=int, default=10, help='repeat times')
args = parser.parse_args()

T = args.t
d = args.d
K = args.k
rep = args.rep

# d == 0 would divide by zero below and rep == 0 would divide by zero when
# the results are averaged, so reject non-positive values up front.
if min(T, d, K, rep) < 1:
    parser.error('t, d, k and rep must all be positive integers')

# Upper / lower bounds handed to the `context` generator; both shrink as
# 1/sqrt(d).
ub = 1/math.sqrt(d)
lb = -1/math.sqrt(d)
model = 'logistic'
dist = 'ber'
if dist != 'ber' and model == 'logistic':
    raise NameError('logistic regression only supports bernoulli reward')

name = 'simulation_d' + str(d) + '_k' + str(K)
# makedirs creates 'results/' and the per-experiment sub-directory in one go
# and does not fail if another process created them in the meantime.
os.makedirs('results/' + name + '/', exist_ok=True)

print('K: {}, T: {}, dimension: {}, data name: {}'.format(K, T, d, name))

# Regret accumulators, one slot per round; each repetition adds its regret
# curve and the sum is divided by `rep` at the end.
reg_sgdts = np.zeros(T)
reg_ucbglm = np.zeros(T)
reg_lts = np.zeros(T)
reg_gloc = np.zeros(T)

# Hyper-parameters, one row per repetition.  Each row is
#   [(C, explore)        -> UCB-GLM,
#    (C, eta0, g1, g2)   -> SGD-TS,
#    (c, k, eta)         -> GLOC,
#    eta0                -> Laplace-TS]
parameters = [
    [(2, 0.1), (5, 0.01, 0.1, 0.1), (0.01, 0.05, 0.5), 0.5],
    [(1, 1), (1, 0.01, 10, 5), (1, 1, 0.01), 0.1],
    [(2, 1), (1, 0.5, 10, 0.1), (0.01, 0.5, 0.01), 0.01],
    [(4, 0.1), (2, 0.5, 5, 1), (0.01, 0.1, 1), 0.1],
    [(4, 0.01), (1, 0.01, 1, 0.01), (0.1, 0.1, 10), 0.1],
    [(3, 1), (4, 0.05, 0.1, 0.01), (5, 1, 0.01), 0.05],
    [(1, 0.01), (5, 0.1, 0.01, 0.01), (0.1, 0.1, 1), 0.1],
    [(1, 0.1), (2, 0.1, 10, 0.1), (1, 1, 0.01), 0.5],
    [(5, 0.1), (1, 0.5, 0.1, 5), (0.01, 0.05, 5), 0.1],
    [(3, 0.01), (7, 1, 0.1, 0.1), (0.1, 0.5, 0.5), 0.5],
]

# Accumulated wall-clock seconds per algorithm (averaged after the run).
times = {
    'ucb-glm': 0,
    'sgd-ts': 0,
    'gloc': 0,
    'lts': 0
}

# Run `rep` independent repetitions.  Repetition i is seeded with i+1, draws a
# fresh true parameter vector, builds the simulated bandit and then runs the
# four algorithms on that same bandit, accumulating regret and wall-clock time.
for i in range(rep):
    print(i, ": ", end = " ")
    np.random.seed(i+1)
    theta = np.random.normal(0.1, 1, d)
    bandit = context(K, lb, ub, T, d, true_theta = theta)
    bandit.build_bandit(model)

    # One hyper-parameter row per tuned repetition.  Wrap around so that
    # asking for more repetitions than there are rows reuses the rows instead
    # of raising IndexError.
    ucb_cfg, sgd_cfg, gloc_cfg, lts_cfg = parameters[i % len(parameters)]

    # --- UCB-GLM ---------------------------------------------------------
    t0 = time.time()
    C, explore = ucb_cfg
    # presumably tau is the length of the initial exploration phase -- confirm
    # against algorithms.UCB
    tau = int(max(d, math.log(T)) * C)
    linucb = getattr(UCB(bandit, dist, T), model)
    reg_ucbglm += linucb(tau, 10**(-6), explore)
    times['ucb-glm'] += (time.time()-t0)

    # --- SGD-TS ----------------------------------------------------------
    t0 = time.time()
    C, eta0, g1, g2 = sgd_cfg
    tau = int(max(d, math.log(T)) * C)
    sgd_ts = SGD_TS(bandit, model, dist, T)
    reg_sgdts += sgd_ts.glm(eta0, tau, g1, g2)
    times['sgd-ts'] += (time.time()-t0)

    # --- GLOC ------------------------------------------------------------
    t0 = time.time()
    c, k, eta = gloc_cfg
    gloc = GLOC(bandit, model, dist, T)
    reg_gloc += gloc.Gloc(c, 1, k, eta, lamda = 1, eps = 1)
    times['gloc'] += (time.time()-t0)

    # --- Laplace-TS ------------------------------------------------------
    t0 = time.time()
    eta0 = lts_cfg
    max_ite = 1000
    lts = LAPLACE_TS(bandit, model, dist, T)
    reg_lts += lts.laplace_ts(1, eta0, max_ite)
    times['lts'] += (time.time()-t0)
    print(times)

# Turn the accumulated seconds into a per-repetition average.
for k in times:
    times[k] /= rep
print('average time: ', times)

# Regret curves averaged over the repetitions, keyed by algorithm name.
result = {
    'ucb-glm': reg_ucbglm/rep,
    'sgd-ts': reg_sgdts/rep,
    'gloc': reg_gloc/rep,
    'lts': reg_lts/rep
}

In [None]:
#!/usr/bin/env python
# coding: utf-8
# for yahoo
import numpy as np
import pickle
import random
import math
import time
import os
import argparse
from sklearn.linear_model import LogisticRegression
import pickle
import os.path

from algorithms.sgd_ts import SGD_TS
from algorithms.UCB import UCB
from algorithms.laplace_ts import LAPLACE_TS
from algorithms.gloc import GLOC
from tune import GridSearch
from algorithms.data_processor.yahoo_extract_data import extract_data
from algorithms.data_processor.data_generator import *

import warnings
# Silence RuntimeWarnings: not every step size in the grid-search set leads to convergence.
warnings.filterwarnings(action='ignore', category=RuntimeWarning)
# Ignore ConvergenceWarning: sklearn's logistic regression does not always converge on the data.
# This may be because a logistic model is not a good fit for the data, which is especially likely for real datasets.
from  warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

# ---------------------------------------------------------------------------
# Command-line configuration, data loading and bookkeeping for the Yahoo
# experiment.
# ---------------------------------------------------------------------------
# Parse the command line before touching the data so that `--help` or a
# mistyped flag is answered immediately instead of after the data has been
# extracted and loaded.
parser = argparse.ArgumentParser(description='experiments for yahoo data')
parser.add_argument('-rep', '--rep', type=int, default=10, help='repeat times')
args = parser.parse_args()
rep = args.rep # number of times to repeat experiments

print('start processing yahoo data')
t0 = time.time()
rewards_path = 'data/rewards_yahoo.txt'
features_path = 'data/features_yahoo.txt'
# Rebuild the cached files when either one is missing (extract_data is
# presumably what writes them -- confirm in yahoo_extract_data).
if not (os.path.isfile(rewards_path) and os.path.isfile(features_path)):
    extract_data()
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# cache files that were produced locally.
with open(rewards_path, 'rb') as f:
    rewards = pickle.load(f)
with open(features_path, 'rb') as f:
    features = pickle.load(f)
print('data processing done, cost time {} seconds'.format(time.time()-t0))

# The horizon is the number of logged feature entries.
T = len(features)
K = 20
d = 6
model = 'logistic'
dist = 'ber'
dtype = 'yahoo'
if dist != 'ber' and model == 'logistic':
    raise NameError('logistic regression only supports bernoulli reward')

print('data: Yahoo, K: around {}, T: {}, dimension: {}'.format(K, T, d))

# Regret accumulators, one slot per round; each repetition adds its regret
# curve and the sum is divided by `rep` at the end.
reg_sgdts = np.zeros(T)
reg_ucbglm = np.zeros(T)
reg_lts = np.zeros(T)
reg_gloc = np.zeros(T)

# Hyper-parameter rows.  Each row is
#   [(C, explore)        -> UCB-GLM,
#    (C, eta0, g1, g2)   -> SGD-TS,
#    (c, k, eta)         -> GLOC,
#    eta0                -> Laplace-TS]
parameters = [
    [(7, 10), (7, 1, 0.01, 0.01), (0.01, 0.05, 0.01), 0.05]
]

# Accumulated wall-clock seconds per algorithm (averaged after the run).
times = {
    'ucb-glm': 0,
    'sgd-ts': 0,
    'gloc': 0,
    'lts': 0
}

# Run `rep` repetitions on the Yahoo data.  Repetition i is seeded with i+1,
# rebuilds the bandit from the logged rewards/features and then runs the four
# algorithms on it, accumulating regret and wall-clock time.
for i in range(rep):
    print(i, ": ", end = " ")
    np.random.seed(i+1)
    bandit = yahoo(rewards, features, d)

    # `parameters` holds fewer rows than the default number of repetitions.
    # Wrap around so that every repetition gets a row instead of raising
    # IndexError as soon as i reaches len(parameters).
    ucb_cfg, sgd_cfg, gloc_cfg, lts_cfg = parameters[i % len(parameters)]

    # --- UCB-GLM ---------------------------------------------------------
    t0 = time.time()
    C, explore = ucb_cfg
    # presumably tau is the length of the initial exploration phase -- confirm
    # against algorithms.UCB
    tau = int(max(d, math.log(T)) * C)
    linucb = getattr(UCB(bandit, dist, T), model)
    reg_ucbglm += linucb(tau, 10**(-6), explore)
    times['ucb-glm'] += (time.time()-t0)

    # --- SGD-TS ----------------------------------------------------------
    t0 = time.time()
    C, eta0, g1, g2 = sgd_cfg
    tau = int(max(d, math.log(T)) * C)
    sgd_ts = SGD_TS(bandit, model, dist, T)
    reg_sgdts += sgd_ts.glm(eta0, tau, g1, g2)
    times['sgd-ts'] += (time.time()-t0)

    # --- GLOC ------------------------------------------------------------
    t0 = time.time()
    c, k, eta = gloc_cfg
    gloc = GLOC(bandit, model, dist, T)
    reg_gloc += gloc.Gloc(c, 1, k, eta, lamda = 1, eps = 1)
    times['gloc'] += (time.time()-t0)

    # --- Laplace-TS ------------------------------------------------------
    t0 = time.time()
    eta0 = lts_cfg
    max_ite = 1000
    lts = LAPLACE_TS(bandit, model, dist, T)
    reg_lts += lts.laplace_ts(1, eta0, max_ite)
    times['lts'] += (time.time()-t0)
    print(times)

# Turn the accumulated seconds into a per-repetition average.
for k in times:
    times[k] /= rep
print('average time: ', times)

# Regret curves averaged over the repetitions, keyed by algorithm name.
result = {
    'ucb-glm': reg_ucbglm/rep,
    'sgd-ts': reg_sgdts/rep,
    'gloc': reg_gloc/rep,
    'lts': reg_lts/rep
}


In [None]:
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import time
import gzip
import math
import pickle
import random
import os
import argparse
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn import preprocessing

from algorithms.sgd_ts import SGD_TS
from algorithms.UCB import UCB
from algorithms.laplace_ts import LAPLACE_TS
from algorithms.gloc import GLOC
from tune import GridSearch
from algorithms.data_processor.data_generator import * 

import warnings
# Silence RuntimeWarnings: not every step size in the grid-search set leads to convergence.
warnings.filterwarnings(action='ignore', category=RuntimeWarning)
# Ignore ConvergenceWarning: sklearn's logistic regression does not always converge on the data.
# This may be because a logistic model is not a good fit for the data, which is especially likely for real datasets.
from  warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

# ---------------------------------------------------------------------------
# Command-line configuration for the cover type experiment.
#
# The script used to overwrite the parsed values with a hard-coded
# `d = 56` / `center = 0`, which silently discarded --d and --center.  Those
# values are now the argparse defaults: running without the flags behaves as
# before, while explicitly passed flags are honoured.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='experiments for cover type data')
parser.add_argument('-rep', '--rep', type=int, default=10, help='repeat times')
# -t has no meaningful default; require it so a missing flag is reported by
# argparse rather than surfacing later as a TypeError on None.
parser.add_argument('-t', '--t', type=int, required=True, help='total time')
parser.add_argument('-d', '--d', type=int, default=56, help='number of features, choice of 10 (not use categorical features), 55 (use cat)')
parser.add_argument('-center', '--center', type=int, default=0, choices=(0, 1), help='use centriods as features')
parser.add_argument('-add', '--add', type=int, default=0, choices=(0, 1), help='add a constant column feature')

args = parser.parse_args()
rep = args.rep  # repeat times, set to 10
T = args.t  # total rounds, set to 1000
d = args.d  # number of leading data columns to read as features
center = args.center # if center == 1, use cluster centroid as features, if center == 0, use random features
add_constant = args.add # add_constant = 1 to add a constant feature to the data
d += add_constant

if center == 1:
    print('use cluster centroid as features, d = {},'.format(d), 'start processing data')
if center == 0:
    print('use random features, d = {},'.format(d), 'start processing data')
    
# Extract, standardize and cluster the cover type data.
#
# Each line of the file is a comma-separated record whose last field is the
# class label.  The label is binarised (1 stays 1, everything else becomes 0)
# and the first `d` *feature* fields are kept.
lines = []
labels = []
t0 = time.time()
# save the 'covtype.data.gz' under the 'data' folder before running this code
with gzip.open('data/covtype.data.gz', "r") as f:
    for line in f:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fields = line.split(b',')
        labels.append(1 if int(fields[-1]) == 1 else 0)
        # Drop the label field before slicing: with d >= the number of
        # columns, `fields[:d]` would copy the label itself into the feature
        # vector and leak the target into the model.
        lines.append([float(v) for v in fields[:-1][:d]])

X = np.array(lines)
y = np.array(labels)
# Standardize only the first 10 columns (the quantitative features per the
# --d help text); any remaining columns are left untouched.
X[:,:10] = preprocessing.scale(X[:,:10])
if add_constant == 1:
    # append a constant column of ones
    X_add = np.hstack([X, np.ones((X.shape[0], 1))])
else:
    X_add = X

# Cluster the rows into K groups; each cluster is one arm of the bandit.
K = 32
np.random.seed(0)
kmeans = KMeans(n_clusters=K, random_state=0).fit(X_add)
features = np.array(kmeans.cluster_centers_)
# idx[nc]     -> row indices of the samples assigned to cluster nc
# rewards[nc] -> fraction of those samples whose binarised label is 1
idx = [np.where(kmeans.labels_ == nc)[0] for nc in range(K)]
rewards = [y[members].sum() / len(members) for members in idx]
bandit_data = (X_add, y, idx)
# From here on d is the actual feature dimension of the processed data.
d = X_add.shape[1]

# Sort the arm rewards from best to worst and build a lookup table that maps
# the (4-decimal rounded) reward gap to the best arm onto the arm's rank in
# that ordering.  When two arms share the same rounded gap, the later rank
# overwrites the earlier one.
rew = sorted(rewards, reverse=True)
gap = {round(rew[0] - rew[rank], 4): rank for rank in range(32)}
def frequency(regr, gap_lookup=None, n_arms=None):
    """Count how often each arm rank was pulled, given a cumulative regret curve.

    The per-round regret is recovered as the difference between consecutive
    entries of ``regr`` (the first entry is taken as is), rounded to four
    decimals and looked up in a ``{rounded gap: arm rank}`` table.

    Args:
        regr: sequence (list or array) of cumulative regrets, one per round.
        gap_lookup: optional ``{rounded gap: arm rank}`` mapping.  Defaults to
            the module-level ``gap`` table.
        n_arms: optional length of the returned list.  Defaults to one more
            than the largest rank stored in the lookup table.

    Returns:
        A list where entry ``r`` is the number of rounds in which the arm with
        rank ``r`` was pulled.  An empty ``regr`` yields all zeros.

    Raises:
        KeyError: if a per-round regret does not match any gap in the table.
    """
    lookup = gap if gap_lookup is None else gap_lookup
    if n_arms is None:
        n_arms = max(lookup.values()) + 1 if lookup else 0
    fre = [0] * n_arms
    # len() rather than truthiness so that numpy arrays are accepted as well
    if len(regr) == 0:
        return fre
    previous = 0
    for current in regr:
        fre[lookup[round(current - previous, 4)]] += 1
        previous = current
    return fre
# Report how long the preprocessing took and summarise the resulting arms.
elapsed = time.time() - t0
print('data process done, cost in total {} seconds'.format(elapsed))
best_reward, worst_reward = max(rewards), min(rewards)
print('max reward = {}, min reward = {}'.format(best_reward, worst_reward))
print('feature vectors shape: K={}, d={}'.format(K,d))

# Reward model / distribution used by every algorithm below.
model = 'logistic'
dist = 'ber'
if model == 'logistic' and dist != 'ber':
    raise NameError('logistic regression only supports bernoulli reward')


print('K: {}, T: {}, dimension: {}, model: {}, dist: {}'.format(K, T, d, model, dist))

# Per-round regret accumulators, one independent zero vector per algorithm.
reg_sgdts, reg_ucbglm, reg_lts, reg_gloc = (np.zeros(T) for _ in range(4))

# Containers for per-algorithm pull frequencies.
fre_sgdts, fre_lts, fre_ucbglm, fre_gloc = [], [], [], []

# Hyper-parameter rows: [UCB-GLM (C, explore), SGD-TS (C, eta0, g1, g2),
# GLOC (c, k, eta), Laplace-TS eta0].
parameters = [
    [(2, 1), (1, 0.05, 1, 0.01), (0.01, 0.05, 0.01), 0.01],
]

# Accumulated wall-clock seconds per algorithm.
times = dict.fromkeys(('ucb-glm', 'sgd-ts', 'gloc', 'lts'), 0)

# Run `rep` repetitions on the cover type data.  Repetition i is seeded with
# i+1, builds the bandit (cluster centroids or random features, depending on
# `center`) and then runs the four algorithms on it, accumulating regret and
# wall-clock time.
for i in range(rep):
    print(i, ": ", end = " ")
    np.random.seed(i+1)
    if center:
        bandit = covtype(rewards, features, T, d)
    else:
        bandit = covtype_random_feature(rewards, bandit_data, T, d)
        bandit.build_bandit()

    # `parameters` holds fewer rows than the default number of repetitions.
    # Wrap around so that every repetition gets a row instead of raising
    # IndexError as soon as i reaches len(parameters).
    ucb_cfg, sgd_cfg, gloc_cfg, lts_cfg = parameters[i % len(parameters)]

    # --- UCB-GLM ---------------------------------------------------------
    t0 = time.time()
    C, explore = ucb_cfg
    # presumably tau is the length of the initial exploration phase -- confirm
    # against algorithms.UCB
    tau = int(max(d, math.log(T)) * C)
    linucb = getattr(UCB(bandit, dist, T), model)
    reg_ucbglm += linucb(tau, 10**(-6), explore)
    times['ucb-glm'] += (time.time()-t0)

    # --- SGD-TS ----------------------------------------------------------
    t0 = time.time()
    C, eta0, g1, g2 = sgd_cfg
    tau = int(max(d, math.log(T)) * C)
    sgd_ts = SGD_TS(bandit, model, dist, T)
    reg_sgdts += sgd_ts.glm(eta0, tau, g1, g2)
    times['sgd-ts'] += (time.time()-t0)

    # --- GLOC ------------------------------------------------------------
    t0 = time.time()
    c, k, eta = gloc_cfg
    gloc = GLOC(bandit, model, dist, T)
    reg_gloc += gloc.Gloc(c, 1, k, eta, lamda = 1, eps = 1)
    times['gloc'] += (time.time()-t0)

    # --- Laplace-TS ------------------------------------------------------
    t0 = time.time()
    eta0 = lts_cfg
    max_ite = 1000
    lts = LAPLACE_TS(bandit, model, dist, T)
    reg_lts += lts.laplace_ts(1, eta0, max_ite)
    times['lts'] += (time.time()-t0)
    print(times)

# Turn the accumulated seconds into a per-repetition average.
for k in times:
    times[k] /= rep
print('average time: ', times)

# Regret curves averaged over the repetitions, keyed by algorithm name.
result = {
    'ucb-glm': reg_ucbglm/rep,
    'sgd-ts': reg_sgdts/rep,
    'gloc': reg_gloc/rep,
    'lts': reg_lts/rep
}

# NOTE(review): nothing in the loop above appends to the fre_* lists, so this
# dict only exposes whatever they held before the loop started.
frequent = {
    'ucb-glm': fre_ucbglm,
    'sgd-ts': fre_sgdts,
    'gloc': fre_gloc,
    'lts': fre_lts
}

