In [1]:
import numpy as np
import cPickle as pickle
import scipy
import combo
import os
import urllib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def download():
    """Fetch the s5-210 data set into ``data/s5-210.csv`` if it is not there yet.

    The ``data`` directory is created when missing. The file is first written
    to ``data/s5-210.csv.part`` and renamed only after the transfer finished,
    so an interrupted download never leaves a truncated CSV that a later call
    would mistake for a complete one.

    Raises:
        Whatever ``urlretrieve`` / ``os`` raise on network or file-system
        errors; the partial file is removed before the error propagates.
    """
    # urlretrieve lives in urllib on Python 2 and in urllib.request on Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    target = 'data/s5-210.csv'
    if os.path.exists(target):
        return

    if not os.path.isdir('data'):
        try:
            os.mkdir('data')
        except OSError:
            # Another process may have created it between the check and mkdir.
            if not os.path.isdir('data'):
                raise

    print('Downloading...')
    partial = target + '.part'
    try:
        urlretrieve('http://www.tsudalab.org/files/s5-210.csv', partial)
        os.rename(partial, target)
    finally:
        # After a successful rename this is a no-op; after a failure it
        # cleans up the incomplete file.
        if os.path.exists(partial):
            os.remove(partial)
    print('Done')

In [3]:
def load_data():
    """Return ``(X, t)`` read from ``data/s5-210.csv``.

    The file is downloaded first if necessary. The header row is skipped;
    ``X`` holds the first three columns of every row and ``t`` is the
    negated fourth column.
    """
    download()
    table = np.loadtxt('data/s5-210.csv', delimiter=',', skiprows=1)
    features = table[:, :3]
    negated_target = -table[:, 3]
    return features, negated_target

In [4]:
# Load the data.
# X is an N x d matrix; each row is the d-dimensional feature vector of one search candidate
# (d = 3 here, because load_data() keeps the first three CSV columns).
# t is the N-dimensional vector of the corresponding negative energies
# (load_data() negates the fourth CSV column).
# (In a real application t is, of course, unknown in advance.)
X, t = load_data()

# Normalize each column of X to zero mean and unit standard deviation.
X = combo.misc.centering( X )

In [5]:
# Simulator class that the search policy calls to evaluate a candidate.
# In this tutorial it simply looks up the known value of t.
# To apply combo to another problem, replace this class with your own.
class simulator:
    """Callable that returns the stored target value(s) for an action index."""

    def __init__( self ):
        # Only the target vector is kept; the feature matrix is discarded.
        self.t = load_data()[1]

    def __call__( self, action ):
        """Return ``self.t`` indexed by ``action``."""
        targets = self.t
        return targets[action]

In [6]:
# Design of policy

# Declare the policy over the candidate set.
policy = combo.search.discrete.policy(test_X=X)
# test_X is the set of candidates, given as a numpy.array.
# Each row is the feature vector of the corresponding candidate.

# Set the seed parameter (fixed at 0 here, presumably so that runs are repeatable).
policy.set_seed( 0 )

In [7]:
# To perform an initial random search before starting the Bayesian optimization,
# call random_search:

res = policy.random_search(max_num_probes=20, simulator=simulator())
# Input:
# max_num_probes: number of random search steps
# simulator: an instance of the simulator class defined above
# Output: combo.search.discrete.results (class)


# Single-query Bayesian search
# The single-query version of COMBO is run with bayes_search.
# Note that res is rebound here to the value returned by bayes_search.
res = policy.bayes_search(max_num_probes=80, simulator=simulator(), score='TS', 
                                                  interval=20, num_rand_basis=5000)

# Input
# max_num_probes: number of search steps performed by Bayesian optimization
# simulator: an instance of the simulator class defined above
# score: the type of acquisition function. TS, EI and PI are available
# interval: how often the hyperparameters are learned.
#               In this case, the hyperparameters are learned every 20 steps.
#               If interval is negative, no hyperparameter learning is performed.
#               If interval is zero, hyperparameter learning is performed only at the first step.
# num_rand_basis: the number of basis functions. If you choose 0, an ordinary Gaussian process is used.

0001-th step: f(x) = -1.070602 (action=15673)
   current best f(x) = -1.070602 (best action = 15673) 

0002-th step: f(x) = -1.153410 (action=16489)
   current best f(x) = -1.070602 (best action = 15673) 

0003-th step: f(x) = -0.981899 (action=7792)
   current best f(x) = -0.981899 (best action = 7792) 

0004-th step: f(x) = -1.066080 (action=13752)
   current best f(x) = -0.981899 (best action = 7792) 

0005-th step: f(x) = -1.043272 (action=9023)
   current best f(x) = -0.981899 (best action = 7792) 

0006-th step: f(x) = -1.125822 (action=1470)
   current best f(x) = -0.981899 (best action = 7792) 

0007-th step: f(x) = -1.070720 (action=14404)
   current best f(x) = -0.981899 (best action = 7792) 

0008-th step: f(x) = -1.091624 (action=14031)
   current best f(x) = -0.981899 (best action = 7792) 

0009-th step: f(x) = -0.963795 (action=5734)
   current best f(x) = -0.963795 (best action = 5734) 

0010-th step: f(x) = -0.989538 (action=3111)
   current best f(x) = -0.963795 (best 

In [13]:
# The search result is summarized in the class combo.search.discrete.results.history()
# res.fx: observed negative energy at each step
# res.chosed_actions: history of the chosen actions
# fbest, best_action = res.export_all_sequence_best_fx(): the best fx and the best action
#                                                         observed up to each step
# res.total_num_search: total number of search steps
#
# print is written in call form with a single argument, as download() already does;
# this prints the same text on Python 2 and is also valid syntax on Python 3.
print('f(x)=')
print(res.fx[0:res.total_num_search])
best_fx, best_action = res.export_all_sequence_best_fx()
print('current best')
print(best_fx)
print('current best action=')
print(best_action)
print('history of chosed actions=')
print(res.chosed_actions[0:res.total_num_search])

f(x)=
[-1.07060214 -1.15340978 -0.98189856 -1.0660804  -1.04327156 -1.12582192
 -1.07071983 -1.09162401 -0.96379539 -0.98953771 -1.13500669 -1.00395435
 -0.99460129 -0.97110762 -1.09609145 -0.98278381 -1.05258966 -1.07973688
 -1.02511608 -1.04873284 -1.07139841 -1.0142353  -0.96441796 -1.12176327
 -1.01191189 -1.09929812 -2.83283038 -0.99754301 -1.03973342 -1.0378822
 -0.97361382 -1.04503835 -1.02471098 -0.97717442 -1.01682878 -1.08035291
 -1.10409646 -1.02463792 -1.03130881 -1.02970235 -1.04319822 -1.07006969
 -1.20155612 -1.03499165 -1.4238407  -1.08333102 -0.99770846 -1.05757818
 -1.00452376 -1.00565377 -0.96503172 -1.31475034 -0.96397534 -0.99741849
 -1.0623879  -1.37161666 -1.05275454 -1.0879136  -1.01891965 -1.08263454
 -1.1425489  -1.08111165 -0.99065748 -0.99981525 -1.06754785 -1.0130527
 -0.9670393  -1.04436888 -1.13343804 -0.9852775  -1.01114434 -0.97978012
 -1.00569357 -1.07454822 -0.99178493 -0.99478408 -1.00059502 -1.07134639
 -1.03792654 -1.01243496 -1.12320939 -1.1375052

In [14]:
# Save the search history held in res to the file 'test.npz'.
res.save('test.npz')

In [15]:
# Remove the name res from the namespace; it is rebuilt from 'test.npz' in the next cell.
del res

In [18]:
# Load the results: create a new history object, then fill it from 'test.npz'.
res = combo.search.discrete.results.history()
res.load('test.npz')