/
StoSOO.py
260 lines (215 loc) · 7.28 KB
/
StoSOO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# -*- coding: utf-8 -*-
"""Implementation of StoSOO (Valko et al., 2013)
"""
# Author: Wenjie Li <li3549@purdue.edu>
# License: MIT
import math
import numpy as np
from PyXAB.algos.Algo import Algorithm
from PyXAB.partition.Node import P_node
from PyXAB.partition.BinaryPartition import BinaryPartition
import pdb
class StoSOO_node(P_node):
"""
Implementation of the node in the StoSOO algorithm
"""
def __init__(self, depth, index, parent, domain):
"""
Initialization of the StoSOO node
Parameters
----------
depth: int
depth of the node
index: int
index of the node
parent:
parent node of the current node
domain: list(list)
domain that this node represents
"""
super(StoSOO_node, self).__init__(depth, index, parent, domain)
self.b_value = np.inf
self.visited_times = 0
self.rewards = []
self.mean_reward = 0
def update_reward(self, reward):
"""
The function to update the reward list of the node
Parameters
----------
reward: float
the reward for evaluating the node
Returns
-------
"""
self.visited_times += 1
self.rewards.append(reward)
self.mean_reward = np.sum(np.array(self.rewards)) / self.visited_times
def compute_b_value(self, n, k, delta):
"""
The function to compute the b_{h,i} value of the node
Parameters
----------
n: int
The total number of rounds (budget)
k: int
The maximum number of pulls per node
delta: float
The confidence parameter
Returns
-------
"""
if self.visited_times == 0:
self.b_value = np.inf
else:
self.mean_reward = np.sum(np.array(self.rewards)) / self.visited_times
self.b_value = self.mean_reward + np.sqrt(
np.log(n * k / delta) / (2 * self.visited_times)
)
def get_visited_times(self):
"""
The function to get the number of visited times of the node
Returns
-------
"""
return self.visited_times
def get_b_value(self):
"""
The function to get the b_{h,i} value of the node
Returns
-------
"""
return self.b_value
def get_mean_reward(self):
"""
The function to get the mean reward of the node
Returns
-------
"""
return self.mean_reward
class StoSOO(Algorithm):
"""
The implementation of the StoSOO algorithm (Valko et al., 2013)
"""
def __init__(
self,
n=100,
k=None,
h_max=100,
delta=None,
domain=None,
partition=BinaryPartition,
):
"""
The initialization of the StoSOO algorithm
Parameters
----------
n: int
The total number of rounds (budget)
k: int
The maximum number of pulls per node
h_max: int
The maximum depth limit
delta: float
The confidence parameter delta
domain: list(list)
The domain of the objective to be optimized
partition:
The partition choice of the algorithm
"""
super(StoSOO, self).__init__()
if domain is None:
raise ValueError("Parameter space is not given.")
if partition is None:
raise ValueError("Partition of the parameter space is not given.")
self.partition = partition(domain=domain, node=StoSOO_node)
self.iteration = 0
self.n = n # budget
if k is None: # max number of pulls per node
self.k = np.ceil(self.n / (np.log(n) ** 3))
else:
self.k = k
if delta is None:
self.delta = 1 / np.sqrt(self.n)
else:
self.delta = delta
self.h_max = h_max # max depth
def pull(self, time):
"""
The pull function of StoSOO that returns a point in every round
Parameters
----------
time: int
time stamp parameter
Returns
-------
point: list
the point to be evaluated
"""
self.iteration = time
self.b_max = -np.inf
node_list = self.partition.get_node_list()
h = 0
while h <= (min(self.partition.get_depth() + 1, self.h_max)):
if self.iteration <= self.n:
max_b_node_ind = None
for j in range(len(node_list[h])): # for every node
node = node_list[h][j]
if node.get_children() is None: # if it is a leaf
node.compute_b_value(n=self.n, k=self.k, delta=self.delta)
# Locate the leaf node with the max b
if max_b_node_ind is None:
max_b_node_ind = j
else:
if (
node_list[h][max_b_node_ind].get_b_value()
<= node_list[h][j].get_b_value()
):
max_b_node_ind = j
if max_b_node_ind is not None: # If there exists a leaf node in layer h
if node_list[h][max_b_node_ind].get_b_value() >= self.b_max:
if node_list[h][max_b_node_ind].get_visited_times() < self.k:
self.max_b_node_ind = max_b_node_ind
self.max_b_node_h = h
return node_list[h][
max_b_node_ind
].get_cpoint() # Return a point
else:
# If there is a new layer, just add the children, otherwise create a new layer
self.partition.make_children(
node_list[h][max_b_node_ind],
newlayer=(h >= self.partition.get_depth()),
)
self.b_max = node_list[h][max_b_node_ind].get_b_value()
h += 1 # increase the search depth
def receive_reward(self, time, reward):
"""
The receive_reward function of StoSOO to obtain the reward and update the Statistics
Parameters
----------
time: int
The time stamp parameter
reward: float
the reward of the evaluation
Returns
-------
"""
node_list = self.partition.get_node_list()
node_list[self.max_b_node_h][self.max_b_node_ind].update_reward(reward)
def get_last_point(self):
"""
The function to get the last point in StoSOO
Returns
-------
point: list
The output of the StoSOO algorithm at last
"""
max_depth = self.partition.get_depth()
max_mu = -np.inf
max_x = None
for node in self.partition.get_node_list()[max_depth]:
mean_reward = node.get_mean_reward()
if mean_reward >= max_mu:
max_mu = mean_reward
max_x = node.get_cpoint()
return max_x