Skip to content

Commit

Permalink
Issue SheffieldML#93: user-defined sampling function
Browse files Browse the repository at this point in the history
  • Loading branch information
Joel Kaardal committed Jan 10, 2018
1 parent c7a0c06 commit 8abbba4
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 52 deletions.
7 changes: 5 additions & 2 deletions GPyOpt/methods/bayesian_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def __init__(self, f, domain = None, constrains = None, cost_withGradients = Non
self.domain = domain
self.space = Design_space(self.domain, self.constrains)

# --- Get user-defined sample generator function, if applicable
self.user_def_dist = kwargs.get('user_def_dist', None)

# --- CHOOSE objective function
self.maximize = maximize
if 'objective_name' in kwargs: self.objective_name = kwargs['objective_name']
Expand Down Expand Up @@ -135,7 +138,7 @@ def __init__(self, f, domain = None, constrains = None, cost_withGradients = Non

# This states how the discrete variables are handled (exact search or rounding)
self.acquisition_optimizer_type = acquisition_optimizer_type
self.acquisition_optimizer = AcquisitionOptimizer(self.space, self.acquisition_optimizer_type, model=self.model) ## more arguments may come here
self.acquisition_optimizer = AcquisitionOptimizer(self.space, self.acquisition_optimizer_type, model=self.model, user_def_dist=self.user_def_dist) ## more arguments may come here

# --- CHOOSE acquisition function. If an instance of an acquisition is passed (possibly user defined), it is used.
self.acquisition_type = acquisition_type
Expand Down Expand Up @@ -188,7 +191,7 @@ def _init_design_chooser(self):

# Case 1:
if self.X is None:
self.X = initial_design(self.initial_design_type, self.space, self.initial_design_numdata)
self.X = initial_design(self.initial_design_type, self.space, self.initial_design_numdata, user_def_dist=self.user_def_dist)
self.Y, _ = self.objective.evaluate(self.X)
# Case 2
elif self.X is not None and self.Y is None:
Expand Down
7 changes: 5 additions & 2 deletions GPyOpt/optimization/acquisition_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def __init__(self, space, optimizer='lbfgs', **kwargs):
## -- Context handler: takes
self.context_manager = ContextManager(space)

## -- user-defined sample generator
self.user_def_dist = kwargs.get('user_def_dist', None)


def optimize(self, f=None, df=None, f_df=None, duplicate_manager=None):
"""
Expand All @@ -62,9 +65,9 @@ def optimize(self, f=None, df=None, f_df=None, duplicate_manager=None):

## --- Selecting the anchor points and removing duplicates
if self.type_anchor_points_logic == max_objective_anchor_points_logic:
anchor_points_generator = ObjectiveAnchorPointsGenerator(self.space, random_design_type, f)
anchor_points_generator = ObjectiveAnchorPointsGenerator(self.space, random_design_type, f, user_def_dist=self.user_def_dist)
elif self.type_anchor_points_logic == thompson_sampling_anchor_points_logic:
anchor_points_generator = ThompsonSamplingAnchorPointsGenerator(self.space, sobol_design_type, self.model)
anchor_points_generator = ThompsonSamplingAnchorPointsGenerator(self.space, sobol_design_type, self.model, user_def_dist=self.user_def_dist)

## -- Select the anchor points (with context)
anchor_points = anchor_points_generator.get(duplicate_manager=duplicate_manager, context_manager=self.context_manager)
Expand Down
17 changes: 9 additions & 8 deletions GPyOpt/optimization/anchor_points_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

class AnchorPointsGenerator(object):

def __init__(self, space, design_type, num_samples, user_def_dist=None):
    '''
    Store the configuration used later to generate candidate anchor points.

    :param space: variables space the candidate points are drawn from
    :param design_type: name of the design handed to initial_design
    :param num_samples: number of candidate points requested from initial_design
    :param user_def_dist: user-defined sample generator (default, None)
    '''
    self.user_def_dist = user_def_dist  # user-defined sample generator
    self.num_samples = num_samples
    self.design_type = design_type
    self.space = space

def get_anchor_point_scores(self, X):
raise NotImplementedError("get_anchor_point_scores is not implemented in the parent class.")
Expand All @@ -28,7 +29,7 @@ def get(self, num_anchor=5, duplicate_manager=None, unique=False, context_manage
add_context = lambda x: x

## --- Generate initial design
X = initial_design(self.design_type, space, self.num_samples)
X = initial_design(self.design_type, space, self.num_samples, self.user_def_dist)

if unique:
sorted_design = sorted(list({tuple(x) for x in X}))
Expand Down Expand Up @@ -65,14 +66,14 @@ def get(self, num_anchor=5, duplicate_manager=None, unique=False, context_manage

class ThompsonSamplingAnchorPointsGenerator(AnchorPointsGenerator):

def __init__(self, space, design_type, model, num_samples=25000, user_def_dist=None):
    '''
    From an initial design, it selects the locations using (marginal) Thompson sampling
    based on the predictive distribution of a model.

    :param space: variables space, forwarded to the parent generator
    :param design_type: name of the design, forwarded to the parent generator
    :param model: NOTE that this is a GPyOpt model: it returns mean and standard deviation
    :param num_samples: number of candidate points, forwarded to the parent generator
    :param user_def_dist: user-defined sample generator (default, None), forwarded to the parent generator
    '''
    self.model = model
    super(ThompsonSamplingAnchorPointsGenerator, self).__init__(
        space, design_type, num_samples, user_def_dist=user_def_dist)

def get_anchor_point_scores(self, X):
Expand All @@ -84,13 +85,13 @@ def get_anchor_point_scores(self, X):

class ObjectiveAnchorPointsGenerator(AnchorPointsGenerator):

def __init__(self, space, design_type, objective, num_samples=1000, user_def_dist=None):
    '''
    From an initial design, it selects the locations with the minimum value according to some objective.

    :param space: variables space, forwarded to the parent generator
    :param design_type: name of the design, forwarded to the parent generator
    :param objective: objective stored on the instance as ``self.objective``
    :param num_samples: number of candidate points, forwarded to the parent generator
    :param user_def_dist: user-defined sample generator (default, None), forwarded to the parent generator
    '''
    self.objective = objective
    super(ObjectiveAnchorPointsGenerator, self).__init__(
        space, design_type, num_samples, user_def_dist=user_def_dist)

def get_anchor_point_scores(self, X):
Expand All @@ -99,13 +100,13 @@ def get_anchor_point_scores(self, X):

class RandomAnchorPointsGenerator(AnchorPointsGenerator):

def __init__(self, space, design_type, num_samples=10000, user_def_dist=None):
    '''
    From an initial design, it selects the locations randomly, according to the specified design_type generation scheme.

    :param space: variables space, forwarded to the parent generator
    :param design_type: name of the design, forwarded to the parent generator
    :param num_samples: number of candidate points, forwarded to the parent generator
    :param user_def_dist: user-defined sample generator (default, None), forwarded to the parent generator
    '''
    super(RandomAnchorPointsGenerator, self).__init__(
        space, design_type, num_samples, user_def_dist=user_def_dist)

def get_anchor_point_scores(self, X):

Expand Down
89 changes: 49 additions & 40 deletions GPyOpt/util/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,74 +6,83 @@
import time
from ..core.errors import InvalidConfigError

def initial_design(design, space, init_points_count, user_def_dist=None):
    """
    Generate up to ``init_points_count`` points of ``space`` that satisfy its constraints.

    :param design: the choice of design, forwarded to sample_initial_design
    :param space: variables space
    :param init_points_count: the number of points requested
    :param user_def_dist: user-defined sampling function (default, None), forwarded to sample_initial_design
    :returns: the first ``init_points_count`` rows of the generated samples
    :raises InvalidConfigError: if the space has constraints and ``design`` is not 'random'
    """
    if not space.has_constrains():
        samples = sample_initial_design(design, space, init_points_count, user_def_dist)
    else:
        # Compare by value: `is not` tests object identity, which is only true by accident
        # for strings (interning) and raises a SyntaxWarning on Python >= 3.8.
        if design != 'random':
            raise InvalidConfigError('Sampling with constrains is not allowed with Latin designs. Please use random design instead')

        # Rejection sampling: keep drawing batches and accumulate the rows that satisfy the
        # constraints. NOTE: this loops until enough valid rows exist, so it does not
        # terminate if the sampler never produces a feasible point.
        samples = np.empty((0, space.dimensionality))
        while samples.shape[0] < init_points_count:
            domain_samples = sample_initial_design(design, space, init_points_count, user_def_dist)
            valid_indices = (space.indicator_constraints(domain_samples) == 1).flatten()
            if valid_indices.any():
                samples = np.vstack((samples, domain_samples[valid_indices, :]))

    # The last accepted batch (or the sampler itself) may overshoot the requested count.
    return samples[0:init_points_count, :]


def sample_initial_design(design, space, init_points_count, user_def_dist=None):
    """
    Draw a set of points from ``space`` following the requested design.

    :param design: the choice of design ('random', 'latin', 'sobol' or 'grid')
    :param space: variables space
    :param init_points_count: the number of initial points
    :param user_def_dist: user-defined sampling function (default, None). When given, it is called as
        ``user_def_dist(space, init_points_count, data_per_dimension=...)`` and its result, passed
        through ``np.atleast_2d``, is returned as is; none of the built-in sampling below runs.
    :returns: array of samples, one row per point
    :raises ValueError: if the space has continuous variables and ``design`` is not a supported name
    :Note: with the built-in sampling, discrete dimensions are always added based on uniform samples
    """
    data_per_dimension = None
    if design == 'grid':
        print('Note: in grid designs the total number of generated points is the smallest closest integer of n^d to the selected amount of points')
        continuous_dims = len(space.get_continuous_dims())
        data_per_dimension = iroot(continuous_dims, init_points_count)
        init_points_count = data_per_dimension**continuous_dims

    if user_def_dist is not None:
        ## -- draw all samples from a user-defined distribution function
        return np.atleast_2d(user_def_dist(space, init_points_count, data_per_dimension=data_per_dimension))

    samples = np.empty((init_points_count, space.dimensionality))

    ## -- fill randomly for the non continuous variables
    for (idx, var) in enumerate(space.space_expanded):
        if var.type in ('discrete', 'categorical'):
            samples[:, idx] = np.random.choice(var.domain, init_points_count)

        ## -- sample in the case of bandit variables
        elif var.type == 'bandit':
            # The rows of the bandit domain replace the whole sample matrix.
            idx_samples = np.random.randint(var.domain.shape[0], size=init_points_count)
            samples = var.domain[idx_samples, :]

    ## -- fill the continuous variables with the selected design
    # Only build the continuous design when it is needed: with no continuous variables the
    # bounds are empty and slicing them as a 2d array would raise an IndexError.
    if space._has_continuous():
        bounds = space.get_continuous_bounds()

        if design == 'random':
            X_design = samples_multidimensional_uniform(bounds, init_points_count)
        else:
            lB = np.asarray(bounds)[:, 0].reshape(1, len(bounds))
            uB = np.asarray(bounds)[:, 1].reshape(1, len(bounds))
            diff = uB - lB

            if design == 'latin':
                from pyDOE import lhs
                X_design_aux = lhs(len(bounds), init_points_count, criterion='center')
                # Rescale the design to the bounds (lB and diff broadcast over rows).
                X_design = lB + X_design_aux*diff

            elif design == 'sobol':
                from sobol_seq import i4_sobol_generate
                X_design = np.dot(i4_sobol_generate(len(bounds), init_points_count), np.diag(diff.flatten()))[None, :] + lB

            elif design == 'grid':
                X_design = multigrid(bounds, data_per_dimension)

            else:
                raise ValueError("Unknown design '{}'. Expected 'random', 'latin', 'sobol' or 'grid'.".format(design))

        samples[:, space.get_continuous_dims()] = X_design

    return samples

Expand Down

0 comments on commit 8abbba4

Please sign in to comment.