Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix random state for test_noisy_artificial_function_loss #977

Merged
merged 11 commits into from
Jan 26, 2021
12 changes: 8 additions & 4 deletions nevergrad/benchmark/test_xpbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_run_artificial_function() -> None:
xp = xpbase.Experiment(func, optimizer="OnePlusOne", budget=24, num_workers=2, batch_mode=True, seed=12)
summary = xp.run()
assert summary["elapsed_time"] < 0.5 # should be much faster
np.testing.assert_almost_equal(summary["loss"], 0.00078544) # makes sure seeding works!
np.testing.assert_almost_equal(summary["loss"], 0.08444784112287358) # makes sure seeding works!
testing.assert_set_equal(summary.keys(), DESCRIPTION_KEYS)
np.testing.assert_equal(summary["elapsed_budget"], 24)
np.testing.assert_equal(summary["pseudotime"], 12) # defaults to 1 unit per eval ( /2 because 2 workers)
Expand All @@ -48,21 +48,25 @@ def test_run_packed_artificial_function() -> None:
)
xp = xpbase.Experiment(func, optimizer="OnePlusOne", budget=24, num_workers=2, batch_mode=True, seed=14)
summary = xp.run()
np.testing.assert_almost_equal(summary["loss"], -9676.5, decimal=1) # makes sure seeding works!
np.testing.assert_almost_equal(
summary["loss"], -9784.829729792353, decimal=1
) # makes sure seeding works!


def test_noisy_artificial_function_loss() -> None:
func = ArtificialFunction(name="sphere", block_dimension=5, noise_level=0.3)
seed = np.random.randint(99999)
xp = xpbase.Experiment(func, optimizer="OnePlusOne", budget=5, seed=seed)
# Because copy() can produce a different random initialization for the parameters,
# the function should be copied early.
np.random.seed(seed)
pfunc = func.copy()
jrapin marked this conversation as resolved.
Show resolved Hide resolved
xp.run()
loss_ref = xp.result["loss"]
# now with copy
assert xp._optimizer is not None
reco = xp._optimizer.provide_recommendation()
assert reco is not None
np.random.seed(seed)
pfunc = func.copy()
np.testing.assert_equal(pfunc.evaluation_function(reco), loss_ref)
np.random.seed(None)

Expand Down
44 changes: 26 additions & 18 deletions nevergrad/functions/functionlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
rotation: bool,
hashing: bool,
only_index_transform: bool,
random_state: np.random.RandomState,
) -> None:
self._dimension = dimension
self._transforms: tp.List[utils.Transform] = []
Expand All @@ -40,21 +41,25 @@ def __init__(
self.only_index_transform = only_index_transform
self.hashing = hashing
self.dimension = self._dimension
self.random_state = random_state

def _initialize(self) -> None:
"""Delayed initialization of the transforms to avoid slowing down the instance creation
(makes unit testing much faster).
This function creates the random transform used upon each block (translation + optional rotation).
"""
# use random indices for blocks
indices = np.random.choice(
indices = self.random_state.choice(
self._dimension, self.block_dimension * self.num_blocks, replace=False
).tolist()
indices.sort() # keep the indices sorted so that blocks do not overlap
for transform_inds in tools.grouper(indices, n=self.block_dimension):
self._transforms.append(
utils.Transform(
transform_inds, translation_factor=self.translation_factor, rotation=self.rotation
transform_inds,
translation_factor=self.translation_factor,
rotation=self.rotation,
random_state=self.random_state,
)
)

Expand All @@ -65,11 +70,9 @@ def process( # pylint: disable=unused-argument
self._initialize()
if self.hashing:
data2 = np.array(data, copy=True)
state = np.random.get_state()
for i, y in enumerate(data):
np.random.seed(int(hashlib.md5(str(y).encode()).hexdigest(), 16) % 500000) # type: ignore
data2[i] = np.random.normal(0.0, 1.0) # type: ignore
np.random.set_state(state)
self.random_state.seed(int(hashlib.md5(str(y).encode()).hexdigest(), 16) % 500000) # type: ignore
data2[i] = self.random_state.normal(0.0, 1.0) # type: ignore
data = data2
data = np.array(data, copy=False)
output = []
Expand Down Expand Up @@ -170,16 +173,7 @@ def __init__( # pylint: disable=too-many-arguments
# special case
info = corefuncs.registry.get_info(self._parameters["name"])
only_index_transform = info.get("no_transform", False)
# variable
self.transform_var = ArtificialVariable(
dimension=self._dimension,
num_blocks=num_blocks,
block_dimension=block_dimension,
translation_factor=translation_factor,
rotation=rotation,
hashing=hashing,
only_index_transform=only_index_transform,
)

assert not (split and hashing)
assert not (split and useless_variables > 0)
parametrization = (
Expand All @@ -194,6 +188,18 @@ def __init__( # pylint: disable=too-many-arguments
if noise_level > 0:
parametrization.descriptors.deterministic_function = False
super().__init__(self.noisy_function, parametrization)
# variable, must come after super().__init__(...) to bind the random_state
# may consider having its own local random_state instead, but this is less reproducible
self.transform_var = ArtificialVariable(
dimension=self._dimension,
num_blocks=num_blocks,
block_dimension=block_dimension,
translation_factor=translation_factor,
rotation=rotation,
hashing=hashing,
only_index_transform=only_index_transform,
random_state=self._parametrization.random_state,
)
self._aggregator = {"max": np.max, "mean": np.mean, "sum": np.sum}[aggregator]
info = corefuncs.registry.get_info(self._parameters["name"])
# add descriptors
Expand Down Expand Up @@ -242,6 +248,7 @@ def noisy_function(self, x: tp.ArrayLike) -> float:
func=self.function_from_transform,
noise_level=self._parameters["noise_level"],
noise_dissymmetry=self._parameters["noise_dissymmetry"],
random_state=self._parametrization.random_state,
)

def compute_pseudotime(self, input_parameter: tp.Any, loss: tp.Loss) -> float:
Expand All @@ -264,16 +271,17 @@ def _noisy_call(
func: tp.Callable[[np.ndarray], float],
noise_level: float,
noise_dissymmetry: bool,
random_state: np.random.RandomState,
) -> float: # pylint: disable=unused-argument
x_transf = transf(x)
fx = func(x_transf)
noise = 0
if noise_level:
if not noise_dissymmetry or x_transf.ravel()[0] <= 0:
side_point = transf(x + np.random.normal(0, 1, size=len(x)))
side_point = transf(x + random_state.normal(0, 1, size=len(x)))
if noise_dissymmetry:
noise_level *= 1.0 + x_transf.ravel()[0] * 100.0
noise = noise_level * np.random.normal(0, 1) * (func(side_point) - fx)
noise = noise_level * random_state.normal(0, 1) * (func(side_point) - fx)
return fx + noise


Expand Down
22 changes: 15 additions & 7 deletions nevergrad/functions/test_functionlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,25 @@ def test_ptb_no_overfitting() -> None:


@testing.parametrized(
sphere=({"name": "sphere", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2}, 9.630),
cigar=({"name": "cigar", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2}, 3527289.665),
sphere=(
{"name": "sphere", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2},
13.377591870607294,
),
cigar=(
{"name": "cigar", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2},
12492378.626191331,
),
cigar_rot=(
{"rotation": True, "name": "cigar", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2},
5239413.576,
2575881.272645816,
),
hashed=(
{"name": "sphere", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2, "hashing": True},
6.174957533,
8.916424986561422,
),
noisy_sphere=(
{"name": "sphere", "block_dimension": 3, "useless_variables": 6, "num_blocks": 2, "noise_level": 0.2},
9.576,
14.512132049518083,
),
noisy_very_sphere=(
{
Expand All @@ -95,12 +101,13 @@ def test_ptb_no_overfitting() -> None:
"noise_dissymmetry": True,
"noise_level": 0.2,
},
7.615,
19.33566196119778,
),
)
def test_testcase_function_value(config: tp.Dict[str, tp.Any], expected: float) -> None:
# make sure no change is made to the computation
func = functionlib.ArtificialFunction(**config)
np.random.seed(1) # don't know how to control the randomness
func = func.copy()
np.random.seed(2) # initialization is delayed
x = np.random.normal(0, 1, func.dimension)
Expand Down Expand Up @@ -173,7 +180,7 @@ def test_compute_pseudotime() -> None:
np.testing.assert_equal(func.compute_pseudotime(((x,), {}), 3), 1.0)
np.random.seed(12)
func = functionlib.ArtificialFunction("DelayedSphere", 2)
np.testing.assert_almost_equal(func.compute_pseudotime(((x,), {}), 3), 0.00034702)
np.testing.assert_almost_equal(func.compute_pseudotime(((x,), {}), 3), 0.00025003021607278633)
# check minimum
np.random.seed(None)
func = functionlib.ArtificialFunction("DelayedSphere", 2)
Expand All @@ -197,6 +204,7 @@ def test_noisy_call(x: int, noise: bool, noise_dissymmetry: bool, expect_noisy:
func=lambda y: np.arctanh(y)[0], # type: ignore
noise_level=float(noise),
noise_dissymmetry=noise_dissymmetry,
random_state=np.random.RandomState(0),
)
assert not np.isnan(fx) # noise addition should not get out of function domain
if expect_noisy:
Expand Down
15 changes: 12 additions & 3 deletions nevergrad/functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,23 @@ class Transform:
which can be applied to a point
"""

def __init__(self, indices: tp.List[int], translation_factor: float = 1, rotation: bool = False) -> None:
def __init__(
self,
indices: tp.List[int],
translation_factor: float = 1,
rotation: bool = False,
random_state: np.random.RandomState = None,
) -> None:
dim = len(indices)
assert dim
if random_state is None:
random_state = np.random.RandomState(0)
random_state.set_state(np.random.get_state())
self.indices = np.asarray(indices)
self.translation: np.ndarray = np.random.normal(0, 1, dim) * translation_factor
self.translation: np.ndarray = random_state.normal(0, 1, dim) * translation_factor
self.rotation_matrix: tp.Optional[np.ndarray] = None
if rotation:
self.rotation_matrix = np.linalg.qr(np.random.normal(0, 1, size=(dim, dim)))[0]
self.rotation_matrix = np.linalg.qr(random_state.normal(0, 1, size=(dim, dim)))[0]

def __call__(self, x: np.ndarray) -> np.ndarray:
y: np.ndarray = x[self.indices] - self.translation
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/optimization/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def coordinatewise_mutation(
discrete_data = discretization.threshold_discretization(parent, arity=arity)
discrete_data = np.where(
boolean_vector,
discrete_data + np.random.choice([-1.0, 1.0], size=dimension) * velocity,
discrete_data + self.random_state.choice([-1.0, 1.0], size=dimension) * velocity,
discrete_data,
)
return discretization.inverse_threshold_discretization(discrete_data)
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/optimization/optimizerlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __init__(
if mutation == "adaptive":
self._adaptive_mr = 0.5
if mutation == "coordinatewise_adaptive":
self._velocity = np.random.uniform(size=self.dimension) * arity / 4.0
self._velocity = self._rng.uniform(size=self.dimension) * arity / 4.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch!
I would have expected this to get caught by the CI though :s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for the 3 changes below, nice catches!

self._modified_variables = np.array([True] * self.dimension)
self.noise_handling = noise_handling
self.mutation = mutation
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/optimization/recaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def _internal_ask_candidate(self) -> p.Parameter:
FinishedUnderlyingOptimizerWarning,
)
self._check_error()
data = np.random.normal(0, 1, self.dimension)
data = self._rng.normal(0, 1, self.dimension)
return self.parametrization.spawn_child().set_standardized_data(data)
message = messages[0] # take oldest message
message.meta["asked"] = True # notify that it has been asked so that it is not selected again
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/parametrization/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def random_state(self) -> np.random.RandomState:
"""
if self._random_state is None:
# use the setter, to make sure the random state is propagated to the variables
seed = np.random.randint(2 ** 32, dtype=np.uint32)
seed = np.random.randint(2 ** 32, dtype=np.uint32) # better way?
self._set_random_state(np.random.RandomState(seed))
assert self._random_state is not None
return self._random_state
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/parametrization/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def sample(self: A) -> A:
func = (lambda x: x) if self.exponent is None else self._to_reduced_space # noqa
std_bounds = tuple(func(b * np.ones(self._value.shape)) for b in self.bounds)
diff = std_bounds[1] - std_bounds[0]
new_data = std_bounds[0] + np.random.uniform(0, 1, size=diff.shape) * diff
new_data = std_bounds[0] + self.random_state.uniform(0, 1, size=diff.shape) * diff
if self.exponent is None:
new_data = self._to_reduced_space(new_data)
child.set_standardized_data(new_data - self._get_ref_data(), deterministic=False)
Expand Down
2 changes: 1 addition & 1 deletion nevergrad/parametrization/mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def apply(self, arrays: tp.Sequence[Array]) -> None:
# slices
e_weights = np.exp(rolling_mean(self.parameters["positions"].value, size))
probas = e_weights / np.sum(e_weights)
index = np.random.choice(range(length), p=probas)
index = self.random_state.choice(range(length), p=probas)
# update (inefficient)
shape = tuple(size if a == self.axis else s for a, s in enumerate(arrays[0].value.shape))
data[tuple(slice(s) for s in shape)] += self.random_state.normal(0, 1, size=shape)
Expand Down