Add and use random_state in the parameterization

facebookresearch · Dec 26, 2020 · baa41f9 · baa41f9
1 parent e2f6f81
commit baa41f9
Show file tree

Hide file tree

Showing 9 changed files with 46 additions and 26 deletions.
diff --git a/nevergrad/benchmark/test_xpbase.py b/nevergrad/benchmark/test_xpbase.py
@@ -55,14 +55,16 @@ def test_noisy_artificial_function_loss() -> None:
     func = ArtificialFunction(name="sphere", block_dimension=5, noise_level=0.3)
     seed = np.random.randint(99999)
     xp = xpbase.Experiment(func, optimizer="OnePlusOne", budget=5, seed=seed)
+    # copy() may draw a different random initialization for the parameters,
+    # so the function must be copied right after seeding, before the experiment is run.
+    np.random.seed(seed)
+    pfunc = func.copy()
     xp.run()
     loss_ref = xp.result["loss"]
     # now with copy
     assert xp._optimizer is not None
     reco = xp._optimizer.provide_recommendation()
     assert reco is not None
-    np.random.seed(seed)
-    pfunc = func.copy()
     np.testing.assert_equal(pfunc.evaluation_function(reco), loss_ref)
     np.random.seed(None)
 

diff --git a/nevergrad/functions/functionlib.py b/nevergrad/functions/functionlib.py
@@ -30,6 +30,7 @@ def __init__(
         rotation: bool,
         hashing: bool,
         only_index_transform: bool,
+        random_state: np.random.RandomState,
     ) -> None:
         self._dimension = dimension
         self._transforms: tp.List[utils.Transform] = []
@@ -40,21 +41,25 @@ def __init__(
         self.only_index_transform = only_index_transform
         self.hashing = hashing
         self.dimension = self._dimension
+        self.random_state = random_state
 
     def _initialize(self) -> None:
         """Delayed initialization of the transforms to avoid slowing down the instance creation
         (makes unit testing much faster).
         This functions creates the random transform used upon each block (translation + optional rotation).
         """
         # use random indices for blocks
-        indices = np.random.choice(
+        indices = self.random_state.choice(
             self._dimension, self.block_dimension * self.num_blocks, replace=False
         ).tolist()
         indices.sort()  # keep the indices sorted sorted so that blocks do not overlap
         for transform_inds in tools.grouper(indices, n=self.block_dimension):
             self._transforms.append(
                 utils.Transform(
-                    transform_inds, translation_factor=self.translation_factor, rotation=self.rotation
+                    transform_inds,
+                    translation_factor=self.translation_factor,
+                    rotation=self.rotation,
+                    random_state=self.random_state,
                 )
             )
 
@@ -65,9 +70,9 @@ def process(  # pylint: disable=unused-argument
             self._initialize()
         if self.hashing:
             data2 = np.array(data, copy=True)
-            state = np.random.get_state()
+            state = self.random_state.get_state()  # restored below: hashing must not disturb the shared stream
             for i, y in enumerate(data):
-                np.random.seed(int(hashlib.md5(str(y).encode()).hexdigest(), 16) % 500000)  # type: ignore
-                data2[i] = np.random.normal(0.0, 1.0)  # type: ignore
-            np.random.set_state(state)
+                self.random_state.seed(int(hashlib.md5(str(y).encode()).hexdigest(), 16) % 500000)  # type: ignore
+                data2[i] = self.random_state.normal(0.0, 1.0)  # type: ignore  # drawn from the generator seeded above
+            self.random_state.set_state(state)
             data = data2
@@ -170,16 +175,7 @@ def __init__(  # pylint: disable=too-many-arguments
         # special case
         info = corefuncs.registry.get_info(self._parameters["name"])
         only_index_transform = info.get("no_transform", False)
-        # variable
-        self.transform_var = ArtificialVariable(
-            dimension=self._dimension,
-            num_blocks=num_blocks,
-            block_dimension=block_dimension,
-            translation_factor=translation_factor,
-            rotation=rotation,
-            hashing=hashing,
-            only_index_transform=only_index_transform,
-        )
+
         assert not (split and hashing)
         assert not (split and useless_variables > 0)
         parametrization = (
@@ -194,6 +190,17 @@ def __init__(  # pylint: disable=too-many-arguments
         if noise_level > 0:
             parametrization.descriptors.deterministic_function = False
         super().__init__(self.noisy_function, parametrization)
+        # variable, must come after super().__init__(...) to bind the random_state
+        self.transform_var = ArtificialVariable(
+            dimension=self._dimension,
+            num_blocks=num_blocks,
+            block_dimension=block_dimension,
+            translation_factor=translation_factor,
+            rotation=rotation,
+            hashing=hashing,
+            only_index_transform=only_index_transform,
+            random_state=self._parametrization.random_state,
+        )
         self._aggregator = {"max": np.max, "mean": np.mean, "sum": np.sum}[aggregator]
         info = corefuncs.registry.get_info(self._parameters["name"])
         # add descriptors
@@ -242,6 +249,7 @@ def noisy_function(self, x: tp.ArrayLike) -> float:
             func=self.function_from_transform,
             noise_level=self._parameters["noise_level"],
             noise_dissymmetry=self._parameters["noise_dissymmetry"],
+            random_state=self._parametrization.random_state,
         )
 
     def compute_pseudotime(self, input_parameter: tp.Any, loss: tp.Loss) -> float:
@@ -264,16 +272,19 @@ def _noisy_call(
     func: tp.Callable[[np.ndarray], float],
     noise_level: float,
     noise_dissymmetry: bool,
+    random_state: tp.Optional[np.random.RandomState] = None,
 ) -> float:  # pylint: disable=unused-argument
     x_transf = transf(x)
     fx = func(x_transf)
     noise = 0
     if noise_level:
+        # without an explicit random_state, keep drawing from numpy's global generator
+        rng: tp.Any = np.random if random_state is None else random_state
         if not noise_dissymmetry or x_transf.ravel()[0] <= 0:
-            side_point = transf(x + np.random.normal(0, 1, size=len(x)))
+            side_point = transf(x + rng.normal(0, 1, size=len(x)))
             if noise_dissymmetry:
                 noise_level *= 1.0 + x_transf.ravel()[0] * 100.0
-            noise = noise_level * np.random.normal(0, 1) * (func(side_point) - fx)
+            noise = noise_level * rng.normal(0, 1) * (func(side_point) - fx)
     return fx + noise
 
 

diff --git a/nevergrad/functions/test_functionlib.py b/nevergrad/functions/test_functionlib.py
@@ -197,6 +197,7 @@ def test_noisy_call(x: int, noise: bool, noise_dissymmetry: bool, expect_noisy:
         func=lambda y: np.arctanh(y)[0],  # type: ignore
         noise_level=float(noise),
         noise_dissymmetry=noise_dissymmetry,
+        random_state=np.random.RandomState(0),  # a generator object; np.random.get_state() is only a state tuple
     )
     assert not np.isnan(fx)  # noise addition should not get out of function domain
     if expect_noisy:

diff --git a/nevergrad/functions/utils.py b/nevergrad/functions/utils.py
@@ -12,14 +12,23 @@ class Transform:
     which can be applied to a point
     """
 
-    def __init__(self, indices: tp.List[int], translation_factor: float = 1, rotation: bool = False) -> None:
+    def __init__(
+        self,
+        indices: tp.List[int],
+        translation_factor: float = 1,
+        rotation: bool = False,
+        random_state: tp.Optional[np.random.RandomState] = None,
+    ) -> None:
         dim = len(indices)
         assert dim
+        if random_state is None:
+            # get_state() only returns a state tuple: build a real generator, seeded from the global stream
+            random_state = np.random.RandomState(np.random.randint(2 ** 31 - 1))
         self.indices = np.asarray(indices)
-        self.translation: np.ndarray = np.random.normal(0, 1, dim) * translation_factor
+        self.translation: np.ndarray = random_state.normal(0, 1, dim) * translation_factor
         self.rotation_matrix: tp.Optional[np.ndarray] = None
         if rotation:
-            self.rotation_matrix = np.linalg.qr(np.random.normal(0, 1, size=(dim, dim)))[0]
+            self.rotation_matrix = np.linalg.qr(random_state.normal(0, 1, size=(dim, dim)))[0]
 
     def __call__(self, x: np.ndarray) -> np.ndarray:
         y: np.ndarray = x[self.indices] - self.translation

diff --git a/nevergrad/optimization/mutations.py b/nevergrad/optimization/mutations.py
@@ -87,7 +87,7 @@ def coordinatewise_mutation(
         discrete_data = discretization.threshold_discretization(parent, arity=arity)
         discrete_data = np.where(
             boolean_vector,
-            discrete_data + np.random.choice([-1.0, 1.0], size=dimension) * velocity,
+            discrete_data + self.random_state.choice([-1.0, 1.0], size=dimension) * velocity,
             discrete_data,
         )
         return discretization.inverse_threshold_discretization(discrete_data)

diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py
@@ -110,7 +110,7 @@ def __init__(
         if mutation == "adaptive":
             self._adaptive_mr = 0.5
         if mutation == "coordinatewise_adaptive":
-            self._velocity = np.random.uniform(size=self.dimension) * arity / 4.0
+            self._velocity = self._rng.uniform(size=self.dimension) * arity / 4.0
             self._modified_variables = np.array([True] * self.dimension)
         self.noise_handling = noise_handling
         self.mutation = mutation

diff --git a/nevergrad/optimization/recaster.py b/nevergrad/optimization/recaster.py
@@ -208,7 +208,7 @@ def _internal_ask_candidate(self) -> p.Parameter:
                 FinishedUnderlyingOptimizerWarning,
             )
             self._check_error()
-            data = np.random.normal(0, 1, self.dimension)
+            data = self._rng.normal(0, 1, self.dimension)
             return self.parametrization.spawn_child().set_standardized_data(data)
         message = messages[0]  # take oldest message
         message.meta["asked"] = True  # notify that it has been asked so that it is not selected again

diff --git a/nevergrad/parametrization/data.py b/nevergrad/parametrization/data.py
@@ -201,7 +201,7 @@ def sample(self: A) -> A:
         func = (lambda x: x) if self.exponent is None else self._to_reduced_space  # noqa
         std_bounds = tuple(func(b * np.ones(self._value.shape)) for b in self.bounds)
         diff = std_bounds[1] - std_bounds[0]
-        new_data = std_bounds[0] + np.random.uniform(0, 1, size=diff.shape) * diff
+        new_data = std_bounds[0] + self.random_state.uniform(0, 1, size=diff.shape) * diff
         if self.exponent is None:
             new_data = self._to_reduced_space(new_data)
         child.set_standardized_data(new_data - self._get_ref_data(), deterministic=False)

diff --git a/nevergrad/parametrization/mutation.py b/nevergrad/parametrization/mutation.py
@@ -240,7 +240,7 @@ def apply(self, arrays: tp.Sequence[Array]) -> None:
         # slices
         e_weights = np.exp(rolling_mean(self.parameters["positions"].value, size))
         probas = e_weights / np.sum(e_weights)
-        index = np.random.choice(range(length), p=probas)
+        index = self.random_state.choice(range(length), p=probas)
         # update (inefficient)
         shape = tuple(size if a == self.axis else s for a, s in enumerate(arrays[0].value.shape))
         data[tuple(slice(s) for s in shape)] += self.random_state.normal(0, 1, size=shape)