Merge pull request #86 from HIIT/bolfi_fix
Bolfi fix
vuolleko committed Dec 2, 2016
2 parents 5df48f0 + 88d72be commit 41a0ae5
Showing 16 changed files with 695 additions and 174 deletions.
32 changes: 32 additions & 0 deletions elfi/bo/acquisition.py
@@ -203,6 +203,38 @@ def acquire(self, n_values, pending_locations=None):
         return ret
 
 
+class RandomAcquisition(AcquisitionBase):
+    """Acquisition purely from priors. This can be useful if parameters
+    in certain regions are forbidden (i.e. their pdf is zero).
+
+    Parameters
+    ----------
+    prior_list : list of Prior objects
+    """
+
+    def __init__(self, prior_list, *args, **kwargs):
+        self.prior_list = prior_list
+        n_priors = len(prior_list)
+
+        # hacky...
+        class DummyModel(object):
+            pass
+        model = DummyModel()
+        model.input_dim = n_priors
+        model.bounds = tuple(zip([0]*n_priors, [1]*n_priors))
+        model.evaluate = lambda x: exec('raise NotImplementedError')
+
+        super(RandomAcquisition, self).__init__(*args, model=model, **kwargs)
+
+    def acquire(self, n_values, pending_locations=None):
+        ret = super(RandomAcquisition, self).acquire(n_values, pending_locations)
+        for i, p in enumerate(self.prior_list):
+            ret[:, i] = p.generate(n_values).compute().ravel()
+        logger.debug("Acquired {}".format(n_values))
+        return ret
+
+
 class RbfAtPendingPointsMixin(AcquisitionBase):
     """ Adds RBF kernels at pending point locations """
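Below is a minimal usage sketch for the RandomAcquisition class added above. The elfi.Prior construction and any extra AcquisitionBase constructor arguments are assumptions for illustration; only prior_list and acquire() come from this diff.

# Hypothetical usage sketch. elfi.Prior's signature and any additional
# AcquisitionBase constructor arguments are assumptions; only
# RandomAcquisition(prior_list) and acquire() come from the diff above.
import elfi

# Two uniform priors; outside their support the pdf is zero, which is
# exactly the situation RandomAcquisition is meant for.
p1 = elfi.Prior('p1', 'uniform', 0, 1)
p2 = elfi.Prior('p2', 'uniform', 0, 1)

acq = RandomAcquisition([p1, p2])
locations = acq.acquire(10)  # shape (10, 2); each column drawn from its prior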
60 changes: 42 additions & 18 deletions elfi/bo/gpy_model.py
@@ -34,13 +34,21 @@ class GPyModel():
         Optimizer to use for adjusting model parameters.
         Alternatives: "scg", "fmin_tnc", "simplex", "lbfgsb", "lbfgs", "sgd"
         See also: paramz.Model.optimize()
-    n_opt_iters : int
+    max_opt_iters : int
         Number of optimization iterations to run after each observed sample.
+
+    Possible TODOs:
+    - allow initialization with samples, which give hints to initial kernel params
+    - allow giving GP object as parameter
+    - priors for the GP
+    - allow kernel bias term
+
     """
 
     def __init__(self, input_dim=1, bounds=None, kernel=None,
-                 kernel_class=GPy.kern.RBF, kernel_var=1.0, kernel_scale=0.1,
-                 noise_var=0.0, optimizer="scg", n_opt_iters=0):
+                 kernel_class=GPy.kern.RBF, kernel_var=1.0, kernel_scale=1.,
+                 noise_var=0.5, optimizer="scg", max_opt_iters=50):
         self.input_dim = input_dim
         if self.input_dim < 1:
             raise ValueError("Input dimension needs to be larger than 1. " +
@@ -56,7 +64,7 @@ def __init__(self, input_dim=1, bounds=None, kernel=None,
                              "Expected {}. Received {}.".format(self.input_dim, len(self_bounds)))
         self.noise_var = noise_var
         self.optimizer = optimizer
-        self.n_opt_iters = n_opt_iters
+        self.max_opt_iters = max_opt_iters
         self.gp = None
         self.set_kernel(kernel, kernel_class, kernel_var, kernel_scale)
 
@@ -106,7 +114,7 @@ def set_noise_var(self, noise_var=0.0):
         self.noise_var = noise_var
         if self.gp is not None:
             # re-fit gp with new noise variance
-            self.gp = self._fit_gp(self.gp.X, self.gp.Y)
+            self._fit_gp(self.gp.X, self.gp.Y)
 
     def set_kernel(self, kernel=None, kernel_class=None, kernel_var=None,
                    kernel_scale=None):
@@ -132,14 +140,19 @@ def set_kernel(self, kernel=None, kernel_class=None, kernel_var=None,
                                             lengthscale=self.kernel_scale)
         if self.gp is not None:
             # re-fit gp with new kernel
-            self.gp = self._fit_gp(self.gp.X, self.gp.Y)
+            self._fit_gp(self.gp.X, self.gp.Y)
 
     def _fit_gp(self, X, Y):
-        """Constructs the gp model and returns it.
+        """Constructs the gp model.
         """
-        return GPy.models.GPRegression(X=X, Y=Y,
-                                       kernel=self.kernel,
-                                       noise_var=self.noise_var)
+        self.gp = GPy.models.GPRegression(X=X, Y=Y,
+                                          kernel=self.kernel,
+                                          noise_var=self.noise_var)
+
+        # FIXME: move to initialization
+        self.gp.kern.lengthscale.set_prior(GPy.priors.Gamma.from_EV(1., 100.), warning=False)
+        self.gp.kern.variance.set_prior(GPy.priors.Gamma.from_EV(1., 100.), warning=False)
+        self.gp.likelihood.variance.set_prior(GPy.priors.Gamma.from_EV(1., 100.), warning=False)
 
     def _within_bounds(self, x):
         """Returns true if location x is within model bounds.
@@ -190,29 +203,29 @@ def update(self, X, Y):
         if self.gp is not None:
             X = np.vstack((self.gp.X, X))
             Y = np.vstack((self.gp.Y, Y))
-        self.gp = self._fit_gp(X, Y)
+        self._fit_gp(X, Y)
         self.optimize()
 
-    def optimize(self, n_opt_iters=None, fail_on_error=False):
+    def optimize(self, max_opt_iters=None, fail_on_error=False):
         """Optimize GP kernel parameters.
 
         Parameters
        ----------
-        n_opt_iters : int or None
+        max_opt_iters : int or None
            Maximum number of optimization iterations.
-            If None, will use self.n_opt_iters.
+            If None, will use self.max_opt_iters.
         fail_on_error : bool
            If False, will try to continue function in case
            a numerical error takes place in optimization.
         """
         if self.gp is None:
             return
-        if n_opt_iters is None:
-            n_opt_iters = self.n_opt_iters
-        if n_opt_iters < 1:
+        if max_opt_iters is None:
+            max_opt_iters = self.max_opt_iters
+        if max_opt_iters < 1:
             return
         try:
-            self.gp.optimize(self.optimizer, max_iters=n_opt_iters)
+            self.gp.optimize(self.optimizer, max_iters=max_opt_iters)
         except np.linalg.linalg.LinAlgError:
             logger.warning("{}: Numerical error in GP optimization. Attempting to continue."
                            .format(self.__class__.__name__))
@@ -227,3 +240,14 @@ def n_observations(self):
             return 0
         return self.gp.num_data
 
+    def copy(self):
+        model = GPyModel(input_dim=self.input_dim,
+                         bounds=self.bounds[:],
+                         kernel=self.kernel.copy(),
+                         noise_var=self.noise_var,
+                         optimizer=self.optimizer,
+                         max_opt_iters=self.max_opt_iters)
+        if self.gp is not None:
+            model._fit_gp(self.gp.X[:], self.gp.Y[:])
+        return model
+
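The substantive change in this file is that _fit_gp now attaches Gamma hyperpriors to the kernel lengthscale, kernel variance, and likelihood (noise) variance, so gp.optimize() performs MAP estimation rather than unpenalized maximum likelihood. A small sketch of what Gamma.from_EV(1., 100.) amounts to; the shape/rate conversion below assumes GPy's mean/variance parameterization of its Gamma prior.

# What GPy.priors.Gamma.from_EV(1., 100.) means, assuming GPy's
# Gamma(a, b) uses shape a and rate b (mean a/b, variance a/b**2):
#   a = E**2 / V = 1.0**2 / 100.0 = 0.01
#   b = E / V    = 1.0 / 100.0    = 0.01
# i.e. a very broad, weakly informative prior centered on mean 1.
import GPy

prior = GPy.priors.Gamma.from_EV(1., 100.)
print(prior.a, prior.b)  # expected: 0.01 0.01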

105 changes: 67 additions & 38 deletions elfi/core.py
@@ -331,8 +331,12 @@ def read_data(self, node_name, sl):
         """
         raise NotImplementedError
 
-    def reset(self):
+    def reset(self, node_name):
         """Reset the store to the initial state. All results will be cleared.
+
+        Parameters
+        ----------
+        node_name : string
         """
         raise NotImplementedError
 
@@ -377,7 +381,7 @@ def _write(self, key, output_result):
         """
         raise NotImplementedError
 
-    def _reset(self):
+    def _reset(self, name):
         """Operation for resetting storage object (optional).
         """
         pass
@@ -397,9 +401,9 @@ def read_data(self, node_name, sl):
         key = make_key(name, sl)
         return delayed(self._read_data(node_name, sl), name=key, pure=True)
 
-    def reset(self):
+    def reset(self, node_name):
         self._pending_persisted.clear()
-        self._reset()
+        self._reset(node_name)
 
     # Issue https://github.com/dask/distributed/issues/647
     @gen.coroutine
@@ -474,10 +478,45 @@ def read_data(self, node_name, sl):
             raise IndexError("No matching slice found.")
         return get_named_item(output[0], 'data')
 
-    def reset(self):
+    def reset(self, node_name):
         self._persisted.clear()
 
 
+def prepare_store(store):
+    """Takes in a user-originated specifier for 'store' and
+    returns a corresponding ElfiStore derivative, or raises
+    a ValueError.
+
+    Parameters
+    ----------
+    store : various
+        None : means data is not stored.
+        ElfiStore derivative : stores data according to specification.
+        String identifiers :
+            "cache" : Creates a MemoryStore()
+        Sliceable object : is converted to LocalDataStore(obj)
+            Examples: local numpy array, h5py instance.
+            The size of the object must be at least (n_samples, ) + data.shape
+            The slicing must be consistent:
+                obj[sl] = d must guarantee that obj[sl] == d
+            For example, an empty list will not guarantee this, but a pre-allocated one will.
+            See also: LocalDataStore
+
+    Returns
+    -------
+    ElfiStore instance, or None if store is None
+    """
+    if store is None:
+        return None
+    if isinstance(store, ElfiStore):
+        return store
+    if type(store) == str:
+        if store.lower() == "cache":
+            return MemoryStore()
+        raise ValueError("Unknown store identifier '{}'".format(store))
+    return LocalDataStore(store)
+
+
 class DelayedOutputCache:
     """Handles a continuous list of delayed outputs for a node.
     """
@@ -486,36 +525,16 @@ def __init__(self, node_name, store=None):
 
         Parameters
         ----------
-        store : None, ElfiStore, string, or sliceable object
-            None : means data is not stored.
-            ElfiStore derivative : stores data according to specification.
-            String identifiers :
-                "cache" : Creates a MemoryStore()
-            Sliceable object : is converted to LocalDataStore(obj)
-                Examples: local numpy array, h5py instance.
-                The size of the object must be at least (n_samples, ) + data.shape
-                The slicing must be consistent:
-                    obj[sl] = d must guarantee that obj[sl] == d
-                For example, an empty list will not guarantee this, but a pre-allocated will.
-                See also: LocalDataStore
+        node_name : string
+            Name of node that owns this instance.
+        store : various (optional)
+            See prepare_store interface.
         """
         self._delayed_outputs = []
         self._stored_mask = []
-        self._store = self._prepare_store(store)
+        self._store = prepare_store(store)
         self._node_name = node_name
 
-    def _prepare_store(self, store):
-        # Handle local store objects
-        if store is None:
-            return None
-        if isinstance(store, ElfiStore):
-            return store
-        if type(store) == str:
-            if store.lower() == "cache":
-                return MemoryStore()
-            raise ValueError("Unknown store identifier '{}'".format(store))
-        return LocalDataStore(store)
-
     def __len__(self):
         l = 0
         for o in self._delayed_outputs:
@@ -538,7 +557,7 @@ def reset(self):
         del self._delayed_outputs[:]
         del self._stored_mask[:]
         if self._store is not None:
-            self._store.reset()
+            self._store.reset(self._node_name)
 
     def __getitem__(self, sl):
         """
@@ -704,14 +723,25 @@ def normalize_data_dict(dict, n):
 
 class Operation(Node):
     def __init__(self, name, operation, *parents, store=None):
-        """
+        """Operation node transforms data from parents to output
+        that is given to the node's children.
+
+        The operation should in general take 'input_dicts' from
+        the parent nodes as input. The function signature may be
+        different for different operations.
+
+        The operation should return a single 'output_dict'.
+
         Parameters
         ----------
-        name : name of the node
-        operation : node operation function
-        *parents : parents of the nodes
-        store : `OutputStore` instance
+        name : string
+            Name of the node.
+        operation : callable
+            Node operation function.
+        *parents : list of nodes (optional)
+            Parents of the node.
+        store : various (optional)
+            See prepare_store interface.
         """
         super(Operation, self).__init__(name, *parents)
         self.operation = operation
@@ -721,8 +751,7 @@ def __init__(self, name, operation, *parents, store=None):
         self.reset(propagate=False)
 
     def acquire(self, n, starting=0, batch_size=None):
-        """
-        Acquires values from the start or from starting index.
+        """Acquires values from the start or from starting index.
         Generates new ones if needed and updates the _generate_index.
         """
         sl = slice(starting, starting+n)
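To make the refactoring above concrete, here is a short sketch of the prepare_store contract that this commit extracts out of DelayedOutputCache. The behavior follows the function body in the diff; numpy is used only as an example of a pre-allocated sliceable object.

# Usage sketch for prepare_store; behavior follows the diff above.
import numpy as np

print(prepare_store(None))        # None: data is not stored
cache = prepare_store("cache")    # a MemoryStore instance

data = np.zeros((1000, 1))        # pre-allocated, so obj[sl] = d guarantees obj[sl] == d
local = prepare_store(data)       # wrapped in a LocalDataStore

try:
    prepare_store("redis")        # unknown string identifier
except ValueError as err:
    print(err)                    # Unknown store identifier 'redis'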
