Merge pull request #188 from data61/feature/#186

Feature/#186
gradientinstitute · Jul 10, 2018 · 77d0582 · 77d0582
2 parents 046d15a + 1789098
commit 77d0582
Show file tree

Hide file tree

Showing 9 changed files with 191 additions and 234 deletions.
diff --git a/aboleth/distributions.py b/aboleth/distributions.py
@@ -40,8 +40,9 @@ def norm_posterior(dim, std0, suffix=None):
     ----------
     dim : tuple or list
         the dimension of this distribution.
-    std0 : float
+    std0 : float, np.array
         the initial (unoptimized) standard deviation of this distribution.
+        Must be a scalar or have the same shape as dim.
     suffix : str
         suffix to add to the names of the variables of the parameters of this
         distribution.
@@ -60,11 +61,10 @@ def norm_posterior(dim, std0, suffix=None):
     ``std0`` and a beta of 1.
 
     """
-    mu_0 = tf.random_normal(dim, stddev=std0, seed=next(seedgen))
+    # we have different values for each dimension on the first axis
+    mu_0 = np.zeros(dim, dtype=np.float32)
     mu = tf.Variable(mu_0, name=_add_suffix("W_mu_q", suffix))
-
-    std_0 = tf.random_gamma(alpha=std0, shape=dim, seed=next(seedgen))
-    std = tf.Variable(std_0, name=_add_suffix("W_std_q", suffix))
+    std = tf.Variable(std0, name=_add_suffix("W_std_q", suffix))
 
     summary_histogram(mu)
     summary_histogram(std)

diff --git a/aboleth/kernels.py b/aboleth/kernels.py
@@ -4,6 +4,7 @@
 
 from aboleth.random import seedgen
 from aboleth.distributions import norm_posterior, kl_sum
+from aboleth.util import pos, summary_histogram
 
 
 #
@@ -15,18 +16,22 @@ class ShiftInvariant:
 
     Parameters
     ----------
-    lenscale : float, ndarray, Tensor, Variable
-        the length scales of the shift invariant kernel, this can be a scalar
-        for an isotropic kernel, or a vector of shape (input_dim, 1) for an
-        automatic relevance detection (ARD) kernel. If you wish to learn this
-        parameter, make it a Variable (or ``ab.pos(tf.Variable(...))`` to keep
-        it positively constrained).
+    lenscale : float, ndarray, optional
+        The length scales of the shift invariant kernel. This can be a scalar
+        for an isotropic kernel, or a vector of shape (input_dim,) for an
+        automatic relevance detection (ARD) kernel. If learn_lenscale is
+        True, lenscale will be its initial value.
+    learn_lenscale : bool, optional
+        Whether to learn the length scale. If True, the lenscale
+        value provided (or its default) is used for initialisation.
 
     """
 
-    def __init__(self, lenscale=1.0):
+    def __init__(self, lenscale=None, learn_lenscale=False):
         """Constuct a shift invariant kernel object."""
-        self.lenscale = lenscale
+        self.given_lenscale = lenscale
+        self.learn_lenscale = learn_lenscale
+        self.lenscale = None
 
     def weights(self, input_dim, n_features, dtype=np.float32):
         """Generate the random fourier weights for this kernel.
@@ -60,12 +65,16 @@ class RBF(ShiftInvariant):
 
     Parameters
     ----------
-    lenscale : float, ndarray, Tensor, Variable
-        the length scales of the shift invariant kernel, this can be a scalar
-        for an isotropic kernel, or a vector of shape (input_dim, 1) for an
-        automatic relevance detection (ARD) kernel. If you wish to learn this
-        parameter, make it a Variable (or ``ab.pos(tf.Variable(...))`` to keep
-        it positively constrained).
+    lenscale : float, ndarray, optional
+        The length scales of the RBF kernel. This can be a scalar
+        for an isotropic kernel, or a vector of shape (input_dim,) for an
+        automatic relevance detection (ARD) kernel. If not provided, it will
+        be set to ``sqrt(1 / input_dim)`` (this is similar to the 'auto'
+        setting for a scikit learn SVM with a RBF kernel).
+        If learn_lenscale is True, lenscale will be its initial value.
+    learn_lenscale : bool, optional
+        Whether to learn the length scale. If True, the lenscale
+        value provided (or its default) is used for initialisation.
 
     """
 
@@ -92,9 +101,12 @@ def weights(self, input_dim, n_features, dtype=np.float32):
             the KL penalty associated with the parameters in this kernel (0.0).
 
         """
+        self.lenscale, _ = _init_lenscale(self.given_lenscale,
+                                          self.learn_lenscale,
+                                          input_dim)
         rand = np.random.RandomState(next(seedgen))
         e = rand.randn(input_dim, n_features).astype(dtype)
-        P = e / self.lenscale
+        P = e / tf.expand_dims(self.lenscale, axis=-1)
         return P, 0.
 
 
@@ -106,25 +118,24 @@ class RBFVariational(ShiftInvariant):
 
     Parameters
     ----------
-    lenscale : float, ndarray, Tensor, Variable
-        the length scales of the shift invariant kernel, this can be a scalar
-        for an isotropic kernel, or a vector of shape (input_dim, 1) for an
-        automatic relevance detection (ARD) kernel. If you wish to learn this
-        parameter, make it a Variable (or ``ab.pos(tf.Variable(...))`` to keep
-        it positively constrained).
-    lenscale_posterior : float, ndarray, optional
-        the *initial* value for the posterior length scale, this can be a
-        scalar or vector (different initial value per input dimension). If this
-        is left as None, it will be set to ``sqrt(1 / input_dim)`` (this is
-        similar to the 'auto' setting for a scikit learn SVM with a RBF
-        kernel).
+    lenscale : float, ndarray, optional
+        The length scales of the RBF kernel. This can be a scalar
+        for an isotropic kernel, or a vector of shape (input_dim,) for an
+        automatic relevance detection (ARD) kernel. If not provided, it will
+        be set to ``sqrt(1 / input_dim)`` (this is similar to the 'auto'
+        setting for a scikit learn SVM with a RBF kernel).
+        If learn_lenscale is True, lenscale will be the initial value of the
+        prior precision of the Fourier weight distribution.
+    learn_lenscale : bool, optional
+        Whether to learn the (prior) length scale. If True, the lenscale
+        value provided (or its default) is used for initialisation.
 
     """
 
-    def __init__(self, lenscale=1.0, lenscale_posterior=None):
+    def __init__(self, lenscale=None, learn_lenscale=False):
         """Constuct an instance of the RBFVariational kernel."""
-        super().__init__(lenscale)
-        self.lenscale_post = lenscale_posterior
+        self.lenscale_post = None
+        super().__init__(lenscale, learn_lenscale)
 
     def weights(self, input_dim, n_features, dtype=np.float32):
         """Generate the random fourier weights for this kernel.
@@ -149,19 +160,21 @@ def weights(self, input_dim, n_features, dtype=np.float32):
             the KL penalty associated with the parameters in this kernel.
 
         """
+        self.lenscale, self.lenscale_post = _init_lenscale(self.given_lenscale,
+                                                           self.learn_lenscale,
+                                                           input_dim)
         dim = (input_dim, n_features)
 
         # Setup the prior, lenscale may be a variable, so dont use prior_normal
+        pP_scale = self.__len2std(self.lenscale, n_features)
         pP = tf.distributions.Normal(
             loc=tf.zeros(dim),
-            scale=self.__len2std(self.lenscale)
-        )
-
+            scale=pP_scale)
         # Initialise the posterior
-        if self.lenscale_post is None:
-            self.lenscale_post = np.sqrt(1 / input_dim)
-        qP = norm_posterior(dim=dim, std0=self.__len2std(self.lenscale_post),
-                            suffix="kernel")
+        qP_scale = 1.0 / self.lenscale_post
+        if qP_scale.ndim > 0:
+            qP_scale = np.repeat(qP_scale[:, np.newaxis], n_features, axis=1)
+        qP = norm_posterior(dim=dim, std0=qP_scale, suffix="kernel")
 
         KL = kl_sum(qP, pP)
 
@@ -174,8 +187,8 @@ def weights(self, input_dim, n_features, dtype=np.float32):
         return P, KL
 
     @staticmethod
-    def __len2std(lenscale):
-        std = tf.to_float(1. / lenscale)
+    def __len2std(lenscale, n_features):
+        std = tf.tile(1.0 / tf.expand_dims(lenscale, axis=-1), (1, n_features))
         return std
 
 
@@ -184,22 +197,26 @@ class Matern(ShiftInvariant):
 
     Parameters
     ----------
-    lenscale : float, ndarray, Tensor, Variable
-        the length scales of the shift invariant kernel, this can be a scalar
-        for an isotropic kernel, or a vector of shape (input_dim, 1) for an
-        automatic relevance detection (ARD) kernel. If you wish to learn this
-        parameter, make it a Variable (or ``ab.pos(tf.Variable(...))`` to keep
-        it positively constrained).
+    lenscale : float, ndarray, optional
+        The length scales of the Matern kernel. This can be a scalar
+        for an isotropic kernel, or a vector of shape (input_dim,) for an
+        automatic relevance detection (ARD) kernel. If not provided, it will
+        be set to ``sqrt(1 / input_dim)`` (this is similar to the 'auto'
+        setting for a scikit learn SVM with a RBF kernel).
+        If learn_lenscale is True, lenscale will be its initial value.
+    learn_lenscale : bool, optional
+        Whether to learn the length scale. If True, the lenscale
+        value provided (or its default) is used for initialisation.
     p : int
         a zero or positive integer specifying the number of the Matern kernel,
         e.g. ``p == 0`` results int a Matern 1/2 kernel, ``p == 1``  results in
         the Matern 3/2 kernel etc.
 
     """
 
-    def __init__(self, lenscale=1.0, p=1):
+    def __init__(self, lenscale=None, learn_lenscale=False, p=1):
         """Constuct a Matern kernel object."""
-        super().__init__(lenscale)
+        super().__init__(lenscale, learn_lenscale)
         assert isinstance(p, int) and p >= 0
         self.p = p
 
@@ -235,9 +252,29 @@ def weights(self, input_dim, n_features, dtype=np.float32):
         # To sample from a m.v. t we use the formula
         # from wikipedia, x = y * np.sqrt(df / u) where y ~ norm(0, I),
         # u ~ chi2(df), then x ~ mvt(0, I, df)
+        self.lenscale, _ = _init_lenscale(self.given_lenscale,
+                                          self.learn_lenscale, input_dim)
         df = 2 * (self.p + 0.5)
         rand = np.random.RandomState(next(seedgen))
         y = rand.randn(input_dim, n_features)
         u = rand.chisquare(df, size=(n_features,))
-        P = (y * np.sqrt(df / u)).astype(dtype) / self.lenscale
+        P = (y * np.sqrt(df / u)).astype(dtype) / \
+            tf.expand_dims(self.lenscale, axis=-1)
         return P, 0.
+
+
+def _init_lenscale(given_lenscale, learn_lenscale, input_dim):
+    """Provide the lenscale variable and its initial value."""
+    given_lenscale = (np.sqrt(1.0 / input_dim) if given_lenscale is None
+                      else np.array(given_lenscale).squeeze()).astype(
+                          np.float32)
+
+    if learn_lenscale:
+        lenscale = tf.Variable(pos(given_lenscale), name="kernel_lenscale")
+        summary_histogram(lenscale)
+    else:
+        lenscale = given_lenscale
+
+    lenscale_vec = tf.ones(input_dim, dtype=tf.float32) * lenscale
+    init_lenscale = given_lenscale * np.ones(input_dim, dtype=np.float32)
+    return lenscale_vec, init_lenscale