Commit b1071f8
Fixes log-likelihoods so that they are correctly reduced when no automatic batching is performed, by adding an event_ndims attribute to the Transform class

Fix some doctests and exclude import of xarray>=0.16.1 because of an import error (arviz-devs/arviz#1387)

jdehning committed Sep 22, 2020
1 parent 42642ef commit b1071f8
Showing 5 changed files with 90 additions and 31 deletions.
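
A minimal sketch of the underlying TFP behavior this commit relies on, assuming only tensorflow_probability (not part of the commit): a bijector's log-det-Jacobian is summed over the trailing event_ndims dimensions of its input, so when no automatic batching reshapes values, the transform must know the event rank to reduce correctly.

# Sketch: TFP sums the log-det-Jacobian over the trailing event_ndims
# dimensions of the input; event_ndims=0 leaves it unreduced.
import numpy as np
from tensorflow_probability import bijectors as tfb

x = np.ones((3, 2))  # batch of 3 samples, each with event size 2
exp = tfb.Exp()
per_element = exp.inverse_log_det_jacobian(x, event_ndims=0)  # shape (3, 2)
per_sample = exp.inverse_log_det_jacobian(x, event_ndims=1)   # shape (3,)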
16 changes: 10 additions & 6 deletions pymc4/distributions/distribution.py
@@ -5,6 +5,8 @@

import tensorflow as tf
from tensorflow_probability import distributions as tfd
from tensorflow_probability.python.internal import prefer_static

from pymc4.coroutine_model import Model, unpack
from pymc4.distributions.batchstack import BatchStacker
from pymc4.distributions import transforms
@@ -49,10 +51,7 @@ def __init__(
**kwargs,
):
self.conditions, self.base_parameters = self.unpack_conditions(
dtype=dtype,
validate_args=validate_args,
allow_nan_stats=allow_nan_stats,
**kwargs,
dtype=dtype, validate_args=validate_args, allow_nan_stats=allow_nan_stats, **kwargs,
)
self._distribution = self._init_distribution(self.conditions, **self.base_parameters)
self._default_new_state_part = None
@@ -78,6 +77,12 @@ def __init__(
if event_stack is not None:
self._distribution = tfd.Sample(self._distribution, sample_shape=self.event_stack)

if self.transform is not None and self.transform.event_ndims is None:
event_ndims = prefer_static.rank_from_shape(
self._distribution.event_shape_tensor, self._distribution.event_shape
)
self.transform.event_ndims = event_ndims

@property
def dtype(self):
return self._distribution.dtype
@@ -109,8 +114,7 @@ def unpack_conditions(cls, **kwargs) -> Tuple[dict, dict]:
@property
def test_value(self):
return tf.cast(
tf.broadcast_to(self._test_value, self.batch_shape + self.event_shape),
self.dtype,
tf.broadcast_to(self._test_value, self.batch_shape + self.event_shape), self.dtype,
)

def sample(self, sample_shape=(), seed=None):
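
The new code above derives the event rank from the distribution itself. A minimal sketch of what prefer_static.rank_from_shape computes, assuming a standard TFP distribution (not part of the commit):

# Sketch: rank_from_shape uses the static event_shape when it is fully
# known and falls back to evaluating the event_shape_tensor callable.
from tensorflow_probability import distributions as tfd
from tensorflow_probability.python.internal import prefer_static

dist = tfd.MultivariateNormalDiag(loc=[0., 0.])
event_ndims = prefer_static.rank_from_shape(
    dist.event_shape_tensor, dist.event_shape
)
# event_ndims == 1, since the event is a length-2 vector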
8 changes: 6 additions & 2 deletions pymc4/distributions/multivariate.py
@@ -183,9 +183,11 @@ class MvNormal(ContinuousDistribution):
Define a multivariate normal variable for a given covariance
matrix.
>>> import numpy as np
>>> import pymc4 as pm
>>> covariance_matrix = np.array([[1., 0.5], [0.5, 2]])
>>> mu = np.zeros(2)
>>> vals = pm.MvNormal('vals', loc=loc, covariance_matrix=covariance_matrix, shape=(5, 2))
>>> vals = pm.MvNormal('vals', loc=mu, covariance_matrix=covariance_matrix, shape=(5, 2))
"""

def __init__(self, name, loc, covariance_matrix, **kwargs):
@@ -360,10 +362,12 @@ class MvNormalCholesky(ContinuousDistribution):
Define a multivariate normal variable for a given Cholesky
factor of the full covariance matrix (scale_tril).
>>> import numpy as np
>>> import pymc4 as pm
>>> covariance_matrix = np.array([[1., 0.5], [0.5, 2]])
>>> chol_factor = np.linalg.cholesky(covariance_matrix)
>>> mu = np.zeros(2)
>>> vals = pm.MvNormalCholesky('vals', loc=loc, scale_tril=chol_factor)
>>> vals = pm.MvNormalCholesky('vals', loc=mu, scale_tril=chol_factor)
"""

def __init__(self, name, loc, scale_tril, **kwargs):
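
The corrected doctests above also illustrate the commit's theme: a multivariate normal has a vector event, so its log-probability is reduced over that dimension. A minimal sketch in plain TFP, assuming the same covariance matrix as the doctest (not part of the commit):

# Sketch: batch vs. event shape for the MvNormal used in the doctests.
import numpy as np
from tensorflow_probability import distributions as tfd

covariance_matrix = np.array([[1., 0.5], [0.5, 2.]])
dist = tfd.MultivariateNormalTriL(
    loc=np.zeros(2), scale_tril=np.linalg.cholesky(covariance_matrix)
)
draws = dist.sample(5)           # shape (5, 2): batch 5, event size 2
log_prob = dist.log_prob(draws)  # shape (5,): the event dim is summed out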
87 changes: 68 additions & 19 deletions pymc4/distributions/transforms.py
@@ -3,6 +3,7 @@

from tensorflow_probability import bijectors as tfb


__all__ = ["Log", "Sigmoid", "LowerBound", "UpperBound", "Interval"]


@@ -12,9 +13,49 @@ class JacobianPreference(enum.Enum):


class Transform:
"""
Baseclass to define a bijective transformation of a distribution.
Parameters
----------
transform : tfp.bijectors.bijector
The bijector that is called for the transformation.
event_ndims : int
The number of event dimensions of the distribution that has to be transformed.
This is normally automatically set during the initialization of the
PyMC4 distribution
"""

name: Optional[str] = None
jacobian_preference = JacobianPreference.Forward

def __init__(self, transform=None, event_ndims=None):
self._transform = transform
self.event_ndims = event_ndims

@property
def _untransformed_event_ndims(self):
"""
The length of the event_shape of the untransformed distribution. If set to None,
it can lead to errors in case of non autobatched sampling.
"""
if self.event_ndims is None:
return self._min_event_ndims
else:
return self.event_ndims

@property
def _transformed_event_ndims(self):
""" The length of the event_shape of the transformed distribution"""
if self.event_ndims is None:
return self._transform.inverse_event_ndims(self._min_event_ndims)
else:
return self._transform.inverse_event_ndims(self.event_ndims)

@property
def _min_event_ndims(self):
raise NotImplementedError

def forward(self, x):
"""
Forward of a bijector.
@@ -93,12 +134,12 @@ def inverse_log_det_jacobian(self, z):


class Invert(Transform):
def __init__(self, transform):
def __init__(self, transform, **kwargs):
if transform.jacobian_preference == JacobianPreference.Forward:
self.jacobian_preference = JacobianPreference.Backward
else:
self.jacobian_preference = JacobianPreference.Forward
self._transform = transform
super().__init__(transform, **kwargs)

def forward(self, x):
return self._transform.inverse(x)
@@ -107,19 +148,27 @@ def inverse(self, z):
return self._transform.forward(z)

def forward_log_det_jacobian(self, x):
return self._transform.inverse_log_det_jacobian(x)
return self._transform.inverse_log_det_jacobian(x, self._untransformed_event_ndims)

def inverse_log_det_jacobian(self, z):
return self._transform.forward_log_det_jacobian(z)
return self._transform.forward_log_det_jacobian(z, self._transformed_event_ndims)


class BackwardTransform(Transform):
"""Base class for Transforms with Jacobian Preference as Backward"""
"""
Base class for Transforms with Jacobian Preference as Backward.
Backward means that the transformed values are in the domain of the specified function
and the untransformed values in the codomain.
"""

jacobian_preference = JacobianPreference.Backward

def __init__(self, transform):
self._transform = transform
def __init__(self, transform, **kwargs):
super().__init__(transform, **kwargs)

@property
def _min_event_ndims(self):
return self._transform._inverse_min_event_ndims

def forward(self, x):
return self._transform.inverse(x)
@@ -128,54 +177,54 @@ def inverse(self, z):
return self._transform.forward(z)

def forward_log_det_jacobian(self, x):
return self._transform.inverse_log_det_jacobian(x, self._transform.inverse_min_event_ndims)
return self._transform.inverse_log_det_jacobian(x, self._untransformed_event_ndims)

def inverse_log_det_jacobian(self, z):
return self._transform.forward_log_det_jacobian(z, self._transform.forward_min_event_ndims)
return self._transform.forward_log_det_jacobian(z, self._transformed_event_ndims)


class Log(BackwardTransform):
name = "log"

def __init__(self):
def __init__(self, **kwargs):
# NOTE: Do we actually need the inverse to match PyMC3?
transform = tfb.Exp()
super().__init__(transform)
super().__init__(transform, **kwargs)


class Sigmoid(BackwardTransform):
name = "sigmoid"

def __init__(self):
def __init__(self, **kwargs):
transform = tfb.Sigmoid()
super().__init__(transform)
super().__init__(transform, **kwargs)


class LowerBound(BackwardTransform):
""""Transformation to interval [lower_limit, inf]"""

name = "lowerbound"

def __init__(self, lower_limit):
def __init__(self, lower_limit, **kwargs):
transform = tfb.Chain([tfb.Shift(lower_limit), tfb.Exp()])
super().__init__(transform)
super().__init__(transform, **kwargs)


class UpperBound(BackwardTransform):
""""Transformation to interval [-inf, upper_limit]"""

name = "upperbound"

def __init__(self, upper_limit):
def __init__(self, upper_limit, **kwargs):
transform = tfb.Chain([tfb.Shift(upper_limit), tfb.Scale(-1), tfb.Exp()])
super().__init__(transform)
super().__init__(transform, **kwargs)


class Interval(BackwardTransform):
""""Transformation to interval [lower_limit, upper_limit]"""

name = "interval"

def __init__(self, lower_limit, upper_limit):
def __init__(self, lower_limit, upper_limit, **kwargs):
transform = tfb.Sigmoid(low=lower_limit, high=upper_limit)
super().__init__(transform)
super().__init__(transform, **kwargs)
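
Taken together, the new **kwargs plumbing lets every transform carry an event_ndims that both Jacobian methods pass down to the wrapped bijector. A minimal usage sketch against the classes defined above, assuming this commit's pymc4 is importable:

# Sketch: a Log transform whose Jacobian is reduced over one event dimension.
import numpy as np
from pymc4.distributions import transforms

log_t = transforms.Log(event_ndims=1)
x = np.ones((4, 3))  # batch of 4 samples, event size 3
# Calls tfb.Exp().inverse_log_det_jacobian(x, 1) internally -> shape (4,)
ldj = log_t.forward_log_det_jacobian(x)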
9 changes: 5 additions & 4 deletions pymc4/inference/sampling.py
@@ -113,26 +113,27 @@ def sample(
Let's start with a simple model. We'll need some imports to experiment with it.
>>> import pymc4 as pm
>>> import numpy as np
This particular model has a latent variable `sd`
>>> # This particular model has a latent variable `sd`
>>> @pm.model
... def nested_model(cond):
... sd = yield pm.HalfNormal("sd", 1.)
... norm = yield pm.Normal("n", cond, sd, observed=np.random.randn(10))
... return norm
Now, we may want to perform sampling from this model. We already observed some variables and we
>>> # Now, we may want to perform sampling from this model. We already observed some variables and we
now need to fix the condition.
>>> conditioned = nested_model(cond=2.)
Passing ``cond=2.`` we condition our model for future evaluation. Now we go to sampling.
>>> # Passing ``cond=2.`` we condition our model for future evaluation. Now we go to sampling.
Nothing special is required beyond passing the model to ``pm.sample``; the rest of the
configuration is handled by PyMC4.
>>> trace = sample(conditioned)
Notes
-----
Overriding observed variables is still under discussion. The API
for that may look like
>>> new_observed = {"nested_model/n": np.random.randn(10) + 1}
>>> trace = sample(conditioned, observed=new_observed)
This will give a trace with new observed variables. This way is considered to be explicit.
>>> # This will give a trace with new observed variables. This way is considered to be explicit.
"""
# assign sampler if no sampler_type is passed
sampler_assigned: str = auto_assign_sampler(model, sampler_type)
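
For reference, the docstring example above as a plain script; a sketch that assumes this commit's pymc4 together with its nightly TF/TFP dependencies:

# Sketch: the sampling.py docstring example, runnable end to end.
import numpy as np
import pymc4 as pm

@pm.model
def nested_model(cond):
    sd = yield pm.HalfNormal("sd", 1.)
    norm = yield pm.Normal("n", cond, sd, observed=np.random.randn(10))
    return norm

conditioned = nested_model(cond=2.)  # fix the condition for evaluation
trace = pm.sample(conditioned)       # a sampler is assigned automatically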
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,4 +1,5 @@
arviz>=0.5.1
xarray<=0.16.0 # because version 0.16.1 throws an error when imported by arviz
gast>=0.3.2
tf-nightly
tfp-nightly
