In [1]:
import tensorflow as tf
import tangent as tgt
import tensorflow_probability as tfp
tfb = tfp.bijectors
tfd = tfp.distributions

tf.enable_eager_execution()

  from ._conv import register_converters as _register_converters


In [2]:
import matplotlib.pyplot as plt

In [3]:
# TODO should define class Dense(tfb.Bijector): ...
def Dense(num_units, name):
    with tf.variable_scope('affine_transform' + name) as scope:
        shift = tf.get_variable(name='shift', shape=[num_units], dtype=tf.float32)
        mat = tf.get_variable(name='scale', shape=[num_units, num_units], dtype=tf.float32)
        scale = tf.linalg.LinearOperatorFullMatrix(mat)
        affine = tfb.AffineLinearOperator(shift, scale=scale)
        return affine

In [4]:
num_units = 16
fn = tfb.Chain([
    Dense(num_units, '0'),
    Dense(num_units, '1'),
])
dist = tfd.MultivariateNormalDiag(loc=tf.zeros([num_units]))

density = tfd.TransformedDistribution(
    distribution=dist,
    bijector=fn)

In [5]:
y = tf.random_normal([10, num_units])
density.prob(y)



  _get_logger().warn(msg, *args, **kwargs)
  _get_logger().warn(msg, *args, **kwargs)
  _get_logger().warn(msg, *args, **kwargs)
  _get_logger().warn(msg, *args, **kwargs)


<tf.Tensor: id=448, shape=(10,), dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [6]:
def Conv(num_units, name):
    # want a way to impose structural priors on the fn
    # to add local/hierarchical prior
    # i guess this is what autoregressive flows attempt to do???
    
    # how can I construct invertible fns that change the dimensionality!?
    with tf.variable_scope('conv_transform' + name) as scope:
        shift = tf.get_variable(name='shift', shape=[num_units*num_units])
        mat = tf.get_variable(name='scale', shape=[num_units, num_units], dtype=tf.float32)
        spectrum = tf.fft2d(tf.cast(mat, tf.complex64))
        scale = tf.linalg.LinearOperatorCirculant2D(spectrum, input_output_dtype=tf.float32)
        affine = tfb.AffineLinearOperator(shift, scale=scale)
        return affine

In [7]:
class Conv(tfb.Bijector):
    """
    Want a hierarchical version of autoregressive flow.
    Each layer is generated conditional on the previous one.
    P(x) = P(x|z_n)P(z_n|z_n-1) ... P(z_1|z_0)
    
    It must be possible to construct a conv bijector as no info is destroyed!?
    Hmm, maybe not, same with linear operators, but det = 0. need to investigate.
    
    # TODO want to weaken to approximate inversion!?
    # will want guarantees on error
    > " Geometrically, the Jacobian determinant is the volume of the transformation and is used to scale the probability"
    """
    def __init__(self, n_inputs, n_outputs, stride, validate_args=False, name='conv'):
        """
        Args:
            n_inputs (int): the number of features (last dim)
            n_outputs (int): the target num of feautres
            stride (int): the size of steps to take when convolving
        """
        
        """
        If the bijector changes the shape of the input, you must also implement:

          - _forward_event_shape_tensor,
          - _forward_event_shape (optional),
          - _inverse_event_shape_tensor,
          - _inverse_event_shape (optional).
        """
        super(self.__class__, self).__init__(
            validate_args=validate_args,
            forward_min_event_ndims=0,
            name=name)
        
        self.n_outputs = n_outputs
        self.stride = stride
        self.output_shape = None
        
        with tf.variable_scope(name):
            self.weights = tf.get_variable(name='weights', 
                                           shape=[4,4,x.shape[-1],n_outputs], 
                                           dtype=tf.float32)
            self.bias = tf.get_variable(name='bias', 
                                        shape=[n_outputs], 
                                        dtype=tf.float32)
            
    def dfdx(self, x, w, b):
        # copied from tanget
        l = [1, self.stride, self.stride, 1]
        b_return = tf.ones(self.output_shape)
        
        tf_nn_conv2d = tf.nn.conv2d(x, w, l, padding='SAME')
        _return = tf_nn_conv2d + b
        assert tgt.shapes_match(_return, b_return
            ), 'Shape mismatch between return value (%s) and seed derivative (%s)' % (
            numpy.shape(_return), numpy.shape(b_return))

        # Grad of: _return = tf_nn_conv2d + b
        _btf_nn_conv2d = tgt.unbroadcast(b_return, tf_nn_conv2d)
        btf_nn_conv2d = _btf_nn_conv2d

        # Grad of: tf_nn_conv2d = tf.nn.conv2d(x, w, l, padding='SAME')
        _bx = tf.nn.conv2d_backprop_input(tf.shape(x), w, btf_nn_conv2d, l, 'SAME')
        bx = _bx
        return bx
                
    def _forward(self, x):
        self.output_shape = tf.shape(x)
        return tf.nn.conv2d(x, self.weights, [1,self.stride,self.stride,1], padding='SAME')
    
    def _inverse(self, y):
        # TODO how can I verify this!? x = f_1(f(x))
        if self.output_shape is None:
            shape = tf.shape(y)
            self.output_shape = [shape[0], shape[1]//self.stride, 
                                  shape[1]//self.stride, self.n_outputs]
        return tf.nn.conv2d_transpose(y, self.weights, strides=[1,self.stride,self.stride,1],
                                      output_shape=self.output_shape)
    
    def _forward_log_det_jacobian(self, x):
        # what shape is this supposed to return!?!!?
        
        # the det of any low rank linear operator is zero
        # but ?!?!? bc we take the det of the weights
        J = self.dfdx(x, self.weights, self.bias)
        # jacobian of a linear fn should be constant accross all batches.
        return tf.log(non_square_det(J))
    
    def _inverse_log_det_jacobian(self, y):
        return -self._forward_log_det_jacobian(self.inverse(y))

In [21]:
def non_square_det(x):
    # https://www.quora.com/How-do-we-calculate-the-determinant-of-a-non-square-matrix
    
    # tests. equal for square mats?
    # x = tf.random_normal([3,3])
    # non_square_det(x), tf.linalg.det(x)
    
    if len(x.shape) > 2:
        x_ = tf.reshape(x, [x.shape[0], x.shape[1]*x.shape[2], x.shape[3]])
        squared_mat = tf.matmul(x_, x_, transpose_b=True)
        return tf.sqrt(tf.linalg.det(squared_mat))
    else:
        squared_mat = tf.matmul(x, x, transpose_b=True)
        det = tf.sqrt(tf.linalg.det(squared_mat))
        return tf.linalg.det(x)

In [22]:
x = tf.random_normal([10,10])
non_square_det(x)

<tf.Tensor: id=519, shape=(), dtype=float32, numpy=128.04897>

In [27]:
x = tf.random_normal([10,28,28,4])
det = non_square_det(x)

In [105]:
x = tf.random_normal([2, 28,28,1])
f = Conv(1, 1, 1)

In [106]:
y = f.forward(x)
x_ = f.inverse(y)  # uses caching!?
tf.reduce_sum(x-x_)

<tf.Tensor: id=1793, shape=(), dtype=float32, numpy=0.0>

In [107]:
f_ = tfb.Invert(f)  # does this construct a new instance?? which would mean new params are init
x_ = f_.forward(y)
tf.reduce_sum(x-x_)

<tf.Tensor: id=1799, shape=(), dtype=float32, numpy=-1.368999>

In [108]:
f_.forward_log_det_jacobian(x, event_ndims=0)

TypeError: reshape() missing 1 required positional argument: 'shape'

In [115]:
x = tf.random_normal([2,8])
g = tfb.AffineLinearOperator(shift=tf.random_normal([8]),
                             scale=tf.linalg.LinearOperatorFullMatrix(tf.random_normal([8,8])))
y = g.forward_log_det_jacobian(x, event_ndims=1)
y.shape, y  #?!?!?



  _get_logger().warn(msg, *args, **kwargs)


(TensorShape([]),
 <tf.Tensor: id=2050, shape=(), dtype=float32, numpy=2.8675199>)

In [57]:
f_.inverse_log_det_jacobian(y, event_ndims=0)

<tf.Tensor: id=1130, shape=(), dtype=float32, numpy=-inf>

In [166]:
fn = tfb.Chain([
    Conv(n_inputs=1, n_outputs=1, stride=1, name='0'),
#     tfb.Invert(Conv(n_inputs=1, n_outputs=1, stride=2, name='1')),
])
dist = tfd.MultivariateNormalDiag(loc=tf.zeros([1, 28, 28, 1]), 
                                  scale_diag=tf.ones([1, 28, 28, 1]))

density = tfd.TransformedDistribution(
    distribution=dist,
    bijector=fn)

In [167]:
y = density.sample()
y.shape

TensorShape([Dimension(1), Dimension(28), Dimension(28), Dimension(1)])

In [168]:
p = density.prob(y)

InvalidArgumentError: Invalid reduction dimension (-1 for input with 0 dimension(s) [Op:Sum]

In [17]:
tf.linalg.det(tf.constant([
    [100.0, 0.0],
    [1e-10, 1.0]
]))

<tf.Tensor: id=895, shape=(), dtype=float32, numpy=100.00001>

In [18]:
"""
 |  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the
 |  (expensive) forward-mode calculation to draw samples and the (cheap)
 |  reverse-mode calculation to compute log-probabilities. Conversely, a
 |  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses
 |  the (expensive) forward-mode calculation to compute log-probabilities and the
 |  (cheap) reverse-mode calculation to compute samples.  See "Example Use"
 |  [below] for more details.
"""

'\n |  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the\n |  (expensive) forward-mode calculation to draw samples and the (cheap)\n |  reverse-mode calculation to compute log-probabilities. Conversely, a\n |  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses\n |  the (expensive) forward-mode calculation to compute log-probabilities and the\n |  (cheap) reverse-mode calculation to compute samples.  See "Example Use"\n |  [below] for more details.\n'

In [21]:

dims = 5
# A common choice for a normalizing flow is to use a Gaussian for the base
# distribution. (However, any continuous distribution would work.) E.g.,
maf = tfd.TransformedDistribution(
distribution=tfd.Normal(loc=0., scale=1.),
bijector=tfb.MaskedAutoregressiveFlow(
    shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
    hidden_layers=[512, 512])),
    event_shape=[1, dims])

x = maf.sample()  # Expensive; uses `tf.while_loop`, no Bijector caching.
maf.log_prob(x)   # Almost free; uses Bijector caching.
maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.

NotImplementedError: Caching devices are not yet supported when eager execution is enabled.