In [29]:
# This notebook tests normalizing flow implementations.
# Each flow is checked for:
# - `backward` (inverse) accuracy: applying `backward` to the forward output should recover the input.
# - Jacobian determinant accuracy: the autodiff Jacobian is used to check that the Jacobian is actually triangular
#     (necessary for log_det to reduce to J.diag().log().sum()) and that the flow's reported log_det is correct.
# This is going to be the basis for the unit tests when flows.py becomes a real library.

In [28]:
import torch
import torch.nn as nn
import torch.autograd as grad

def jacobian(f, x):
    """Computes the batched Jacobian of f w.r.t. x using reverse-mode autodiff.

    One vector-Jacobian product is evaluated per output component. For output
    index i the "test vector" v is 1 in column i and 0 everywhere else, so
    the reverse-mode rule

        grad[b, j] = sum_k v[b, k] * dy[b, k] / dx[b, j]

    reduces to dy[b, i] / dx[b, j], where:
    - b is the batch index from 0 to B - 1
    - i, j are the output / input vector indices
    This relies on batch elements not interacting inside f (row b of the
    output may only depend on row b of the input).

    Layout note: the per-output gradients are stacked along the LAST axis, so
    result[b, j, i] = dy[b, i] / dx[b, j]. Each result[b] is therefore the
    transpose of the conventional Jacobian; its diagonal and determinant are
    unaffected by the transposition.

    :param f: callable taking x and returning either the output tensor y of
        shape [B, M] or a tuple/list whose first element is y (e.g. a flow
        returning (y, log_det)); only y is differentiated
    :param x: torch.tensor of shape [B, N] with requires_grad=True
    :return: torch.tensor of shape [B, N, M] ([B, N, N] for f: R^N -> R^N)
    :raises ValueError: if x is not 2-dimensional
    """
    if x.dim() != 2:
        raise ValueError(
            "expected x of shape [B, N], got shape {}".format(tuple(x.shape)))

    out = f(x)
    # Flows return (y, log_det); plain functions return y directly.
    y = out[0] if isinstance(out, (tuple, list)) else out

    columns = []
    for i in range(y.shape[1]):
        # Select output component i for every batch element.
        v = torch.zeros_like(y)
        v[:, i] = 1.
        # retain_graph: the same graph is reused for every output component.
        # create_graph: keeps the Jacobian itself differentiable.
        (dy_i_dx,) = torch.autograd.grad(y,
                                         x,
                                         grad_outputs=v,
                                         retain_graph=True,
                                         create_graph=True,
                                         allow_unused=True)  # shape [B, N]
        if dy_i_dx is None:
            # allow_unused=True yields None when y does not depend on x;
            # the corresponding derivatives are exactly zero.
            dy_i_dx = torch.zeros_like(x)
        columns.append(dy_i_dx)

    return torch.stack(columns, dim=2).requires_grad_()

In [30]:
import torch
import torch.nn as nn
import torch.autograd as grad

import math
from flows import *

In [7]:
# A flow module with a simple triangular matrix multiply (with explicit weights and bias).
net = DenseTriangularFlow(2, False)
net.w = nn.Parameter(torch.tensor([[1.5, 0.0], [2.5, 0.5]]))
net.b = nn.Parameter(torch.tensor([1.0, -1.0]))
x = torch.tensor([[1.0, 2.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# For a triangular Jacobian, log|det J| is the sum of the log of its diagonal.
print(jac[0].diag().log().sum())

# Forward pass: the flow's own log_det should match the autodiff value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[1.5000, 2.5000],
         [0.0000, 0.5000]]], grad_fn=<StackBackward>)
tensor(-0.2877, grad_fn=<SumBackward0>)
tensor([[2.5000, 2.5000]], grad_fn=<AddBackward0>) tensor(-0.2877, grad_fn=<SumBackward0>)
tensor([[1., 2.]], grad_fn=<SqueezeBackward1>) tensor(0.2877, grad_fn=<MulBackward0>)


In [8]:
# A flow module which applies the logistic sigmoid function element-wise.
net = SigmoidFlow()
x = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# Only the first batch element is reduced to a log-determinant here;
# compare it against log_det[0] from the forward pass below.
print(jac[0].diag().log().sum())

# Forward pass: returns the transformed batch and one log_det per batch element.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[0.1966, 0.0000],
         [0.0000, 0.1050]],

        [[0.0452, 0.0000],
         [0.0000, 0.0177]]], grad_fn=<StackBackward>)
tensor(-3.8804, grad_fn=<SumBackward0>)
tensor([[0.7311, 0.8808],
        [0.9526, 0.9820]], grad_fn=<SigmoidBackward>) tensor([-3.8804, -7.1335], grad_fn=<SumBackward1>)
tensor([[1.0000, 2.0000],
        [3.0000, 4.0000]], grad_fn=<LogBackward>) tensor([3.8804, 7.1335], grad_fn=<NegBackward>)


In [9]:
# The sigmoid flow wrapped in InverseFlow. The inputs are the sigmoid outputs for
# 1, 2, 3, 4 rounded to four decimals, so the forward pass should return roughly
# those values (up to the rounding error of the inputs).
net = InverseFlow(SigmoidFlow())
x = torch.tensor([[0.7311, 0.8808], [0.9526, 0.9820]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# Only the first batch element is reduced to a log-determinant here;
# compare it against log_det[0] from the forward pass below.
print(jac[0].diag().log().sum())

# Forward pass of the wrapped (inverted) flow.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[ 5.0867,  0.0000],
         [ 0.0000,  9.5246]],

        [[22.1468,  0.0000],
         [ 0.0000, 56.5739]]], grad_fn=<StackBackward>)
tensor(3.8805, grad_fn=<SumBackward0>)
tensor([[1.0002, 2.0000],
        [3.0006, 3.9992]], grad_fn=<LogBackward>) tensor([3.8805, 7.1332], grad_fn=<NegBackward>)
tensor([[0.7311, 0.8808],
        [0.9526, 0.9820]], grad_fn=<SigmoidBackward>) tensor([-3.8805, -7.1332], grad_fn=<SumBackward1>)


In [10]:
# A flow module that applies the element-wise function:
# y = sign(x) log(abs(x) + 1)
# The inputs straddle zero (including x = 0 itself) to exercise both signs.
net = SoftlogFlow()
x = torch.tensor([[-1.0, -0.01, 0.0, 0.01, 2.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The Jacobian of an element-wise map is diagonal, so log|det J| is the sum of log-diagonals.
print(jac[0].diag().log().sum())

# Forward pass: the flow's own log_det should match the autodiff value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[0.5000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.9901, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.9901, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.3333]]], grad_fn=<StackBackward>)
tensor(-1.8117, grad_fn=<SumBackward0>)
tensor([[-0.6931, -0.0100,  0.0000,  0.0100,  1.0986]],
       grad_fn=<SoftlogFlowFunctionBackward>) tensor([-1.8117], grad_fn=<SumBackward1>)
tensor([[-1.0000, -0.0100,  0.0000,  0.0100,  2.0000]], grad_fn=<MulBackward0>) tensor([1.8117], grad_fn=<NegBackward>)


In [8]:
# A flow module that applies the element-wise function:
# y = a * x + b * sign(x) * x^2
net = SoftsquareFlow(1)
x = torch.tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The Jacobian of an element-wise map is diagonal, so log|det J| is the sum of log-diagonals.
print(jac[0].diag().log().sum())

# Forward pass: the flow's own log_det should match the autodiff value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
# NOTE(review): in the recorded run the forward pass is the identity (y == x with an
# identity Jacobian), yet backward printed all zeros instead of x. The inverse looks
# wrong for these default parameters -- TODO investigate in flows.py.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[1., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1.]]], grad_fn=<StackBackward>)
tensor(0., grad_fn=<SumBackward0>)
tensor([[-2., -1.,  0.,  1.,  2.]], grad_fn=<SoftsquareFlowFunctionBackward>) tensor([0.], grad_fn=<SumBackward1>)
tensor([[-0., -0., 0., 0., 0.]], grad_fn=<DivBackward0>) tensor([-0.], grad_fn=<NegBackward>)


In [9]:
# A rank-one update from the identity matrix.
# This flow is interesting, because it is not triangular, but has a tractable jacobian determinant.
# In order to calculate the reference determinant, we use numpy.linalg on the autodiff Jacobian.
from flows import *
import numpy

# The flow is used with the parameters it was constructed with (vT and u are not
# overridden here); the next cell covers explicitly chosen parameters.
net = RankOneConvolutionFlow(2, epsilon=0.0)
x = torch.tensor([[-1.0, 0.0], [1.0, 2.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The Jacobian is not triangular, so take the full determinant of the first batch element.
print(math.log(numpy.linalg.det(jac[0].detach().numpy())))

# Forward pass: the flow's own log_det should match the numpy value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[1., 0.],
         [0., 1.]],

        [[1., 0.],
         [0., 1.]]], grad_fn=<StackBackward>)
0.0
tensor([[-1.,  0.],
        [ 1.,  2.]], grad_fn=<AddBackward0>) tensor(0., grad_fn=<LogBackward>)
tensor([[-1.,  0.],
        [ 1.,  2.]], grad_fn=<SubBackward0>) tensor([[0.]], grad_fn=<LogBackward>)


In [11]:
# Another test case for the rank-one convolution flow, this time with explicitly
# assigned vT and u so that the Jacobian is not the identity.
import numpy

net = RankOneConvolutionFlow(2, epsilon=0.0)
net.vT = torch.nn.Parameter(torch.tensor([[-1.0497, -0.4827]]))
net.u = torch.nn.Parameter(torch.tensor([[-1.0532], [0.2746]]))
x = torch.tensor([[-1.0, 0.0], [1.0, 2.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The Jacobian is not triangular, so take the full determinant of the first batch element.
print(math.log(numpy.linalg.det(jac[0].detach().numpy())))

# Forward pass: the flow's own log_det should match the numpy value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x` (up to float rounding), and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[ 2.1055, -0.2882],
         [ 0.5084,  0.8675]],

        [[ 2.1055, -0.2882],
         [ 0.5084,  0.8675]]], grad_fn=<StackBackward>)
0.6795525332573441
tensor([[-2.1055,  0.2882],
        [ 3.1223,  1.4467]], grad_fn=<AddBackward0>) tensor(0.6796, grad_fn=<LogBackward>)
tensor([[-1.0000e+00, -2.9802e-08],
        [ 1.0000e+00,  2.0000e+00]], grad_fn=<SubBackward0>) tensor([[-0.6796]], grad_fn=<LogBackward>)


In [13]:
# A flow which applies the element-wise Normal CDF function.
# The inputs straddle zero (including x = 0 itself) to exercise both signs.
net = NormalCdfFlow()
x = torch.tensor([[-1.0, -0.01, 0.0, 0.01, 1.0]], requires_grad=True)

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The Jacobian of an element-wise map is diagonal, so log|det J| is the sum of log-diagonals.
print(jac[0].diag().log().sum())

# Forward pass: the flow's own log_det should match the autodiff value printed above.
y, log_det = net(x)
print(y, log_det)

# Inverse pass: keep the reconstruction under its own name so the input `x` is not
# overwritten. It should reproduce `x`, and log_det_x should be -log_det.
x_rec, log_det_x = net.backward(y)
print(x_rec, log_det_x)

tensor([[[0.2420, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.3989, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.3989, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.3989, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.2420]]], grad_fn=<StackBackward>)
tensor(-5.5948, grad_fn=<SumBackward0>)
tensor([[0.1587, 0.4960, 0.5000, 0.5040, 0.8413]], grad_fn=<MulBackward0>) tensor([-5.5948], grad_fn=<SumBackward1>)
tensor([[-1.0000, -0.0100,  0.0000,  0.0100,  1.0000]], grad_fn=<AddBackward0>) tensor([5.5948], grad_fn=<MulBackward0>)


In [14]:
# Negative log likelihood under a multivariate Gaussian with an identity covariance matrix.
# The second argument is presumably the accumulated log-determinant: the recorded
# result 0.3504 equals the batch-mean standard-normal NLL of x (2.3504) minus 2.0 -- TODO confirm.
x = torch.tensor([[0.0, 0.0], [0.3, 1.4]])
offset = torch.tensor(2.0)
net = NegLogLikelihoodLoss(2)
net(x, offset)

tensor(0.3504)

In [33]:
# A module which composes the given flows: values in (0, 1) are passed through the
# inverse Normal CDF and then scored by the negative log likelihood loss.
# Every input is 0.5; the recorded loss for this batch is -0.
to_normal = InverseFlow(NormalCdfFlow())
nll = NegLogLikelihoodLoss(2)
net = FlowModule(to_normal, nll)

x = torch.full((3, 2), 0.5)
net(x)

tensor(-0.)

In [47]:
# The Negative Log Likelihood loss function, tested against the PyTorch normal distribution.
# The two printed values should have equal magnitude and opposite sign
# (loss = -mean log-probability when the extra term passed to the loss is 0).
x = torch.tensor([[0.4, 0.5], [0.6, 0.7]])

net = NegLogLikelihoodLoss(2)
dist = torch.distributions.MultivariateNormal(
    loc=torch.zeros(2),
    covariance_matrix=torch.eye(2),
)

print(net.forward(x, 0.0))
print(dist.log_prob(x).mean())

tensor(2.1529)
tensor(-2.1529)


In [49]:
# Test several flows composed, against the jacobian determinant calculated from autodiff and np.linalg.det
# NOTE(review): in the recorded run y equals sigmoid(x) and the Jacobian is diagonal, so
# both triangular flows appear to act as the identity with their initial parameters.
# Consider assigning non-trivial weights so the composition is really exercised -- TODO confirm.
import numpy as np

tri = DenseTriangularFlow(3, True)
triu = DenseTriangularFlow(3, False)
sig = SigmoidFlow()
net = Flows(tri, triu, sig)

x = torch.tensor([[1.0, 2.0, 3.0]], requires_grad=True)

# Forward pass: prints the (y, log_det) tuple returned by the composed flow.
print(net(x))

# Autodiff reference: evaluate the Jacobian once and reuse it for both printouts.
jac = jacobian(net, x)
print(jac)
# The full determinant of the first batch element should match log_det printed above.
print(math.log(np.linalg.det(jac[0].detach().numpy())))

(tensor([[0.7311, 0.8808, 0.9526]], grad_fn=<SigmoidBackward>), tensor([-6.9776], grad_fn=<AddBackward0>))
tensor([[[0.1966, 0.0000, 0.0000],
         [0.0000, 0.1050, 0.0000],
         [0.0000, 0.0000, 0.0452]]], grad_fn=<StackBackward>)
-6.97755383437876
