In [None]:
# This notebook generates, trains, and benchmarks normalizing flows on univariate analytic distributions.
# The source distribution can be Laplace, Exponential, Cauchy, ContinuousBernoulli, HalfCauchy, HalfNormal, Normal, or Uniform.
# See flows.py for a selection of modules.

In [9]:
import torch
from torch.distributions import *
from flows import *

# see kl-estimator.ipynb for an explanation
def kl_estimate_log(log_px, log_qx, n):
    """Monte-Carlo estimate of KL(p || q) from per-sample log-densities.

    Args:
        log_px: log p(x_i) evaluated at each sample x_i.
        log_qx: log q(x_i) evaluated at the same samples.
        n: number of samples; accepted for interface compatibility but not
            used, because ``mean()`` already normalises by the element count.

    Returns:
        The mean of ``log_px - log_qx`` over all elements.
    """
    log_ratio = log_px - log_qx
    return log_ratio.mean()


dim = 1
datapoints = 2500

# Seed torch's global RNG so the sampled dataset (and anything later drawn
# from the same RNG) is reproducible under Restart Kernel -> Run All.
torch.manual_seed(0)

# Any of these distributions can be used to test a flow
# Uncomment one and run the rest of the notebook.
# dist = Laplace(torch.tensor([0.0]), torch.tensor([1.]))
dist = Exponential(torch.tensor([1.0]))
# dist = Cauchy(torch.tensor([0.0]), torch.tensor([1.0]))
# dist = ContinuousBernoulli(torch.tensor([0.3]))
# dist = HalfCauchy(torch.tensor([1.0]))
# dist = HalfNormal(torch.tensor([1.0]))
# dist = Normal(torch.tensor([0.0]), torch.tensor([1.0]))
# dist = TransformedDistribution { ... }
# dist = Uniform(torch.tensor([0.0]), torch.tensor([1.0]))

# `sample_n` is deprecated; `sample((n,))` is the supported equivalent.
# The distribution parameters have shape (1,), so `table` is (datapoints, 1).
table = dist.sample((datapoints,))
dataset = torch.utils.data.TensorDataset(table)
# No shuffling: batches are consecutive slices of `table`, the last one partial.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1000)

In [38]:
# These are the trainable layers.  The output is interpreted as a normally distributed variable.
# The stack is three identical stages. Each stage is four DenseTriangularFlow
# layers whose boolean flag alternates True/False, followed by one SoftlogFlow.
# (The meaning of the flag is defined in flows.py.)
stage_count = 3
flag_pattern = (True, False, True, False)

layers = []
for _ in range(stage_count):
    layers.extend(DenseTriangularFlow(dim, flag) for flag in flag_pattern)
    layers.append(SoftlogFlow())
normal_flows = Flows(*layers)

# NegLogLikelihoodLoss implements the NLL on a multivariate gaussian with unit covariance
loss_fn = NegLogLikelihoodLoss(dim)
net = FlowModule(normal_flows, loss_fn)

optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

In [39]:
# Training loop!
# Runs `num_epochs` full passes over `dataloader`, taking one Adam step per
# mini-batch, then reports the loss of the very last mini-batch.
num_epochs = 200
final_loss = 0.0
for epoch in range(num_epochs):  # loop over the dataset multiple times
    for batch in dataloader:
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        # each batch is a one-element sequence holding the sample tensor
        loss = net(batch[0])
        loss.backward()
        optimizer.step()

        # Store a plain Python float rather than the tensor so the autograd
        # graph of the last step is not kept alive after the loop.
        final_loss = loss.item()
print("final loss: %.03f" % final_loss)

final loss: 1.153


In [40]:
# Prints the network parameters
# For every flow: its repr, then one indented "name = value" line per
# parameter, then a blank separator line.
for flow in normal_flows.flows:
    param_lines = [
        "  %s = %s" % (param_name, tensor.data)
        for param_name, tensor in flow.named_parameters()
    ]
    print(flow, *param_lines, "", sep="\n")

DenseTriangularFlow()
  w = tensor([[2.3088]])
  b = tensor([-0.0131])

DenseTriangularFlow()
  w = tensor([[2.2353]])
  b = tensor([-0.0088])

DenseTriangularFlow()
  w = tensor([[2.1605]])
  b = tensor([-0.0095])

DenseTriangularFlow()
  w = tensor([[2.0920]])
  b = tensor([-0.0129])

SoftlogFlow()

DenseTriangularFlow()
  w = tensor([[1.1157]])
  b = tensor([0.0621])

DenseTriangularFlow()
  w = tensor([[1.0999]])
  b = tensor([0.0458])

DenseTriangularFlow()
  w = tensor([[1.0835]])
  b = tensor([0.0316])

DenseTriangularFlow()
  w = tensor([[1.0674]])
  b = tensor([0.0193])

SoftlogFlow()

DenseTriangularFlow()
  w = tensor([[1.1526]])
  b = tensor([-1.0044])

DenseTriangularFlow()
  w = tensor([[1.5396]])
  b = tensor([-0.8566])

DenseTriangularFlow()
  w = tensor([[1.7632]])
  b = tensor([-0.6966])

DenseTriangularFlow()
  w = tensor([[1.8620]])
  b = tensor([-0.5534])

SoftlogFlow()



In [44]:
# Generates a random set of points from `dist`, and tests the network performance
# Shows side-by-side probability values (left is actual P(x))
# Also estimates the KL divergence between the P(x), and the network's reconstruction

n = 10000
# `sample_n` is deprecated; `sample((n,))` is the supported equivalent.
points = dist.sample((n,))
# log P(x) under the true distribution; keeps the trailing size-1 batch
# dimension that comes from the (1,)-shaped distribution parameters.
p_points = dist.log_prob(points)

normalized_points, log_det = normal_flows(points)
dist_target = MultivariateNormal(torch.zeros(dim), torch.eye(dim))
# Model log-density via change of variables: log N(f(x)) + log|det J_f(x)|.
# Detached because this cell only evaluates; no gradient is needed.
p_normalized_points = (dist_target.log_prob(normalized_points) + log_det).detach()

# Drop the trailing size-1 dimension so both log-density vectors are 1-D.
# Subtracting an (n, 1) tensor from an (n,) tensor would otherwise broadcast
# to an (n, n) matrix inside kl_estimate_log (same mean, but ~n^2 memory).
log_px = p_points.squeeze(-1)

# Column 0: true density P(x); column 1: density reconstructed by the flow.
uniform_with_y = torch.stack([log_px.exp(), p_normalized_points.exp()], dim=1)
print(uniform_with_y)

print()

print("~kl div:", kl_estimate_log(log_px, p_normalized_points, n))

tensor([[0.0219, 0.0199],
        [0.3832, 0.4190],
        [0.5378, 0.7401],
        ...,
        [0.6793, 0.6549],
        [0.5978, 0.7021],
        [0.0302, 0.0235]], grad_fn=<StackBackward>)

~kl div: tensor(0.1683, grad_fn=<MeanBackward0>)
