In [1]:
import torch
from torch.distributions import *
from flows import *

# dataset is skewed, correlated multivariate gaussian
dim = 2
datapoints = 2500

mu = torch.tensor([4.5, -4.5])
sigma = torch.tensor([[3.0, 2], [2, 3.0]])
dist = MultivariateNormal(mu, sigma)

table = dist.sample_n(datapoints)
dataset = torch.utils.data.TensorDataset(table)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1000)
print(table)

tensor([[ 2.7963, -6.3180],
        [ 3.5935, -5.4084],
        [ 4.3847, -5.0729],
        ...,
        [ 5.4512, -2.9535],
        [ 5.7022, -2.0564],
        [ 0.9494, -8.1974]])


In [72]:
# dataset is mixture of multivariate gaussians
dim = 2
m = 4
datapoints = 2500

from torch.distributions import MixtureSameFamily

mix = Categorical(torch.ones(m,))
comp = Independent(Normal(
             torch.randn(m,2), torch.rand(m,2)), 1)
dist = MixtureSameFamily(mix, comp)

table = dist.sample_n(datapoints)
dataset = torch.utils.data.TensorDataset(table)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1000)

In [105]:
# flows convert to normal dist
normal_flows = Flows(
    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # SoftsquareFlow(),

    DenseTriangularFlow(dim, True),
    # RankOneConvolutionFlow(dim),
    # DenseTriangularFlow(dim, False),
    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # SoftlogFlow(),
    # RankOneConvolutionFlow(dim),
    # RankOneConvolutionFlow(dim),
    # RankOneConvolutionFlow(dim),
    # RankOneConvolutionFlow(dim),
    # SoftsquareFlow(2),
    # SoftlogFlow(),
    SoftsquareFlow(2),
    SoftsquareFlow(2),
    SoftsquareFlow(2),
    SoftsquareFlow(2),
    # DenseTriangularFlow(dim, True),
    # RankOneConvolutionFlow(dim),
    # DenseTriangularFlow(dim, False),
    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # SoftlogFlow(),
    # SoftlogFlow(),
    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # RankOneConvolutionFlow(dim),

    # DenseTriangularFlow(dim, True),
    # DenseTriangularFlow(dim, False),
    # RankOneConvolutionFlow(dim),
    # SoftlogFlow(),
    # SoftsquareFlow(2),

    # DenseTriangularFlow(dim, True),
    # RankOneConvolutionFlow(dim),
    # DenseTriangularFlow(dim, False),

    # DenseTriangularFlow(dim, True),
    # RankOneConvolutionFlow(dim),
    # DenseTriangularFlow(dim, False),
    # SoftsquareFlow(2),
    # SoftlogFlow()
)

# final flow converts to normal

# train on NLL
net = FlowModule(normal_flows, NegLogLikelihoodLoss(dim))

optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

In [106]:
final_loss = 0.0
for batch in range(200):  #1oop over the dataset multiple times
    for _, data in enumerate(dataloader, 0):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        loss = net(data[0])
        loss.backward()
        optimizer.step()
        final_loss = loss
print("final loss: %.03f" % final_loss)

final loss: 2.606


In [6]:
# - See kl-estimator.ipynb for details & benchmark
def kl_estimate_log(log_px, log_qx, n):
    return (log_px - log_qx).mean()

In [107]:
n = 50000
points = dist.sample_n(n)
p_points = dist.log_prob(points)

normalized_points, log_det = normal_flows(points)
dist_target = MultivariateNormal(torch.zeros(dim), torch.eye(dim))
p_normalized_points = dist_target.log_prob(normalized_points) + log_det

uniform_with_y = torch.stack([p_points.exp(), p_normalized_points.exp()], dim=1)
print(uniform_with_y)

print()
# - RMSE measure between P and the reconstructed P.
# This performs 
# rmse = (p_points - p_normalized_points).pow(2).mean().sqrt()
# print("rmse:", rmse)

print("~kl div:", kl_estimate_log(p_points, p_normalized_points, n))

tensor([[0.3761, 0.1044],
        [0.0163, 0.0507],
        [0.0592, 0.0685],
        ...,
        [0.2189, 0.1050],
        [3.1404, 0.0699],
        [0.0604, 0.1093]], grad_fn=<StackBackward>)

~kl div: tensor(2.5758, grad_fn=<MeanBackward0>)


In [78]:
n = 5000
points = dist.sample_n(n)
p_points = dist.log_prob(points)

for ps, log_det, f in zip(*normal_flows.forward_trace(points),normal_flows.flows):
    dist_target = MultivariateNormal(torch.zeros(dim), torch.eye(dim))
    p_normalized_points = dist_target.log_prob(ps) + log_det
    approx_kl_divergence = kl_estimate_log(p_points, p_normalized_points, n).item()
    params = list(f.parameters())
    print("%1.4f - %s:" % (approx_kl_divergence, f))

3.1109 - DenseTriangularFlow():
3.0845 - DenseTriangularFlow():
3.0634 - DenseTriangularFlow():
3.0464 - DenseTriangularFlow():
3.0328 - DenseTriangularFlow():
3.0218 - DenseTriangularFlow():
3.0127 - DenseTriangularFlow():
3.0050 - DenseTriangularFlow():
2.9980 - DenseTriangularFlow():
2.9914 - DenseTriangularFlow():
2.9847 - DenseTriangularFlow():
2.9775 - DenseTriangularFlow():
2.9695 - DenseTriangularFlow():
2.9606 - DenseTriangularFlow():
2.9505 - DenseTriangularFlow():
2.9391 - DenseTriangularFlow():
2.9263 - DenseTriangularFlow():
2.9122 - DenseTriangularFlow():
2.8966 - DenseTriangularFlow():
2.8797 - DenseTriangularFlow():
2.8617 - DenseTriangularFlow():
2.8425 - DenseTriangularFlow():
2.8225 - DenseTriangularFlow():
2.8019 - DenseTriangularFlow():
2.7809 - DenseTriangularFlow():
2.7597 - DenseTriangularFlow():
2.7387 - DenseTriangularFlow():
2.7183 - DenseTriangularFlow():
2.6986 - DenseTriangularFlow():
2.6802 - DenseTriangularFlow():
2.6632 - DenseTriangularFlow():
2.6481 -

In [20]:
# GMM, just triangular
# 0.7054 - DenseTriangularFlow():
# 0.6377 - DenseTriangularFlow():
# 0.5690 - DenseTriangularFlow():
# 0.5137 - DenseTriangularFlow():
# 0.4637 - DenseTriangularFlow():
# 0.4279 - DenseTriangularFlow():
# 0.4056 - DenseTriangularFlow():
# 0.4009 - DenseTriangularFlow():

# GMM, with softsquare:
# 0.9543 - DenseTriangularFlow():
# 1.3299 - DenseTriangularFlow():
# 1.9128 - DenseTriangularFlow():
# 2.7210 - DenseTriangularFlow():
# 3.7737 - SoftsquareFlow():
# 2.1087 - DenseTriangularFlow():
# 1.0214 - DenseTriangularFlow():
# 0.6332 - DenseTriangularFlow():
# 0.3926 - DenseTriangularFlow():
# 0.2179 - SoftsquareFlow():

In [152]:
# weight decay=0
# 12.0914 - DenseTriangularFlow():
# 6.5785 - DenseTriangularFlow():
# 7.2849 - RankOneConvolutionFlow():
# 3.9024 - DenseTriangularFlow():
# 2.0706 - DenseTriangularFlow():
# 2.6122 - RankOneConvolutionFlow():
# 1.3863 - DenseTriangularFlow():
# 0.8577 - DenseTriangularFlow():
# 0.8722 - RankOneConvolutionFlow():
# 0.4547 - DenseTriangularFlow():
# 0.4224 - DenseTriangularFlow():
# 0.0007 - RankOneConvolutionFlow():
# 0.0010 - final

# weight decay=0.3
# 11.0923 - DenseTriangularFlow():
# 5.7981 - DenseTriangularFlow():
# 5.7981 - RankOneConvolutionFlow():
# 3.1169 - DenseTriangularFlow():
# 1.7651 - DenseTriangularFlow():
# 1.7651 - RankOneConvolutionFlow():
# 1.0828 - DenseTriangularFlow():
# 0.7314 - DenseTriangularFlow():
# 0.6163 - RankOneConvolutionFlow():
# 0.4187 - DenseTriangularFlow():
# 0.2986 - DenseTriangularFlow():
# 0.0802 - RankOneConvolutionFlow():
# 0.0879 - final

In [59]:
# dense triangular x4, softlog, dense triangular x4, softlog:
# loss: 2.951, rmse 0.0150

In [21]:
for f in normal_flows.flows:
    print(f)
    for name, param in f.named_parameters():
        print("  %s = %s" % (name, param.data))
    print()



DenseTriangularFlow()
  w = tensor([[ 0.9963, -0.0506],
        [ 0.0000,  1.0312]])
  b = tensor([0.0112, 0.4087])

RankOneConvolutionFlow()
  u = tensor([[-0.0097],
        [ 0.5840]])
  vT = tensor([[0.1250, 0.3196]])

DenseTriangularFlow()
  w = tensor([[0.9746, 0.0000],
        [0.1247, 1.0445]])
  b = tensor([-0.0548,  0.4099])

DenseTriangularFlow()
  w = tensor([[ 0.9707, -0.0587],
        [ 0.0000,  1.0640]])
  b = tensor([-0.0547,  0.4202])

RankOneConvolutionFlow()
  u = tensor([[-0.0569],
        [ 0.7591]])
  vT = tensor([[0.0269, 0.4760]])

DenseTriangularFlow()
  w = tensor([[ 0.9605,  0.0000],
        [-0.0578,  1.0791]])
  b = tensor([-0.0906,  0.4134])

SoftsquareFlow()
  a = tensor([0.8160, 0.3563])
  b = tensor([-0.3085,  0.4978])

DenseTriangularFlow()
  w = tensor([[ 0.8999, -0.1381],
        [ 0.0000,  0.8258]])
  b = tensor([ 0.1859, -0.2439])

RankOneConvolutionFlow()
  u = tensor([[ 0.0754],
        [-0.8674]])
  vT = tensor([[-0.7212,  0.7180]])

DenseTriangu

In [13]:
print(net.flow.flows[2].u)
print(net.flow.flows[2].u.grad)
print(net.flow.flows[2].vT)
print(net.flow.flows[2].vT.grad)

Parameter containing:
tensor([[0.1384],
        [0.0312]], requires_grad=True)
tensor([[-0.0010],
        [ 0.0205]])
Parameter containing:
tensor([[-0.0314, -0.1211]], requires_grad=True)
tensor([[ 0.0111, -0.0070]])


In [8]:
print(list(net.parameters()))

[Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099],
        [-0.0099]], requires_grad=True), Parameter containing:
tensor([[-0.0099, -0.0099]], requires_grad=True)]
