In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

from made import MADE
from data.bsds300 import BSDS300

from train import train_forward
from scores import log_likelihood
from scores import difference_loglik

from utils import update_device

In [3]:
from flows import create_iaf
from flows import create_maf
from flows import create_paf
from flows import create_realnvp
from flows import create_flows

In [4]:
from structure.ar import AR
from structure.iar import IAR
from structure.twoblock import TwoBlock

from transforms.affine import Affine
from transforms.piecewise import PiecewiseAffine

In [5]:
# Two device handles: `device` is the GPU when PyTorch reports one (CPU
# otherwise); `device_cpu` is always the CPU and is used further down to move
# the flow and dataset back after training.
device_cpu = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Dataset used for every training / evaluation cell below.
# NOTE(review): `ToyDataset` is not imported by any of the import cells visible
# in this notebook (only `BSDS300` is), so on a fresh kernel this line raises
# NameError unless the name comes from somewhere not shown here — add the
# import to the top import cell.
dataset = ToyDataset()

In [None]:
# Hyperparameters handed, in this order, to the create_* flow factory below.
# NOTE(review): meanings are inferred from the names only — confirm against
# the factory signatures in flows.py.
dim_input = 2
dim_hidden = [10] * 3          # i.e. [10, 10, 10]
num_trans = 8
perm_type = 'alternate'

In [None]:
# Transform classes (8 entries: two Affine then two PiecewiseAffine, repeated
# twice) and the structure class. Both are only referenced by the
# commented-out create_flows(...) call in the flow-construction cell.
transformations = [Affine, Affine, PiecewiseAffine, PiecewiseAffine] * 2
structures = IAR

In [None]:
# Build the flow that the rest of the notebook trains and evaluates.
# Exactly one of the lines below should be active; the commented-out lines are
# alternative factories from flows.py that take the same four leading
# arguments (create_flows additionally takes `structure` / `transformation`).
flow = create_iaf(dim_input, dim_hidden, num_trans, perm_type)
#flow = create_maf(dim_input, dim_hidden, num_trans, perm_type)
#flow = create_paf(dim_input, dim_hidden, num_trans, perm_type)
#flow = create_realnvp(dim_input, dim_hidden, num_trans, perm_type)
#flow = create_flows(dim_input, dim_hidden, num_trans, perm_type, structure=structures, transformation=transformations)

In [None]:
# Put the flow and the dataset on the training device
# (presumably what utils.update_device does — confirm there).
update_device(device, flow, dataset)

# Plain SGD is the active optimizer; AdamW is kept as a ready alternative.
#optimizer = torch.optim.AdamW(flow.parameters(), lr=1e-3)
optimizer = torch.optim.SGD(flow.parameters(), lr=1e-3)

# `loss` is whatever train_forward returns; the next cell plots and slices it.
loss = train_forward(
    flow,
    flow.get_base_distr(),
    dataset.get_training_data(),
    optimizer,
    epochs=200,
    batch_size=16,
    print_n=10,
)

# Hand the flow and dataset back to the CPU for the sampling / plotting cells.
update_device(device_cpu, flow, dataset)

In [None]:
# Training-loss curves, side by side: the complete `loss` history on the left
# and the same history with its first 20 entries dropped on the right
# (presumably so the early, large values do not dominate the y-axis).
fig, (ax_full, ax_tail) = plt.subplots(1, 2)

ax_full.plot(loss)
ax_full.set_title("training loss")

ax_tail.plot(loss[20:])
ax_tail.set_title("training loss (first 20 entries skipped)")

# Trailing ';' keeps the cell from echoing a return value under the figure.
fig.tight_layout();

In [None]:
# Training split, reused below for plotting and for the log-probability checks.
train_data = dataset.get_training_data()

In [None]:
# Draw 800 samples from the flow. Gradients are not needed for sampling, so the
# call runs under no_grad.
with torch.no_grad():
    sample, log_prob = flow.sample(800)
    # Only the last entry of `sample` is plotted. The explicit .cpu() makes the
    # NumPy conversion work regardless of the device the flow is on:
    # Tensor.numpy() raises for non-CPU tensors, and .cpu() is a no-op for
    # tensors that are already on the CPU.
    sample_last = sample[-1].detach().cpu().numpy()

In [None]:
# Host-side NumPy view of the training points, so the axis limits and the
# scatter plot work on the same kind of data as `sample_last` (a NumPy array)
# instead of mixing NumPy scalars with torch tensors.
# (assumes train_data is a torch.Tensor, as the original .to('cpu') call implies)
train_points = train_data.detach().cpu().numpy()

# Shared axis limits covering both point clouds, as plain Python floats.
xlims = (
    float(min(sample_last[:, 0].min(), train_points[:, 0].min())),
    float(max(sample_last[:, 0].max(), train_points[:, 0].max())),
)
ylims = (
    float(min(sample_last[:, 1].min(), train_points[:, 1].min())),
    float(max(sample_last[:, 1].max(), train_points[:, 1].max())),
)

# Left: samples drawn from the flow. Right: training data. Same limits on both
# panels so the two clouds can be compared directly.
fig, (ax_flow, ax_data) = plt.subplots(1, 2)

ax_flow.scatter(sample_last[:, 0], sample_last[:, 1])
ax_flow.set_xlim(xlims)
ax_flow.set_ylim(ylims)
ax_flow.set_title("flow samples")

ax_data.scatter(train_points[:, 0], train_points[:, 1])
ax_data.set_xlim(xlims)
ax_data.set_ylim(ylims)
ax_data.set_title("training data")

# Trailing ';' keeps the cell from echoing a return value under the figure.
fig.tight_layout();

In [None]:
# Score the flow's own samples with dataset.evaluate (presumably the target
# log-density — confirm in the dataset class), then display the mean absolute
# gap to the log-probabilities the flow returned for those samples.
log_prob_target = dataset.evaluate(sample[-1])
(log_prob_target - log_prob).abs().mean()

In [None]:
# Run the flow's evaluate() on the training data without tracking gradients.
# It returns two values, unpacked here as z_train and log_prob_train
# (presumably latent codes and per-point log-probabilities — confirm in the
# flow class).
with torch.no_grad():
    z_train, log_prob_train = flow.evaluate(train_data)

In [None]:
# Reference values for the training points from dataset.evaluate, compared
# against log_prob_train in the next cells.
log_prob_train_target = dataset.evaluate(train_data)

In [None]:
# Mean absolute difference between dataset and flow log-probabilities (train).
(log_prob_train_target - log_prob_train).abs().mean()

In [None]:
# Displayed as a pair: (mean flow log-prob, mean dataset log-prob) on train.
log_prob_train.mean(), log_prob_train_target.mean()

In [None]:
# Test split, used for the same log-probability comparison as the training data.
test_data = dataset.get_test_data()

In [None]:
# Run the flow's evaluate() on the test data without tracking gradients.
with torch.no_grad():
    # The first return value is bound to z_test rather than z_train, so the
    # result of the earlier flow.evaluate(train_data) cell is not overwritten
    # with values computed from the test split.
    z_test, log_prob_test = flow.evaluate(test_data)

In [None]:
# Reference values for the test points from dataset.evaluate, compared against
# log_prob_test in the next cells.
log_prob_test_target = dataset.evaluate(test_data)

In [None]:
# Mean absolute difference between dataset and flow log-probabilities (test).
(log_prob_test_target - log_prob_test).abs().mean()

In [None]:
# Displayed as a pair: (mean flow log-prob, mean dataset log-prob) on test.
log_prob_test.mean(), log_prob_test_target.mean()

In [None]:
# scores.log_likelihood returns two values for (data, flow), unpacked here as
# log_lik and mean (presumably per-point log-likelihoods and their average —
# confirm in scores.py). Neither name is used again in the visible cells.
log_lik, mean = log_likelihood(train_data, flow)

In [None]:
# scores.difference_loglik takes the data, the dataset and the flow and returns
# two values, displayed in the next cell (presumably a per-point difference
# between dataset and flow log-likelihoods plus its mean — confirm in scores.py).
log_lik_diff, mean_diff = difference_loglik(train_data, dataset, flow)

In [None]:
# Display both results of difference_loglik as the cell output.
log_lik_diff, mean_diff

In [7]:
# NOTE(review): this rebinds `dataset`, which an earlier cell set to
# ToyDataset(). It is also the only executed cell after In [5] (execution
# count 7; the cells in between show In [None]), so the saved notebook state
# does not correspond to a top-to-bottom run — confirm which dataset the
# analysis above is meant to use.
dataset = BSDS300()