In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from functools import partial

from NormalizingFlows.src.train import train_backward_with_tuning, train_backward
from NormalizingFlows.src.scores import log_likelihood, difference_loglik
from NormalizingFlows.src.utils import update_device, load_best_model, load_checkpoint_model
from NormalizingFlows.src.flows import create_flows

from NormalizingFlows.src.structure.ar import AR 
from NormalizingFlows.src.structure.iar import IAR
from NormalizingFlows.src.structure.twoblock import TwoBlock

from NormalizingFlows.src.transforms.affine import Affine
from NormalizingFlows.src.transforms.piecewise import PiecewiseAffine
from NormalizingFlows.src.transforms.piecewise_additive import PiecewiseAffineAdditive
from NormalizingFlows.src.transforms.piecewise_affine import PiecewiseAffineAffine

from NormalizingFlows.src.data.toydata import ToyDataset
from NormalizingFlows.src.data.bsds300 import BSDS300
from NormalizingFlows.src.data.gas import Gas
from NormalizingFlows.src.data.hepmass import Hepmass
from NormalizingFlows.src.data.miniboone import Miniboone
from NormalizingFlows.src.data.power import Power

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device_cpu = torch.device("cpu")

In [4]:
dataset = BSDS300()
#dataset = Gas()
#dataset = Hepmass()
#dataset = Miniboone()
#dataset = Power()

print('Training size:', dataset.train_n, 'Validation size:', dataset.valid_n, 'Test size:', dataset.test_n)
print('Dimension:', dataset.dim_input)

Training size: 1000000 Validation size: 50000 Test size: 250000
Dimension: 63


In [5]:
dim_input = dataset.dim_input

num_trans = 10
perm_type = 'random'

In [6]:
dim_hidden = [126,126,105,105, 20, 10]

flows, names = [], []
flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=AR, 
            transformation=PiecewiseAffine)), names.append('PAF')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=AR, 
#            transformation=PiecewiseAffineAdditive)), names.append('PAFAd')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=AR, 
#            transformation=PiecewiseAffineAffine)), names.append('PAFAf')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, permtype, flow_forward=False, structure=AR, 
#            transformation=Affine)), names.append('MAF')

#flows.append(create_flows(dim_input, dim_hidden, 2*num_trans, permtype, flow_forward=False, structure=AR, 
#            transformation=Affine)), names.append('MAF-double')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=TwoBlock,
#            transformation=Affine)), names.append('Real NVP')

#flows.append(create_flows(dim_input, dim_hidden, 2*num_trans, perm_type, flow_forward=False, structure=TwoBlock,
#            transformation=Affine)), names.append('Real NVP-double')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=TwoBlock,
#            transformation=PiecewiseAffine)), names.append('TwoBlock-PAF')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=TwoBlock,
#            transformation=PiecewiseAffineAdditive)), names.append('TwoBlock-PAFAd')

#flows.append(create_flows(dim_input, dim_hidden, num_trans, perm_type, flow_forward=False, structure=TwoBlock,
#            transformation=PiecewiseAffineAffine)), names.append('TwoBlock-PAFAf')

for ind, flow in enumerate(flows):
    flow.name = names[ind]

In [None]:
tuning = False
if tuning:
    losses = []
    optimizers = []

    epochs = 200
    batch_size = 16
    num_hyperparam_samples = 4

    config = {
        'lr': tune.loguniform(1e-4, 1e-1),
        'weight_decay': tune.loguniform(1e-5, 1e-1)
    }
    scheduler = ASHAScheduler(
        time_attr='training_iteration',
        metric="loss",
        mode='min',
        max_t=epochs,
        grace_period=100,
        reduction_factor=2
    )
    reporter=CLIReporter(
        metric_columns=['loss', 'training_iteration']
    )

    for ind, flow in enumerate(flows):
        update_device(device_cpu, flow, dataset)
        result = tune.run(
            partial(train_backward_with_tuning, model=flow, dataset=dataset, epochs=epochs, batch_size=batch_size, print_n=epochs+1, name=names[ind]),
            config=config,
            num_samples=num_hyperparam_samples,
            scheduler=scheduler,
            progress_reporter=reporter,
            verbose=0
        )

        update_device(device_cpu, flow, dataset)


In [39]:
training = True
if training:
    losses = []
    optimizers = []

    epochs = 10
    batch_size = 800

    for i in range(len(flows)):
        flow = flows[i]
        update_device(device, flow, dataset)

        #optimizer = torch.optim.AdamW(flow.parameters(), lr=1e-3, weight_decay=1e-2)
        optimizer = torch.optim.SGD(flow.parameters(), lr=1e-4)
        optimizers.append(optimizer)

        losses.append(train_backward(flow, dataset.get_training_data(), optimizer, epochs, batch_size, print_n=200, save_checkpoint=True, burn_in=-1))

        update_device(device_cpu, flow, dataset)

ValueError: optimizer got an empty parameter list

In [None]:
#Additional training with same optimizer
additional_training = False
if additional_training:
    epochs = 1
    batch_size = 16
    add_flows, add_optimizers, add_losses = [], [], []
    for i in range(len(flows)):
        flow, optimizer, loss = load_checkpoint_model(flows[i], optimizers[i])
        add_flows.append(flow)
        add_optimizers.append(optimizer)
        add_losses.append(loss)
    
    flows, optimizers, losses = add_flows, add_optimizers, add_losses
    
    for i in range(len(flows)):
        flow = flows[i]
        update_device(device, flow, dataset)

        optimizer = optimizers[i]

        losses[i] += (train_backward(flow, dataset.get_training_data(), optimizer, epochs, batch_size, print_n=100, save_checkpoint=True, burn_in=1))

        update_device(device_cpu, flow, dataset)

In [None]:
best_flows = []
for flow in flows:
    best_flows.append(load_best_model(flow))

In [None]:
log_scale = False
num_epoch_skip = 0

plt.subplot(2,1,1)
for i in range(len(losses)):
    plt.plot(losses[i], label=names[i], alpha=0.8)
plt.legend()

plt.subplot(2,1,2)
for i in range(len(losses)):
    plt.plot((losses[i])[num_epoch_skip:], label=names[i], alpha=0.8)
plt.legend()

if log_scale:
    plt.yscale('log')

In [None]:
print('Results based on training data:' + '\n')

train_data = dataset.get_training_data()
for flow in best_flows:
    log_lik, mean = log_likelihood(train_data, flow)
    print("Mean loglikelihood for {}: {}".format(str(flow), mean))

In [None]:
print('Results based on validation data' + '\n')

valid_data = dataset.get_validation_data()
for flow in best_flows:           
    log_lik, mean = log_likelihood(valid_data, flow)
    print("Mean loglikelihood for {}: {}".format(str(flow), mean))

In [None]:
print('Results based on test data' + '\n')

test_data = dataset.get_test_data()
for flow in best_flows:           
    log_lik, mean = log_likelihood(test_data, flow)
    print("Mean loglikelihood for {}: {}".format(str(flow), mean))