In [7]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append("./nf/")
import itertools

import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.init as init
from torch import nn
from torch import distributions
from torch.nn.parameter import Parameter

from torch.distributions import (
    Normal,
    MultivariateNormal,
    Uniform,
    TransformedDistribution,
    SigmoidTransform,
)

from nf.nets import MLP
from nf.flows import NormalizingFlow, NormalizingFlowModel, Invertible1x1Conv, ActNorm
from nf.spline_flows import NSF_CL

from torch.utils.data import DataLoader, TensorDataset

In [8]:
# Prefer a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')

# Report which device was selected.
if device.type == "cpu":
    print('Using the cpu...')
elif device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

Using the cpu...


In [9]:
# choose data here
# Training spectra: read the text file, transpose it, and wrap the result in a tensor.
# After the transpose the first axis lines up with the rows of the label table
# (284 rows in the recorded output).
spectra = torch.Tensor(np.loadtxt("./data/galah_batch_Xtrain.csv").T)
dim = spectra.size(-1)  # length of the last axis of the full spectra tensor
print(dim)

# Label table belonging to the spectra; its shape is shown as the cell output.
labels = np.loadtxt("./data/galah_batch_ytrain.csv")
labels.shape

5000


(284, 28)

> Build a smaller-dimensional problem: keep only the first 3 label columns and the first 10 spectrum columns.

In [15]:
# Conditioning variables: the first three label columns
# (teff, log, feh according to the original note).
y_np = labels[:, :3]
print(y_np.shape)

# Same values as a float32 tensor with exactly three columns.
y = torch.tensor(y_np, dtype=torch.float32).reshape(-1, 3)
print(y.shape)

# Flow inputs: the first ten columns of every spectrum.
x = spectra[:, 0:10]

# Final shapes of the inputs (x) and of the conditioning labels (y).
print('x shape', x.shape)
print('y shape', y.shape)

(284, 3)
torch.Size([284, 3])
x shape torch.Size([284, 10])
y shape torch.Size([284, 3])


> Set up the model: base distribution (prior) and flow layers

In [16]:
# choose prior here
# Sizes are read off the prepared tensors: `dim` for the flow inputs,
# `cond_dim` for the conditioning labels.
dim, cond_dim = x.size(-1), y.size(-1)

# Base distribution: multivariate normal with zero mean and identity covariance,
# with both parameters allocated on `device`.
base_mu = torch.zeros(dim, device=device)
base_cov = torch.eye(dim, device=device)
prior = MultivariateNormal(base_mu, base_cov)


In [19]:
# configure the normalising flow
nfs_flow = NSF_CL

# Five NSF_CL layers, each conditioned on `cond_dim` context features.
couplings = [
    nfs_flow(dim=dim, context_features=cond_dim, K=8, B=3, hidden_dim=128)
    for _ in range(5)
]
# One Invertible1x1Conv and one ActNorm per NSF_CL layer
# (built in the same order as before: NSF_CL layers, then convs, then norms).
convs = [Invertible1x1Conv(dim=dim) for _ in couplings]
norms = [ActNorm(dim=dim) for _ in couplings]

# Interleave into the final sequence: (ActNorm, Invertible1x1Conv, NSF_CL) repeated five times.
flows = [layer for group in zip(norms, convs, couplings) for layer in group]

In [None]:
# initialise the model
model = NormalizingFlowModel(prior, flows).to(device)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=2e-6, weight_decay=0)  # todo tune WD
print("number of params: ", sum(p.numel() for p in model.parameters()))

# train_loader
# Each dataset item is an (x, y) pair: one input row and its conditioning labels.
dataset = TensorDataset(x, y)

# Create a data loader from the dataset
# Type of sampling and batch size are specified at this step
# Pinned host memory only helps host-to-GPU copies, so it is requested only when
# training on CUDA; on a CPU-only machine it is useless and makes PyTorch warn.
loader = DataLoader(
    dataset,
    batch_size=71,
    shuffle=True,
    pin_memory=(device.type == "cuda"),
)  # this will give x, y per batch


> Train the model

In [None]:
# Train the flow by minimising the negative log-likelihood of x given y,
# then write the learned weights to disk.
model.train()
print("Started training")
for k in range(20000):
    # Batch tensors get their own names so the notebook-level `x` and `y`
    # (the full dataset, used again by later cells) are not overwritten
    # by the last shuffled mini-batch.
    for x_batch, y_batch in loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # The model returns the transformed variables plus the two terms whose
        # sum is used as the per-sample log-likelihood.
        zs, prior_logprob, log_det = model(x_batch, context=y_batch)
        logprob = prior_logprob + log_det
        loss = -torch.sum(logprob)  # NLL, summed over the batch

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Progress report every 100 passes over the data; the value shown is the
    # loss of the last mini-batch of that pass.
    if k % 100 == 0:
        print("Loss at step k =", str(k) + ":", loss.item())

# Persist the trained weights for the reload cells further down.
path = "test_model.pth"
torch.save(model.state_dict(), path)

In [None]:
# Draw 300 samples from the flow, all conditioned on the first row of `y`.
n_samples = 300

# Repeat the conditioning row once per sample and place it on the model's device,
# so the cell works whether the model (or `y`) lives on the CPU or on the GPU.
model_device = next(model.parameters()).device
cont = (
    y[0]
    .detach()
    .reshape(1, -1)
    .repeat(n_samples, 1)
    .to(device=model_device, dtype=torch.float32)
)

# Sampling needs no gradients; skipping the autograd graph saves memory.
with torch.no_grad():
    zs = model.sample([n_samples], context=cont)

# Keep the final entry of `zs` (presumably the output of the last flow step)
# and convert it to a NumPy array on the CPU.
z = zs[-1]
z = z.to('cpu')
z = z.detach().numpy()


In [10]:
# Read the weights written by the training cell.
# map_location="cpu" lets a checkpoint saved from a GPU run be opened on a
# CPU-only machine; load_state_dict afterwards copies the values into the
# model's parameters wherever they live.
# NOTE: torch.load unpickles the file, so only open checkpoints you trust.
fname = 'test_model.pth'
state_dict = torch.load(fname, map_location="cpu")



In [20]:
# Rebuild the model around the same prior and flow layers and move it to `device`,
# matching how the model is created for training; `prior` already lives on `device`,
# so the flow layers must be there too.
model = NormalizingFlowModel(prior, flows).to(device)

In [21]:
# Copy the weights read from the checkpoint into the rebuilt model; the returned
# key-matching report is displayed as the cell output.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [22]:
# Switch the model to evaluation mode; the call returns the module, so its
# layer-by-layer repr is displayed as the cell output.
model.eval()

NormalizingFlowModel(
  (flow): NormalizingFlow(
    (flows): ModuleList(
      (0): ActNorm()
      (1): Invertible1x1Conv()
      (2): NSF_CL(
        (f1): MLP(
          (net): Sequential(
            (0): Linear(in_features=5, out_features=128, bias=True)
            (1): LeakyReLU(negative_slope=0.2)
            (2): Linear(in_features=128, out_features=128, bias=True)
            (3): LeakyReLU(negative_slope=0.2)
            (4): Linear(in_features=128, out_features=128, bias=True)
            (5): LeakyReLU(negative_slope=0.2)
            (6): Linear(in_features=128, out_features=115, bias=True)
          )
        )
        (f2): MLP(
          (net): Sequential(
            (0): Linear(in_features=5, out_features=128, bias=True)
            (1): LeakyReLU(negative_slope=0.2)
            (2): Linear(in_features=128, out_features=128, bias=True)
            (3): LeakyReLU(negative_slope=0.2)
            (4): Linear(in_features=128, out_features=128, bias=True)
            (5):

In [23]:
# Draw 300 samples from the reloaded flow, all conditioned on the first row of `y`.
n_samples = 300

# Repeat the conditioning row once per sample and place it on the model's device,
# so the cell works whether the model (or `y`) lives on the CPU or on the GPU.
model_device = next(model.parameters()).device
cont = (
    y[0]
    .detach()
    .reshape(1, -1)
    .repeat(n_samples, 1)
    .to(device=model_device, dtype=torch.float32)
)

# Sampling needs no gradients; skipping the autograd graph saves memory.
with torch.no_grad():
    zs = model.sample([n_samples], context=cont)

# Keep the final entry of `zs` (presumably the output of the last flow step)
# and convert it to a NumPy array on the CPU.
z = zs[-1]
z = z.to('cpu')
z = z.detach().numpy()



In [25]:
# Display the sampled values (presumably one row per drawn sample; NumPy abbreviates the printout).
z

array([[1.0531175 , 0.98051447, 1.0748194 , ..., 0.6227579 , 1.1264045 ,
        0.7954603 ],
       [1.0398533 , 1.0208899 , 1.0808082 , ..., 0.769331  , 1.1143516 ,
        0.89089245],
       [1.0489525 , 0.97699344, 1.0360457 , ..., 0.56318367, 1.0298252 ,
        0.9020064 ],
       ...,
       [1.0420896 , 0.9865067 , 1.0371429 , ..., 0.71006024, 1.2230097 ,
        0.8171582 ],
       [1.0254723 , 1.0084943 , 1.0179685 , ..., 0.81573486, 1.1886083 ,
        0.7591804 ],
       [1.0421239 , 0.98651856, 1.0371597 , ..., 0.7100506 , 1.2232546 ,
        0.81725633]], dtype=float32)