In [2]:
import pytorch_lightning as pl
pl.seed_everything(42)
from torch import nn
import torch
from argparse import ArgumentParser
from data_generation_callback import MeshSampler
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 42


ModuleNotFoundError: No module named 'data_generation_callback'

In [3]:
class VAE(pl.LightningModule):
    """Fully-connected variational autoencoder trained with an MSE + weighted-KL loss.

    Args:
        hidden_dim: Width of the single hidden layer in both encoder and decoder.
        enc_out_dim: Size of the encoder output fed to the mu / log-variance heads.
        latent_dim: Dimensionality of the latent code z.
        input_dim: Number of features in each input vector.
        output_dim: Number of features in each reconstructed vector.
        kl_weight: Multiplier applied to the KL term in the training loss.
    """

    def __init__(self, hidden_dim = 64, enc_out_dim=64, latent_dim=32, input_dim=90351, output_dim=90351,
                 kl_weight=0.0001):
        super().__init__()

        # Stores every constructor argument under self.hparams (and in checkpoints).
        self.save_hyperparameters()

        # encoder, decoder
        self.encoder = nn.Sequential(nn.Linear(in_features = input_dim, out_features = hidden_dim),
                                    nn.Sigmoid(),
                                    nn.Linear(in_features = hidden_dim, out_features = enc_out_dim))

        # The final Sigmoid bounds reconstructions to (0, 1).
        self.decoder = nn.Sequential(nn.Linear(in_features = latent_dim, out_features = hidden_dim),
                                    nn.Sigmoid(),
                                    nn.Linear(in_features = hidden_dim, out_features = output_dim),
                                    nn.Sigmoid())
        # distribution parameters
        self.fc_mu = nn.Linear(enc_out_dim, latent_dim)
        self.fc_var = nn.Linear(enc_out_dim, latent_dim)

        # Log-scale for gaussian_likelihood. training_step uses an MSE loss instead,
        # so this parameter is kept only so existing state dicts keep loading.
        self.log_scale = nn.Parameter(torch.Tensor([0.0]))

    def configure_optimizers(self):
        """Return the Adam optimizer (lr=1e-4) over all module parameters."""
        return torch.optim.Adam(self.parameters(), lr=1e-4)

    def gaussian_likelihood(self, x_hat, logscale, x):
        """Return the element-wise log-probability of ``x`` under Normal(x_hat, exp(logscale)).

        The result has the broadcast shape of the inputs; it is not reduced.
        """
        scale = torch.exp(logscale)
        mean = x_hat
        dist = torch.distributions.Normal(mean, scale)

        # measure prob of seeing image under p(x|z)
        log_pxz = dist.log_prob(x)
        return log_pxz

    def kl_divergence(self, z, mu, std):
        """Single-sample Monte Carlo estimate of KL(q(z|x) || N(0, 1)), summed over the last dim.

        Args:
            z: Sample drawn from q = Normal(mu, std).
            mu: Mean of q.
            std: Standard deviation of q.
        """
        # --------------------------
        # Monte carlo KL divergence
        # --------------------------
        # 1. define the first two probabilities (in this case Normal for both)
        p = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(std))
        q = torch.distributions.Normal(mu, std)

        # 2. get the probabilities from the equation
        log_qzx = q.log_prob(z)
        log_pz = p.log_prob(z)

        # kl
        kl = (log_qzx - log_pz)
        kl = kl.sum(-1)
        return kl

    def training_step(self, batch, batch_idx):
        """Encode, sample, decode one batch and return the scalar training loss.

        The loss is ``kl_weight * KL + MSE(x_hat, x)`` averaged over the batch; it is
        logged under the historical key ``'elbo'``.
        """
        x = batch

        # encode x to get the mu and variance parameters
        x_encoded = self.encoder(x)
        mu, log_var = self.fc_mu(x_encoded), self.fc_var(x_encoded)

        # sample z from q (rsample keeps the sample differentiable w.r.t. mu and std)
        std = torch.exp(log_var / 2)
        q = torch.distributions.Normal(mu, std)
        z = q.rsample()

        # decoded
        x_hat = self.decoder(z)

        # Reconstruction loss: mean squared error over every element.
        # (A gaussian_likelihood call used to precede this and was immediately
        # overwritten; it was dropped because its result was never used.)
        recon_loss = ((x_hat - x)**2).mean()
        # kl
        kl = self.kl_divergence(z, mu, std)

        # Weighted sum; named "elbo" for continuity with existing logs.
        elbo = (self.hparams.kl_weight*kl + recon_loss)
        elbo = elbo.mean()

        # 'recon_loss' and 'reconstruction' carry the same value; both keys are kept
        # so existing dashboards do not lose a series.
        self.log_dict({
            'elbo': elbo,
            'kl': kl.mean(),
            'recon_loss': recon_loss.mean(),
            'reconstruction': recon_loss.mean(),
        })

        return elbo

In [9]:
parser = ArgumentParser()
parser.add_argument('--gpus', type=int, default=0)
parser.add_argument('--dataset', type=str, default='wavesuite')
# parse_known_args tolerates argv entries this parser does not define.
args, unknown = parser.parse_known_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def scaler(x, xmin, xmax, min, max):
    """Linearly map ``x`` from the range [xmin, xmax] onto [min, max]."""
    scale = (max - min) / (xmax - xmin)
    xScaled = scale * x + min - xmin * scale
    return xScaled


if args.dataset == 'CMCC':
    directory = '/data/c-quilo/CMCC/'
    input_filename = 'sateNo4_1_Unut_data_40_to_99.npy'
    #input_filename = 'sateNo4_1_pcs_nut_data_40_to_99.npy'
    input = np.load(directory + input_filename)

    # Global min/max are kept so the scaling can be inverted on generated samples.
    min_input = np.min(input)
    max_input = np.max(input)

    # Scale to [0, 1] to match the decoder's final Sigmoid.
    input = scaler(input, min_input, max_input, 0, 1)
    input = torch.FloatTensor(input).to(device)
else:
    # Without this guard `input` would silently be the Python builtin function
    # and trainer.fit would fail with an unrelated-looking error.
    raise ValueError(
        f"Unsupported dataset {args.dataset!r}: only 'CMCC' has a loader in this notebook."
    )

# Not passed to the Trainer: the callbacks argument below is commented out.
sampler = MeshSampler()

# Size the model from the data instead of relying on the VAE default width.
n_features = input.shape[-1]
vae = VAE(input_dim=n_features, output_dim=n_features).to(device)

# Follow the same CUDA-availability decision as `device` so CPU-only hosts still run.
accelerator = 'gpu' if device.type == 'cuda' else 'cpu'
trainer = pl.Trainer(gpus=args.gpus, max_epochs=500, accelerator=accelerator)#, callbacks=[sampler])
# NOTE(review): the raw tensor is handed over as the train "dataloader", so each
# batch is presumably a single row -- confirm that is intended.
trainer.fit(vae, input)

Epoch 20: 100%|██████████| 59/59 [00:19<00:00,  3.03it/s, loss=-0.436, v_num=106]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 217 M 
1 | decoder | Sequential | 221 M 
2 | fc_mu   | Linear     | 2.1 K 
3 | fc_var  | Linear     | 2.1 K 
---------------------------------------
439 M     Trainable params
0         Non-trainable params
439 M     Total params
1,756.714 Total estimated model params size (MB)


Epoch 40:   2%|▏         | 1/59 [00:00<00:00, 136.22it/s, loss=0.000369, v_num=107]

In [11]:
# Draw latent codes from the standard normal prior N(0, 1) and decode them.
num_preds = 100
# Read the latent size from the trained model instead of repeating the literal 32.
latent_dim = vae.hparams.latent_dim
z = np.random.normal(size=(num_preds, latent_dim))
z = torch.FloatTensor(z)
with torch.no_grad():
    # Send z to wherever the model's weights currently are; the global `device`
    # may not match the module's location.
    pred = vae.decoder(z.to(vae.device)).cpu()
    print(pred)

def inverseScaler(xscaled, xmin, xmax, min, max):
    """Map values from the [min, max] range back to the [xmin, xmax] range.

    Inverts a linear scaling whose slope is (max - min) / (xmax - xmin).
    """
    slope = (max - min) / (xmax - xmin)
    unscaled = xscaled / slope
    offset = min / slope
    return unscaled - offset + xmin
# Undo the [0, 1] scaling using the min/max recorded from the training data.
pred = inverseScaler(pred, min_input, max_input, 0, 1)
print(pred)

# np.save appends the '.npy' extension to the target path.
generated_path = directory + 'y_pred'
np.save(generated_path, pred)

tensor([[0.6563, 0.6560, 0.6638,  ..., 0.6914, 0.6696, 0.6711],
        [0.6525, 0.6909, 0.6404,  ..., 0.6707, 0.6656, 0.6713],
        [0.6687, 0.6927, 0.6811,  ..., 0.6827, 0.6629, 0.7013],
        ...,
        [0.6626, 0.6656, 0.6791,  ..., 0.7010, 0.6583, 0.6940],
        [0.7209, 0.6860, 0.7075,  ..., 0.6922, 0.6943, 0.7021],
        [0.7070, 0.7047, 0.7090,  ..., 0.6997, 0.6879, 0.6684]])
tensor([[-2.9694, -3.0044, -2.1723,  ...,  0.7819, -1.5543, -1.3917],
        [-3.3781,  0.7209, -4.6683,  ..., -1.4345, -1.9753, -1.3664],
        [-1.6506,  0.9180, -0.3241,  ..., -0.1512, -2.2625,  1.8302],
        ...,
        [-2.2920, -1.9801, -0.5355,  ...,  1.8058, -2.7566,  1.0539],
        [ 3.9238,  0.2023,  2.4916,  ...,  0.8646,  1.0868,  1.9204],
        [ 2.4435,  2.1998,  2.6546,  ...,  1.6592,  0.4002, -1.6802]])


In [13]:
import seaborn as sns
import pandas as pd
# `plt` and `TSNE` are used below but were not imported in the visible notebook.
import matplotlib.pyplot as plt
# NOTE(review): scikit-learn's TSNE matches the fit_transform(n_components=2)
# usage below -- confirm this is the intended implementation.
from sklearn.manifold import TSNE

# Width of one field block. Columns are sliced as three N_POINTS-wide blocks that
# are combined into a U magnitude, followed by one N_POINTS-wide `nut` block.
# NOTE(review): 4 * N_POINTS = 3,404,404 differs from the VAE default
# input_dim of 90351 -- confirm which width the data really has.
N_POINTS = 851101


def _require_width(fields, name):
    """Raise a clear error if ``fields`` has fewer than 4 * N_POINTS columns."""
    if fields.shape[1] < 4 * N_POINTS:
        raise ValueError(
            f"{name} has {fields.shape[1]} columns; expected at least {4 * N_POINTS}"
        )


def _u_magnitude(fields):
    """Return the Euclidean norm of the three leading N_POINTS-wide column blocks."""
    return np.sqrt(
        fields[:, :N_POINTS]**2
        + fields[:, N_POINTS:N_POINTS*2]**2
        + fields[:, N_POINTS*2:N_POINTS*3]**2
    )


def _labelled_frame(values, name):
    """Wrap ``values`` in a DataFrame and tag every row with ``name`` in a 'label' column."""
    frame = pd.DataFrame(values)
    frame['label'] = name
    return frame


def _tsne_scatter(features, labels, alpha):
    """Embed ``features`` in 2-D with t-SNE, scatter-plot them by label, return the embedding."""
    embedded = TSNE(n_components=2).fit_transform(features)
    sns.scatterplot(
        x=embedded[:, 0], y=embedded[:, 1],
        hue=labels,
        data=features,
        alpha=alpha,
        markers='s'
    )
    return embedded


# Convert to a NumPy array so the numpy/pandas calls below get native input
# (`pred` is a CPU torch tensor when it comes from the sampling cell).
y_pred = np.asarray(pred)
Unut_data = np.load(directory + input_filename)
_require_width(y_pred, 'y_pred')
_require_width(Unut_data, 'Unut_data')

# --- generated samples ---
U_gen = y_pred[:, :N_POINTS*3]
nut_gen = y_pred[:, N_POINTS*3:N_POINTS*4]
U_mag_gen = _u_magnitude(U_gen)

df1 = _labelled_frame(U_mag_gen, 'U')
df2 = _labelled_frame(nut_gen, 'nut')
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df1 = pd.concat([df1, df2], ignore_index=True)
label = df1["label"]
df1 = df1.drop(labels = ["label"],axis = 1)

plt.figure(figsize=(16,10))
Unut_gen_embedded = _tsne_scatter(df1, label, alpha=0.5)
tsne2done = Unut_gen_embedded[:, 0]
tsne2dtwo = Unut_gen_embedded[:, 1]

# --- real data (drawn on the same axes as the generated samples) ---
U_mag_real = _u_magnitude(Unut_data)
df1 = _labelled_frame(U_mag_real, 'U')
df2 = _labelled_frame(Unut_data[:, N_POINTS*3:N_POINTS*4], 'nut')
df1 = pd.concat([df1, df2], ignore_index=True)
label = df1["label"]
df1 = df1.drop(labels = ["label"],axis = 1)

Unut_gen_embedded = _tsne_scatter(df1, label, alpha=1)
tsne2done = Unut_gen_embedded[:, 0]
tsne2dtwo = Unut_gen_embedded[:, 1]

# --- combined frame: real and generated rows with distinct labels ---
df1 = _labelled_frame(U_mag_real, 'U_real')
df2 = _labelled_frame(Unut_data[:, N_POINTS*3:N_POINTS*4], 'nut_real')
df1 = pd.concat([df1, df2], ignore_index=True)

df3 = _labelled_frame(U_mag_gen, 'U_gen')
df1 = pd.concat([df1, df3], ignore_index=True)

df4 = _labelled_frame(nut_gen, 'nut_gen')
df1 = pd.concat([df1, df4], ignore_index=True)
