# Trace Generator training: generative models for Darshan traces

This notebook demonstrates how to train the Trace Generator (TG) on Darshan traces.

In [None]:
from __future__ import print_function

import argparse
import os
import random
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.utils.data
from scipy import stats
from sklearn.model_selection import train_test_split
from torch import nn, optim

# from torchvision import datasets, transforms
# from torchvision.utils import save_image
from torch.autograd import Variable
from torch.nn import functional as F

# plt.rcParams.update({'font.size': 16})
# plt.rcParams['figure.dpi'] = 150

In [None]:
class ResNet_block(torch.nn.Module):
    """Fully connected residual block that keeps the feature width at ``n``.

    The input is passed through Linear -> LeakyReLU -> Linear, added back to
    the untouched input (skip connection), and ``act`` is applied to the sum.
    """

    def __init__(self, n, act=torch.nn.LeakyReLU()):
        # n:   input and output width of both linear layers.
        # act: callable applied after the skip connection. The default
        #      instance is built once, when the class is defined, and is
        #      therefore shared by every block that does not pass its own.
        super().__init__()
        layers = [
            torch.nn.Linear(n, n),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(n, n),
        ]
        self.module = torch.nn.Sequential(*layers)
        self.act = act

    def forward(self, inputs):
        """Return ``act(module(inputs) + inputs)``."""
        residual = self.module(inputs)
        return self.act(residual + inputs)

In [None]:
# https://github.com/pytorch/examples/blob/master/vae/main.py
class VAE(nn.Module):
    """Variational autoencoder assembled from fully connected residual blocks.

    The encoder maps ``indim`` features to an ``nh``-wide hidden code, two
    linear heads turn that code into the mean and log-variance of an
    ``nz``-dimensional latent, and the decoder maps a latent sample to
    ``outdim`` values squashed into (0, 1) by a final sigmoid.
    """

    def __init__(self, indim, outdim, nh=8, nz=4):
        super().__init__()
        self.indim, self.outdim = indim, outdim

        # Encoder: indim -> 64 -> 32 -> nh, finished with a ReLU.
        encoder_layers = [
            nn.Linear(indim, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, nh),
            nn.ReLU(),
        ]
        self._enc = nn.Sequential(*encoder_layers)

        # Decoder: nz -> nh -> 32 -> 64 -> outdim, finished with a sigmoid.
        decoder_layers = [
            nn.Linear(nz, nh),
            nn.Linear(nh, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, outdim),
            nn.Sigmoid(),
        ]
        self._dec = nn.Sequential(*decoder_layers)

        # Latent heads: fc21 produces the mean, fc22 the log-variance.
        self.fc21 = nn.Linear(nh, nz)
        self.fc22 = nn.Linear(nh, nz)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the latent distribution for ``x``."""
        hidden = self._enc(x)
        mu = self.fc21(hidden)
        logvar = self.fc22(hidden)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps`` standard normal noise."""
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std

    def decode(self, z):
        """Map latent samples ``z`` through the decoder."""
        return self._dec(z)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for a batch ``x``.

        ``x`` is first reshaped to ``(-1, indim)``.
        """
        flat = x.view(-1, self.indim)
        mu, logvar = self.encode(flat)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent), mu, logvar

In [None]:
class autoencoder(nn.Module):
    """Plain (non-variational) autoencoder built from residual blocks.

    ``encoder`` compresses ``indim`` features to an ``nz``-wide code and
    ``decoder`` expands that code to ``outdim`` values; the trailing ReLU
    makes every output non-negative.
    """

    def __init__(self, indim, outdim, nz=20):
        super().__init__()

        # indim -> 64 -> 32 -> nz
        encoder_layers = [
            nn.Linear(indim, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, nz),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # nz -> 32 -> 64 -> outdim, then ReLU
        decoder_layers = [
            nn.Linear(nz, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, outdim),
            nn.ReLU(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode the resulting code."""
        code = self.encoder(x)
        return self.decoder(code)

In [None]:
class Generator(nn.Module):
    """GAN generator: ``indim`` inputs -> 64 -> 32 -> ``outdim`` outputs.

    The final ``Tanh`` bounds every output to (-1, 1).
    """

    def __init__(self, indim, outdim):
        super().__init__()

        layers = [
            nn.Linear(indim, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, outdim),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Run ``z`` through the generator network."""
        return self.model(z)


class Discriminator(nn.Module):
    """GAN discriminator: ``indim`` inputs -> 64 -> 32 -> ``outdim`` scores.

    The final ``Sigmoid`` keeps each score in (0, 1), which is what the
    binary cross-entropy criterion used with it in this notebook requires.
    """

    def __init__(self, indim, outdim=1):
        super().__init__()

        layers = [
            nn.Linear(indim, 64),
            ResNet_block(64),
            ResNet_block(64),
            nn.Linear(64, 32),
            ResNet_block(32),
            ResNet_block(32),
            nn.Linear(32, outdim),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the discriminator score(s) for ``x``."""
        return self.model(x)

In [None]:
# https://discuss.pytorch.org/t/rmse-loss-function/16540/2
def RMSELoss(yhat, y):
    """Return the root-mean-square error between ``yhat`` and ``y``.

    The result is a scalar tensor. The float32 machine epsilon is added
    under the square root so the radicand is never exactly zero
    (presumably to keep the gradient finite for a perfect prediction).
    """
    eps = torch.finfo(torch.float32).eps
    mean_sq = (yhat - y).pow(2).mean()
    return (mean_sq + eps).sqrt()

In [None]:
def vae_loss_function(recon_x, x, mu, logvar, dim=20, alpha=1.0):
    """Return the VAE training loss: summed squared error plus weighted KL term.

    Args:
        recon_x: decoder output, compared against ``x`` reshaped to ``(-1, dim)``.
        x: target values; reshaped with ``x.view(-1, dim)``.
        mu: latent means produced by the encoder.
        logvar: latent log-variances produced by the encoder.
        dim: width of one target row (default 20).
        alpha: weight applied to the KL divergence term.

    Returns:
        A scalar tensor ``MSE + alpha * KLD``; both terms are summed over all
        elements and over the batch (no averaging).
    """
    # The binary cross-entropy term that used to be computed here was never
    # part of the returned loss, and it raised for reconstructions outside
    # [0, 1]; it has been dropped.
    MSE = F.mse_loss(recon_x, x.view(-1, dim), reduction="sum")

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return MSE + alpha * KLD

In [None]:
# Run configuration. The options are declared like a command-line interface
# but parsed from a fixed list below, since this code runs inside a notebook.
parser = argparse.ArgumentParser(description="TG training")
parser.add_argument(
    "--batch-size",
    type=int,
    default=128,
    metavar="N",
    help="input batch size for training",
)
parser.add_argument(
    "--epochs",
    type=int,
    default=1000,
    metavar="N",
    help="number of epochs to train (default: 1000)",
)
# Passing --cuda switches training to the GPU; without it everything runs on CPU.
parser.add_argument(
    "--cuda", action="store_true", default=False, help="enables CUDA training"
)
# NOTE(review): --seed is parsed but none of the visible cells applies it to
# any random number generator -- confirm whether seeding was intended.
parser.add_argument(
    "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
)
parser.add_argument(
    "--log-interval",
    type=int,
    default=1000,
    metavar="N",
    help="how many batches to wait before logging training status",
)

# Only the batch size is overridden; every other option keeps its default.
args = parser.parse_args(["--batch-size=32"])
device = torch.device("cuda" if args.cuda else "cpu")
device

In [None]:
## Experiment parameters

# Data set selection: dtype is one of 'huge', 'large', 'medium'.
dtype, use_fg = "huge", True

# Number of leading feature columns to keep (e.g. 7, 25, 35); a value outside
# 1..MDIM is replaced by MDIM in a later cell.
DIM = 0

# Sample counts: NSAMPLES is passed to the flow sampler in the generation
# cells, MSAMPLES is the per-class draw size used when use_fg is False.
NSAMPLES, MSAMPLES = 20, 100

# Model selection: modelname is a key of model_list ('net', 'vae', 'ae', 'gan').
modelname = "net"

# Checkpoint handling: restart_model is a state-dict path to load instead of
# training (None trains from scratch); save_model enables the final save;
# expname is filled in with a timestamp later when left as None.
restart_model, save_model, expname = None, False, None

In [None]:
# Per-dataset constants, keyed by `dtype`: (nlen, nclass, MDIM).
#   nlen   - number embedded in the data file names,
#   nclass - number of application classes,
#   MDIM   - upper bound for DIM (number of available feature columns,
#            presumably -- TODO confirm against the data files).
# An earlier 'medium' variant used nlen, nclass = 945, 12.
_DATASET_SHAPES = {
    "huge": (26, 10, 75 + 7),
    "large": (220, 10, 60 + 7),
    "medium": (417, 7, 25 + 7),
}
if dtype not in _DATASET_SHAPES:
    # Fail here rather than with a NameError on nlen/nclass/MDIM further down.
    raise ValueError(
        "unknown dtype %r; expected one of %s" % (dtype, sorted(_DATASET_SHAPES))
    )
nlen, nclass, MDIM = _DATASET_SHAPES[dtype]

In [None]:
# A DIM outside 1..MDIM (including the default 0) means "use all MDIM columns".
if not (1 <= DIM <= MDIM):
    DIM = MDIM

In [None]:
# Echo the effective run parameters. The values are read straight from the
# notebook namespace instead of going through eval().
print(">>> Parameters")
_namespace = globals()
for pname in (
    "dtype",
    "MDIM",
    "DIM",
    "NSAMPLES",
    "restart_model",
    "expname",
    "save_model",
    "use_fg",
    "modelname",
    "MSAMPLES",
):
    print("%s: %r" % (pname, _namespace[pname]))

In [None]:
# Without an explicit experiment name, build one from the current local time
# plus a random four-digit suffix.
if expname is None:
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    expname = "%s-%d" % (stamp, random.randint(1000, 9999))
print("expname:", expname)

In [None]:
# Pick the input files: the flow-generated ("fg") set or the plain training set.
if use_fg:
    x_file = "flow_train_x_%s_%d_DIM%d_flow.npy" % (dtype, nlen, MDIM)
    y_file = "flow_train_y_%s_%d_DIM%d_flow.npy" % (dtype, nlen, MDIM)
    lb_file = "flow_train_lb_%s_%d_DIM%d_flow.npy" % (dtype, nlen, MDIM)
else:
    x_file = "train_x_%s_%d.npy" % (dtype, nlen)
    y_file = "train_y_%s_%d.npy" % (dtype, nlen)
    lb_file = "train_lb_%s_%d.npy" % (dtype, nlen)

# Features (first DIM columns only), targets, class labels and class names.
x = np.load(x_file)
y = np.load(y_file)
x = x[:, :DIM]
lb = np.load(lb_file)
app = np.load("train_classes_%s_%d_%d.npy" % (dtype, nlen, nclass))

# Label histogram of the data as loaded.
plt.hist(lb, bins=np.arange(0, len(app) + 1))

if not use_fg:
    # Draw MSAMPLES rows (with replacement) for each class so that every
    # class contributes the same number of rows.
    picks = [np.random.choice(np.where(lb == c)[0], MSAMPLES) for c in range(nclass)]
    x = np.vstack([x[rows] for rows in picks])
    y = np.vstack([y[rows] for rows in picks])
    lb = np.hstack([lb[rows] for rows in picks])

print(x.shape, y.shape, lb.shape, app.shape)

In [None]:
# One-hot encode the class label of every row and append it to the features.
n_rows, n_apps = len(x), len(app)
xapp = np.zeros((n_rows, n_apps), dtype=np.float32)
xapp[np.arange(n_rows), lb] = 1.0
xx = np.hstack((x, xapp))

xapp.shape, xx.shape, x.shape, y.shape

In [None]:
# Histogram of the hot column of each row, i.e. the class distribution as encoded.
plt.hist(np.nonzero(xapp == 1.0)[1], bins=np.arange(len(app) + 1));

In [None]:
# Hold out 10% of the rows for testing; inputs, targets and labels are split together.
splits = train_test_split(xx, y, lb, test_size=0.1)
X_train, X_test, y_train, y_test, lb_train, lb_test = splits
print(
    *(
        len(part)
        for part in (X_train, y_train, lb_train, X_test, y_test, lb_test)
    )
)

In [None]:
# Side-by-side heat maps: one-hot class matrix, input features, target traces.
plt.figure(figsize=[14, 4])
for panel, grid in enumerate((xapp, x, y), start=1):
    plt.subplot(1, 3, panel)
    plt.pcolor(grid)
    plt.colorbar()

## Regression/Autoencoder

In [None]:
# Regression network: (one-hot class + DIM features) -> 128 -> 64 -> 32 -> 20.
# Each width is followed by two residual blocks, which keep the width
# unchanged so that their input can be added to their output.
NET = nn.Sequential(
    nn.Linear(nclass + DIM, 128),
    ResNet_block(128),
    ResNet_block(128),
    nn.Linear(128, 64),
    ResNet_block(64),
    ResNet_block(64),
    nn.Linear(64, 32),
    ResNet_block(32),
    ResNet_block(32),
    nn.Linear(32, 20),
)

In [None]:
# Candidate models keyed by the names accepted for `modelname`. Each one takes
# the one-hot class plus DIM features and produces 20 output values.
in_features = nclass + DIM
model_list = dict(
    net=NET,
    vae=VAE(in_features, 20, nh=32, nz=16),
    ae=autoencoder(in_features, 20),
    gan=Generator(in_features, 20),
)

In [None]:
# Print a table of state-dict entries and the total entry count per model.
# The loop variables are deliberately distinct from each other and from the
# notebook-wide `model`, so running this cell never rebinds the model that
# is being trained or evaluated.
for name, net in model_list.items():
    print("Model params:", name)
    print("-" * 20)
    num_params = 0
    for key, tensor in net.state_dict().items():
        print("%20s\t%20s\t%10d" % (key, list(tensor.shape), tensor.numel()))
        num_params += tensor.numel()
    print("-" * 50)
    print("%20s\t%20s\t%10d" % ("Total", "", num_params))
    # Size estimate assumes 4 bytes per entry (float32).
    print("All (total, MB): %d %g" % (num_params, num_params * 4 / 1024 / 1024))

In [None]:
def _tensor_dataset(*arrays):
    # Wrap NumPy arrays of equal length as one TensorDataset.
    return torch.utils.data.TensorDataset(*(torch.tensor(a) for a in arrays))


# Each dataset item is (input features + one-hot class, target trace, label).
training_data = _tensor_dataset(X_train, y_train, lb_train)
testing_data = _tensor_dataset(X_test, y_test, lb_test)
full_data = _tensor_dataset(np.hstack((x, xapp)), y, lb)

# Worker / pinned-memory options are only used when running on CUDA.
kwargs = {"num_workers": 1, "pin_memory": True} if args.cuda else {}
loader_opts = dict(
    batch_size=args.batch_size, shuffle=True, drop_last=False, **kwargs
)
train_loader = torch.utils.data.DataLoader(training_data, **loader_opts)
test_loader = torch.utils.data.DataLoader(testing_data, **loader_opts)
full_loader = torch.utils.data.DataLoader(full_data, **loader_opts)

# Number of batches per loader.
print(*(len(loader) for loader in (train_loader, test_loader, full_loader)))

In [None]:
# Selected model, moved to the target device, and its regression criterion.
model = model_list[modelname].to(device)
loss_func = nn.MSELoss()

# Adam plus a plateau scheduler that lowers the learning rate once the
# monitored loss has stopped improving for 200 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", patience=200
)
epoch = 0

# The GAN additionally needs a discriminator with its own optimiser,
# scheduler and binary cross-entropy criterion.
if modelname == "gan":
    discriminator = Discriminator(20).to(device)
    optimizer_D = optim.Adam(discriminator.parameters(), lr=1e-3)
    scheduler_D = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_D, mode="min", patience=200
    )
    adversarial_loss = nn.BCELoss()

In [None]:
# Training: up to 20 rounds of 100 epochs each. Nothing is trained when a
# checkpoint is going to be restored (restart_model is set). Every 100 epochs
# the training set is re-weighted towards the samples with the largest
# reconstruction error; every 10 rounds the loss curve is plotted and the
# model state is written to disk.
for k in range(20):
    if restart_model is not None:
        break
    train_loss_list = list()
    for _ in range(100):
        epoch += 1
        model.train()
        train_loss = 0
        for batch_idx, (data, lab, _) in enumerate(train_loader, 1):
            data, lab = data.to(device), lab.to(device)
            optimizer.zero_grad()
            if modelname == "vae":
                recon_batch, mu, logvar = model(data)
                loss = vae_loss_function(recon_batch, lab, mu, logvar)
                # The VAE loss is already summed over the batch.
                batch_weight = 1
            else:
                recon_batch = model(data)
                if modelname == "gan":
                    # Discriminator targets, created on the same device as
                    # the data so that CUDA runs do not mix devices.
                    valid = torch.ones(data.size(0), 1, device=device)
                    fake = torch.zeros(data.size(0), 1, device=device)
                    # Generator step: try to make the discriminator say "real".
                    loss = adversarial_loss(discriminator(recon_batch), valid)
                else:
                    loss = loss_func(recon_batch, lab)
                # These criteria average over the batch; weight by the batch
                # size so that dividing by the dataset size below yields a
                # true per-sample average.
                batch_weight = len(data)

            loss.backward()
            train_loss += loss.item() * batch_weight
            optimizer.step()

            if modelname == "gan":
                ## Train Discriminator
                optimizer_D.zero_grad()
                real_loss = adversarial_loss(discriminator(lab), valid)
                # detach(): the generator must not receive gradients here.
                fake_loss = adversarial_loss(discriminator(recon_batch.detach()), fake)
                d_loss = (real_loss + fake_loss) / 2

                d_loss.backward()
                optimizer_D.step()

            if batch_idx % args.log_interval == 0:
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.07g}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.dataset),
                        100.0 * batch_idx / len(train_loader),
                        loss.item() * batch_weight / len(data),
                    )
                )

        train_loss_list.append(train_loss / len(train_loader.dataset))

        if epoch % 10 == 0:
            print(
                "====> Epoch: {} Average loss: {:.04g} LR: {:.03g}".format(
                    epoch,
                    train_loss / len(train_loader.dataset),
                    optimizer.param_groups[0]["lr"],
                )
            )

        if epoch % 100 == 0:
            print("====> Epoch: {} Resampling ...".format(epoch))
            model.eval()
            # Unshuffled pass, so err_list lines up with the dataset indices
            # that the weighted sampler draws from.
            recon_loader = torch.utils.data.DataLoader(
                training_data,
                batch_size=args.batch_size,
                shuffle=False,
                drop_last=False,
                **kwargs
            )
            err_list = list()
            with torch.no_grad():
                for data, lab, _ in recon_loader:
                    data, lab = data.to(device), lab.to(device)
                    recon_batch = model(data)
                    if modelname == "vae":
                        recon_batch = recon_batch[0]
                    # sqrt of the element-wise squared error = absolute error,
                    # summed over the 20 output values of each sample.
                    err = torch.sum(
                        torch.sqrt(
                            F.mse_loss(recon_batch, lab.view(-1, 20), reduction="none")
                        ),
                        axis=1,
                    )
                    err_list.extend(err.detach().cpu().numpy())

                total_err = np.sum(err_list)
                if np.isfinite(total_err) and total_err > 0:
                    # Sample (with replacement) proportionally to the error.
                    weights = err_list / total_err
                    sampler = torch.utils.data.WeightedRandomSampler(
                        weights, len(weights), replacement=True
                    )
                    train_loader = torch.utils.data.DataLoader(
                        training_data,
                        batch_size=args.batch_size,
                        shuffle=False,
                        drop_last=False,
                        sampler=sampler,
                    )
                else:
                    # A zero or non-finite total cannot be normalised into
                    # sampling weights; keep the current loader.
                    print("====> Epoch: {} Resampling skipped".format(epoch))

        # The plateau schedulers monitor the loss of the last batch of the epoch.
        scheduler.step(loss)
        if modelname == "gan":
            scheduler_D.step(d_loss)

    if (k + 1) % 10 == 0:
        plt.figure()
        plt.plot(train_loss_list)
        plt.title("Epoch: %d MSE: %g" % (epoch, np.mean(train_loss_list)))
        plt.show()

        # Periodic checkpoint; written regardless of `save_model`.
        fname = "gmodel_%s_%d_DIM%d_%s_fg%d.torch" % (
            dtype,
            nlen,
            DIM,
            modelname,
            use_fg,
        )
        torch.save(model.state_dict(), fname)
        print("Model saved:", fname)

In [None]:
# After training: optionally save the final weights under the experiment
# name, or -- when a checkpoint path was given -- restore the weights from it.
if restart_model is None:
    fname = "gmodel_%s_%d_DIM%d.%s.torch" % (dtype, nlen, DIM, expname)
    if save_model:
        torch.save(model.state_dict(), fname)
        print("Model saved:", fname)
else:
    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be restored on the device used by this run.
    model.load_state_dict(torch.load(restart_model, map_location=device))
    print("Model loaded:", restart_model)

In [None]:
# Evaluate on the held-out test set. Reconstructions are clipped to [0, 1]
# before scoring. `lab` and `recon_batch` keep the last batch afterwards and
# are plotted by the following cell.
model.eval()
test_loss = 0
lab_list = list()
recon_list = list()
err_list = list()
with torch.no_grad():
    for data, lab, _ in test_loader:
        data, lab = data.to(device), lab.to(device)
        recon_batch = model(data)
        if modelname == "vae":
            recon_batch = recon_batch[0]
        recon_batch = recon_batch.clamp(0.0, 1.0)

        # loss_func averages over the batch; weight by the batch size so the
        # division by the dataset size below gives the per-sample mean.
        test_loss += loss_func(recon_batch, lab).item() * len(data)
        lab_list.append(lab.detach().cpu().numpy())
        recon_list.append(recon_batch.detach().cpu().numpy())
        # Per-sample RMSE over the 20 output values.
        # https://www.statisticshowto.com/probability-and-statistics/regression-analysis/rmse-root-mean-square-error/
        err = torch.sqrt(
            torch.mean(
                F.mse_loss(recon_batch, lab.view(-1, 20), reduction="none"), axis=1
            )
        )
        err_list.extend(err.detach().cpu().numpy())

lab_arr = np.concatenate(lab_list)
recon_arr = np.concatenate(recon_list)

test_loss /= len(test_loader.dataset)
print("len(test_loader.dataset)", len(test_loader.dataset))
print("====> Test set loss: {:f}".format(test_loss))
print("====> Test RMSE loss (min,max):", np.min(err_list), np.max(err_list))
print("====> Test RMSE loss (median):", np.median(err_list))
print("====> Test RMSE loss (mean,std):", np.mean(err_list), np.std(err_list))
print("====> Test RMSE loss (mode):", stats.mode(err_list)[0])

In [None]:
# Heat maps of the last evaluated batch: targets (left) vs. reconstructions (right).
plt.figure(figsize=[10, 4])
for panel, batch in enumerate((lab, recon_batch), start=1):
    plt.subplot(1, 2, panel)
    plt.pcolor(batch.detach().cpu().numpy())
    plt.colorbar()

In [None]:
# Display: number of collected per-sample errors, size of the full data set,
# and number of batches in the full loader.
len(err_list), len(full_loader.dataset), len(full_loader)

In [None]:
# Evaluate on the full data set (train + test rows) and keep targets,
# clipped reconstructions and class labels for the plots further down.
model.eval()
test_loss = 0
lab_list = list()
recon_list = list()
ax_list = list()
err_list = list()
with torch.no_grad():
    for data, lab, ax in full_loader:
        data, lab, ax = data.to(device), lab.to(device), ax.to(device)
        recon_batch = model(data)
        if modelname == "vae":
            recon_batch = recon_batch[0]
        recon_batch = recon_batch.clamp(0.0, 1.0)
        # loss_func averages over the batch; weight by the batch size so the
        # division by the dataset size below gives the per-sample mean.
        test_loss += loss_func(recon_batch, lab).item() * len(data)
        lab_list.append(lab.detach().cpu().numpy())
        recon_list.append(recon_batch.detach().cpu().numpy())
        ax_list.append(ax.detach().cpu().numpy())
        # Per-sample RMSE over the 20 output values.
        # https://www.statisticshowto.com/probability-and-statistics/regression-analysis/rmse-root-mean-square-error/
        err = torch.sqrt(
            torch.mean(
                F.mse_loss(recon_batch, lab.view(-1, 20), reduction="none"), axis=1
            )
        )
        err_list.extend(err.detach().cpu().numpy())

lab_arr = np.concatenate(lab_list)
recon_arr = np.concatenate(recon_list)
ax_arr = np.concatenate(ax_list)
# Accumulated (not yet normalised) loss.
print("test_loss", test_loss)

test_loss /= len(full_loader.dataset)
print("len(full_loader.dataset)", len(full_loader.dataset))
print("====> Full set loss: {:f}".format(test_loss))
print(
    "====> Full set loss (RMSE): {:f}".format(
        np.sum(err_list) / len(full_loader.dataset)
    )
)
print("====> Full RMSE loss (min,max):", np.min(err_list), np.max(err_list))
print("====> Full RMSE loss (median):", np.median(err_list))
print("====> Full RMSE loss (mean,std):", np.mean(err_list), np.std(err_list))
print("====> Full RMSE loss (mode):", stats.mode(err_list)[0])

In [None]:
# Display: summed per-sample RMSE next to the dataset-normalised loss
# computed in the evaluation cell.
np.sum(err_list), test_loss

In [None]:
# Heat maps of the last batch of the full-set pass: targets vs. reconstructions.
plt.figure(figsize=[10, 4])
for panel, batch in enumerate((lab, recon_batch), start=1):
    plt.subplot(1, 2, panel)
    plt.pcolor(batch.detach().cpu().numpy())
    plt.colorbar()

In [None]:
# One figure per sample of the last batch: target trace against reconstruction.
for target_row, recon_row in zip(lab, recon_batch):
    plt.figure()
    plt.plot(target_row.detach().cpu().numpy(), ".-", label="Original")
    plt.plot(recon_row.detach().cpu().numpy(), ".-", label="Recon")
    plt.legend()

In [None]:
# Per-sample RMSE in evaluation order (left) and as a histogram (right),
# with median (green) and mean (red) marked in both panels.
print(len(err_list))
err_median, err_mean = np.median(err_list), np.mean(err_list)
plt.figure(figsize=[12, 4])

plt.subplot(1, 2, 1)
plt.xlabel("ID")
plt.plot(err_list)
plt.axhline(err_median, c="g", label="median")
plt.axhline(err_mean, c="r", label="mean")
plt.legend()

plt.subplot(1, 2, 2)
plt.xlabel("")
plt.hist(err_list)
plt.axvline(err_median, c="g", label="median")
plt.axvline(err_mean, c="r", label="mean")
plt.legend()

In [None]:
# Same as the previous figure, but with the errors sorted in ascending order
# in the left panel.
od = np.argsort(err_list)
print(len(err_list))
err_median, err_mean = np.median(err_list), np.mean(err_list)
plt.figure(figsize=[12, 4])

plt.subplot(1, 2, 1)
plt.plot(np.array(err_list)[od])
plt.axhline(err_median, c="g", label="median")
plt.axhline(err_mean, c="r", label="mean")
plt.legend()

plt.subplot(1, 2, 2)
plt.hist(err_list)
plt.axvline(err_median, c="g", label="median")
plt.axvline(err_mean, c="r", label="mean")
plt.legend()

In [None]:
# The five samples with the smallest RMSE: target against reconstruction.
od = np.argsort(err_list)
best = od[:5]
for target_row, recon_row in zip(lab_arr[best], recon_arr[best]):
    plt.figure()
    plt.plot(target_row, ".-", label="Original")
    plt.plot(recon_row, ".-", label="Recon")
    plt.legend()
    plt.ylim([-0.2, 1.2])

In [None]:
# The five samples with the largest RMSE, worst first; the y-range is left
# to matplotlib here.
od = np.argsort(err_list)
worst = od[::-1][:5]
for target_row, recon_row in zip(lab_arr[worst], recon_arr[worst]):
    plt.figure()
    plt.plot(target_row, ".-", label="Original")
    plt.plot(recon_row, ".-", label="Recon")
    plt.legend()

## Generation

In [None]:
from normalizing_flows import NormalizingFlow
from scipy.interpolate import interp1d, make_interp_spline
from utils import random_normal_samples

# from scipy.signal import savgol_filter
# from scipy.interpolate import Rbf
# from scipy.interpolate import InterpolatedUnivariateSpline
# from scipy.interpolate import interp1d
# from scipy.interpolate import make_lsq_spline

In [None]:
def loess(x, y, frac=0.2, it=None, scatter=True):
    """Smooth ``y`` over ``x`` with statsmodels' LOWESS and return its result.

    Both inputs are copied into arrays and ordered by ascending ``x`` before
    smoothing. ``frac`` is forwarded to ``lowess``; ``it`` is forwarded only
    when given (setting it can help when the output contains NaNs).
    ``scatter`` is accepted but not used.
    """
    from statsmodels.nonparametric.smoothers_lowess import lowess

    ys = np.array(y)
    xs = np.array(x)
    order = xs.argsort()
    ys = ys[order]
    xs = xs[order]

    options = {"frac": frac}
    if it is not None:
        options["it"] = it
    return lowess(ys, xs, **options)

In [None]:
def moving_average(x, w):
    """Return the running mean of ``x`` over windows of ``w`` samples.

    Only fully overlapping windows are kept, so the result has
    ``len(x) - w + 1`` entries.
    """
    window = np.ones(w)
    window_sums = np.convolve(x, window, "valid")
    return window_sums / w

In [None]:
def smooth(lab, n=40):
    """Clip a trace to [0, 1] and resample it onto ``n`` evenly spaced points.

    Args:
        lab: sequence of trace values; its last axis is treated as samples
            spread evenly over the normalised interval [0, 1]. At least two
            samples are needed for the interpolation.
        n: number of points in the resampled trace.

    Returns:
        ``(xi, yi)``: the ``n`` positions in [0, 1] and the piecewise-linear
        interpolation of the clipped trace at those positions.
    """
    # Vectorised clipping; also works for any trace length.
    lab = np.clip(np.asarray(lab, dtype=float), 0.0, 1.0)

    # Positions of the original samples, derived from the input length
    # instead of assuming 20 samples.
    x = np.linspace(0, 1, lab.shape[-1])
    intp = interp1d(x, lab, kind="slinear")

    xi = np.linspace(x.min(), x.max(), n)
    yi = intp(xi)
    return (xi, yi)

In [None]:
# One figure per application: smoothed original traces (left) next to the
# smoothed reconstructions of the same samples (right).
for i, app_name in enumerate(app):
    fig = plt.figure(figsize=[20, 4])
    ax1 = fig.add_subplot(1, 3, 1)
    ax2 = fig.add_subplot(1, 3, 2)

    # Rows of the full-set evaluation that belong to application i.
    rows = ax_arr == i
    for target_row, recon_row in zip(lab_arr[rows], recon_arr[rows]):
        xs, ys = smooth(target_row)
        ax1.plot(xs, ys)

        xs, ys = smooth(recon_row)
        ax2.plot(xs, ys)

    for axes, title in (
        (ax1, "%s (Original)" % (app_name)),
        (ax2, "%s (Generated)" % (app_name)),
    ):
        axes.set_title(title)
        axes.set_xlabel("Time (Normalized)")
        axes.set_ylabel("Write I/O Intensity (Normalized)")

## Full generation

In [None]:
# Full generation, one figure per application: original traces on the left,
# and -- when a flow model for that application can be loaded -- traces
# generated from freshly sampled flow features on the right.
# Cell-local names are used throughout so that the notebook-wide arrays
# (x, y, xapp, xx) and the evaluation tensors are not overwritten.
for i, app_name in enumerate(app):
    fig = plt.figure(figsize=[20, 4])
    ax1 = fig.add_subplot(1, 3, 1)
    for lab_row, adx in zip(lab_arr, ax_arr):
        if adx == i:
            xs, ys = smooth(lab_row)
            ax1.plot(xs, ys)

    ax1.set_title("%s (Original)" % (app_name))
    ax1.set_xlabel("Time (Normalized)")
    ax1.set_ylabel("Write I/O Intensity (Normalized)")

    ## full re-generation
    fname = "flowmodel_%s_%d_DIM%d_app%d.torch" % (dtype, nlen, MDIM, i)
    try:
        flowmodel = NormalizingFlow(MDIM, 32)
        flowmodel.load_state_dict(torch.load(fname))
    except Exception as e:
        # Best effort: an application without a usable flow model only gets
        # the "Original" panel, but the reason is reported.
        print("Skipping generation for app %d (%s): %r" % (i, fname, e))
        continue

    flowmodel.eval()
    samples = (
        (flowmodel.sample(random_normal_samples(NSAMPLES, dim=MDIM))).detach().numpy()
    )

    # Model input: first DIM sampled features plus the one-hot class.
    feats = samples[:, :DIM]
    onehot = np.zeros((len(feats), len(app)), dtype=np.float32)
    onehot[:, i] = 1.0
    net_in = torch.tensor(np.hstack((feats, onehot))).to(device)

    generated = model(net_in)
    if modelname == "vae":
        generated = generated[0]
    generated = generated.detach().cpu().numpy()

    ax3 = fig.add_subplot(1, 3, 2)
    for trace in generated:
        xs, ys = smooth(trace)
        ax3.plot(xs, ys)
    ax3.set_title("%s (Generated)" % (app_name))
    ax3.set_xlabel("Time (Normalized)")
    ax3.set_ylabel("Write I/O Intensity (Normalized)")

In [None]:
# Display the shapes of the reconstructions, targets and class labels
# gathered over the full data set.
recon_arr.shape, lab_arr.shape, ax_arr.shape

In [None]:
# Same comparison as the previous cell, but with one separate figure each
# for the originals, the reconstructions, and the traces generated from
# sampled flow features. Cell-local names keep the notebook-wide arrays
# (x, y, xapp, xx) and the evaluation tensors intact.
plt.rcParams["font.size"] = 12
for i, app_name in enumerate(app):
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 1, 1)
    # Originals are drawn in random order.
    idx = np.arange(len(recon_arr))
    np.random.shuffle(idx)
    for lab_row, adx in zip(lab_arr[idx, :], ax_arr[idx]):
        if adx == i:
            xs, ys = smooth(lab_row)
            ax1.plot(xs, ys)

    ax1.set_title("%s (Original)" % (app_name))
    ax1.set_xlabel("Time (Normalized)")
    ax1.set_ylabel("Write I/O Intensity (Normalized)")

    fig = plt.figure()
    ax2 = fig.add_subplot(1, 1, 1)
    for recon_row, adx in zip(recon_arr, ax_arr):
        if adx == i:
            xs, ys = smooth(recon_row)
            ax2.plot(xs, ys)
    ax2.set_title("%s (Generated)" % (app_name))
    ax2.set_xlabel("Time (Normalized)")
    ax2.set_ylabel("Write I/O Intensity (Normalized)")

    ## full re-generation
    fname = "flowmodel_%s_%d_DIM%d_app%d.torch" % (dtype, nlen, MDIM, i)
    try:
        flowmodel = NormalizingFlow(MDIM, 32)
        flowmodel.load_state_dict(torch.load(fname))
    except Exception as e:
        # Best effort: skip applications without a usable flow model, but
        # report why instead of swallowing the error silently.
        print("Skipping generation for app %d (%s): %r" % (i, fname, e))
        continue

    flowmodel.eval()
    samples = (
        (flowmodel.sample(random_normal_samples(NSAMPLES, dim=MDIM))).detach().numpy()
    )

    # Model input: first DIM sampled features plus the one-hot class.
    feats = samples[:, :DIM]
    onehot = np.zeros((len(feats), len(app)), dtype=np.float32)
    onehot[:, i] = 1.0
    net_in = torch.tensor(np.hstack((feats, onehot))).to(device)

    generated = model(net_in)
    if modelname == "vae":
        generated = generated[0]
    generated = generated.detach().cpu().numpy()

    fig = plt.figure()
    ax3 = fig.add_subplot(1, 1, 1)
    for trace in generated:
        xs, ys = smooth(trace)
        ax3.plot(xs, ys)
    ax3.set_title("%s (Generated)" % (app_name))
    ax3.set_xlabel("Time (Normalized)")
    ax3.set_ylabel("Write I/O Intensity (Normalized)")

In [None]:
# Three-panel summary per application: originals, reconstructions from the
# trace generator alone ("TG only"), and traces generated from features
# sampled by the flow model ("FG+TG"). Cell-local names keep the
# notebook-wide arrays (x, y, xapp, xx) and tensors (lab, ax) intact.
flowmodelname = "flow"  ## 'flow', 'gan', 'vae'

plt.rcParams["font.size"] = 12
for i, app_name in enumerate(app):
    fig, axs = plt.subplots(1, 3, figsize=[12, 3])

    # Originals are drawn in random order.
    idx = np.arange(len(recon_arr))
    np.random.shuffle(idx)
    for lab_row, adx in zip(lab_arr[idx, :], ax_arr[idx]):
        if adx == i:
            xs, ys = smooth(lab_row)
            axs[0].plot(xs, ys, ".-")

    axs[0].set_title("%s" % (app_name), fontweight="bold")

    for recon_row, adx in zip(recon_arr, ax_arr):
        if adx == i:
            xs, ys = smooth(recon_row)
            axs[1].plot(xs, ys, ".-")
    axs[1].set_title("Generated (TG only)")

    ## full re-generation
    fname = "flowmodel_%s_%d_DIM%d_app%d_%s.torch" % (
        dtype,
        nlen,
        MDIM,
        i,
        flowmodelname,
    )
    print("Load ", fname)

    try:
        flowmodel = NormalizingFlow(MDIM, n_flows=10)
        flowmodel.load_state_dict(torch.load(fname))
    except Exception as e:
        # Best effort: report the problem and move on to the next
        # application; the figure keeps only its first two panels.
        print("Unexpected error:", sys.exc_info()[0])
        print("Unexpected error:", e)
        continue

    flowmodel.eval()
    samples = (
        (flowmodel.sample(random_normal_samples(NSAMPLES, dim=MDIM))).detach().numpy()
    )

    # Model input: first DIM sampled features plus the one-hot class.
    feats = samples[:, :DIM]
    onehot = np.zeros((len(feats), len(app)), dtype=np.float32)
    onehot[:, i] = 1.0
    net_in = torch.tensor(np.hstack((feats, onehot))).to(device)

    generated = model(net_in)
    if modelname == "vae":
        generated = generated[0]
    generated = generated.detach().cpu().numpy()

    for trace in generated:
        xs, ys = smooth(trace)
        axs[2].plot(xs, ys, ".-")
    axs[2].set_title("Generated (FG+TG)")

    # Shared axis labels; label_outer() hides the inner duplicates.
    for panel in axs.flat:
        panel.set(xlabel="Time (Normalized)", ylabel="\nIntensity (Normalized)")
        panel.label_outer()

    plt.tight_layout()
    # plt.savefig('regen-%s-app%d-all.pdf'%(dtype, i))