(08/18/2023) Testing the use of neural networks for regression. In particular, we are interested in learning the advection coefficient of the 1d RO-PDF equation for the line-energy marginals. In exact form, the coefficient reads:
$$
    \mathcal{R}(t,\theta) = 
    2\mathbb{E}\bigg[
        b_{ij}^2v_t^{(i)}v_t^{(j)}\sin(\delta_t^{(i)}-\delta_t^{(j)})(w_t^{(i)} - w_t^{(j)}) |
        \Theta_t = \theta
    \bigg]
$$ where $i,j$ are fixed.

In [None]:
from PINN.PhysicsInformedROPDF import *
from PINN.utils.dnn import *
# Testing
import matplotlib.pyplot as plt
import torch
import numpy as np
import scipy
import mat73
import os

# Pin the global RNG state of PyTorch and NumPy to the same fixed seed so
# that shuffling (np.random.permutation) and anything torch draws at random
# are repeatable from run to run.
torch.manual_seed(10)
np.random.seed(10)

# define training function
def train(inputs, outputs, model, optim, scheduler, batch_size, epochs, shuffle=True):
    """Fit ``model`` to ``(inputs, outputs)`` with a mini-batch mean-squared-error loss.

    Args:
        inputs: 2-d tensor of samples, one row per sample.
        outputs: 2-d tensor of targets with as many rows as ``inputs``.
        model: module evaluated as ``model.forward(batch)``.
        optim: optimizer whose ``step`` accepts a ``closure`` argument.
        scheduler: learning-rate scheduler stepped once per epoch, or a falsy
            value (e.g. ``None``) to disable scheduling.
        batch_size: number of rows per mini-batch (positive integer).
        epochs: number of passes over the data.
        shuffle: when true, rows are re-permuted with
            ``np.random.permutation`` at the start of every epoch.

    Returns:
        list[float]: the loss of every closure evaluation, in call order.
    """
    X, y = inputs, outputs
    nx = X.shape[0]
    # The last batch absorbs any remainder rows.  When there are fewer rows
    # than batch_size we still run a single (short) batch instead of
    # silently skipping training; an empty data set yields no batches.
    num_batches = max(1, nx // batch_size) if nx > 0 else 0
    all_losses = []
    for i in range(epochs):
        print("============================================================\n")
        print("Epoch = {}\n".format(i+1));
        print("============================================================\n")
        model.train()
        if shuffle:
            tmp = np.random.permutation(nx)
            X, y = X[tmp, :].detach().clone(), y[tmp, :].detach().clone()
        for idx in range(num_batches):
            if idx % 100 == 0:
                print("| => | Batch {} |\n".format(idx+1))
            start_idx = idx * batch_size
            # Slice up to nx (not -1) on the final batch: a stop index of -1
            # would exclude the very last sample from every epoch.
            end_idx = nx if idx + 1 == num_batches else start_idx + batch_size
            Xb = X[start_idx:end_idx, :].detach().clone()
            yb = y[start_idx:end_idx, :].detach().clone()
            # Kept from the original: the batch input tracks gradients.
            # NOTE(review): only needed if the model differentiates with
            # respect to its input inside forward -- confirm against DNN.
            Xb.requires_grad = True

            # closure definition: evaluated immediately by optim.step below,
            # so capturing the loop variables is safe
            def closure():
                optim.zero_grad()
                # make a prediction on the batch
                y_pred = model.forward(Xb)
                # compute L^2 loss
                loss = torch.mean((y_pred - yb)**2)
                # backpropagate
                loss.backward()
                if idx % 100 == 0:
                    print("==> Batch {} loss = {}".format(idx, loss.item()))
                all_losses.append(loss.item())
                return loss
            optim.step(closure=closure)
        if scheduler:
            # step scheduler after epoch if there is one
            scheduler.step()
            print("---------- \n")
            print("++ Learning rate reduced, now at = {}".format(scheduler.get_last_lr()[0]))
    return all_losses

In [None]:
# load data
# Read the .mat file holding the 1-d coefficient regression data through
# mat73.  The result is indexed by variable name in the cells that follow
# (e.g. data["mc_energy1"], data["mc_condexp_target1"]).
data_path = "../../../matlab/data/case9_mc1d_coeff_data.mat"
data = mat73.loadmat(data_path)

In [None]:
# Uniform time grid on [0, 10] with 1001 nodes.
t = np.linspace(0.0, 10.0, 1000+1)
print("number of time points = {}".format(len(t)))
# Pull the regression input (energy) and target (response) out of the loaded
# file; per the notebook's own notes both are laid out as (nsamples x nt).
x, y = data["mc_energy1"], data["mc_condexp_target1"]
print("data dimensions: ")
print("> input = {}".format(x.shape))
print("> output = {}".format(y.shape))
# every (sample, time) entry is one regression point
print("> total number of points for regression = {}".format(x.size))

In [None]:
# Sanity check: scatter the raw (input, response) pairs at one time index.
idx_time = 50
plt.figure(1)
plt.scatter(
    x[:, idx_time],
    y[:, idx_time],
    color="blue",
    alpha=0.6,
    s=0.2,
)
plt.xlabel(r"Input")
plt.ylabel(r"Response");

In [None]:
# create time dependent dataset and train one network per Monte Carlo budget

# number of MC trials used for training
mcro = [250, 1000, 2500, 5000, 7500, 10000]
# training neural net info: one loss history per model trained in this run
# (budgets whose model file already exists are skipped and add no entry)
all_losses = []
# number of temporal points
nt = len(t)
tgrid = t[:nt]
# number of epochs for training
num_epochs = 100
# Create the output folder up front so that a long training run cannot fail
# at the final torch.save merely because the directory is missing.
os.makedirs("./models", exist_ok=True)
for nx in mcro:
    print("> Using mcro = {}".format(nx))
    # file name for saving model
    model_name = "./models/CASE9_NN_coeff1d_mcro{}".format(nx)
    if os.path.isfile(model_name):
        print("> Model {} already trained, skipping ...".format(model_name))
        continue
    X_data = []
    y_data = []
    # create Pytorch dataset: rows are (t_i, x) pairs with the matching
    # response, using the first nx samples of every time column
    for i in range(nt):
        t_i = tgrid[i]
        # append time to all points in x
        inputs_t = torch.tensor([t_i]).repeat(nx).reshape(-1, 1)
        inputs_x = torch.tensor(x[:nx, i]).reshape(-1, 1)
        outputs = torch.tensor(y[:nx, i]).reshape(-1, 1)
        X_i = torch.concat([inputs_t, inputs_x], dim=1)
        # append to full data
        X_data.append(X_i)
        y_data.append(outputs)
    X_data = torch.concat(X_data)
    y_data = torch.concat(y_data)
    print(">> Number of training points = {}".format(len(y_data)))
    # begin training: a fresh network with the same architecture per budget
    nn_vanilla2d = DNN(layers=[2, 100, 100, 1])
    optim = torch.optim.Adam(
        nn_vanilla2d.parameters(),
        lr=8e-3
    )
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optim, gamma=0.9999)
    loss_vanilla = train(X_data, y_data, nn_vanilla2d, optim, scheduler, 2**10, num_epochs, shuffle=True)
    # after training, save model
    torch.save(nn_vanilla2d.state_dict(), model_name)
    print(">> Model {} saved! ".format(model_name))
    all_losses.append(loss_vanilla)

In [None]:
# Re-export every trained checkpoint as a TorchScript archive (.pt); the
# notebook's stated purpose is to load these weights from MATLAB.
mcro = [250, 1000, 2500, 5000, 7500, 10000]
for nx in mcro:
    # checkpoint to read and scripted archive to write
    model_name = "./models/CASE9_NN_coeff1d_mcro{}".format(nx)
    new_model_name = "./models/CASE9_NN_coeff1d_mcro{}.pt".format(nx)
    # rebuild the training architecture and restore the saved weights
    model = DNN(layers=[2, 100, 100, 1])
    model.load_state_dict(torch.load(model_name))
    # compile to TorchScript and write the archive
    model_scripted = torch.jit.script(model)
    model_scripted.save(new_model_name)
    print("> Model {} saved! ".format(new_model_name))

In [None]:
# predict coefficient on discrete grid and save
grid_data = scipy.io.loadmat("../../../matlab/data/CASE9_Space_Time_Grids")
xpts_e = grid_data["xpts_e"].flatten()
tgrid = grid_data["tt"].flatten()
nx = len(xpts_e)
nt = len(tgrid)
# The spatial column is the same for every time slice, so build it once
# instead of re-creating the tensor on each step of the time loop.
inputs_x = torch.tensor(xpts_e).reshape(-1, 1)
# load models, predict on grid, and save
mcro = [250, 1000, 2500, 5000, 7500, 10000]
for mc in mcro:
    model_name = "./models/CASE9_NN_coeff1d_mcro{}".format(mc)
    # rebuild the training architecture and restore the saved weights
    model = DNN(layers=[2, 100, 100, 1])
    model.load_state_dict(torch.load(model_name))
    # report only once the weights are actually in place
    print("> Model {} is loaded. ".format(model_name))
    # preallocate coefficient array, one row per time point
    coeff = np.zeros([nt, nx])
    for j in range(nt):
        t_j = tgrid[j]
        # prepare data: constant time column next to the spatial grid
        inputs_t = torch.tensor([t_j]).repeat(nx).reshape(-1, 1)
        Xpred = torch.concat([inputs_t, inputs_x], dim=1)
        # predict
        ypred = model(Xpred).detach().numpy().flatten()
        # store
        coeff[j, :] = ypred
    # save
    scipy.io.savemat("CASE9_MCRO_{}_DNN_Coeffs.mat".format(mc), {"coeff": coeff})

In [None]:
%matplotlib inline
import time
import pylab as pl
from IPython import display
# Frame-by-frame movie: the first 1000 raw samples of each time column (red)
# against the matching row of `coeff` (black), i.e. the prediction of the
# last model processed by the grid-prediction cell.
for frame in range(nt):
    pl.clf()
    pl.scatter(x[:1000, frame], y[:1000, frame], color="red", s=2.0)
    pl.plot(xpts_e, coeff[frame, :], color="black", lw=2.0)
    # show the current figure, then queue clearing it for the next frame
    current_fig = pl.gcf()
    display.display(current_fig)
    display.clear_output(wait=True)

In [None]:
# Evaluate the network currently bound to nn_vanilla2d on a regular
# (time, energy) grid.
nx, nt = 500, 500
# energy axis: nx evenly spaced points from 0 up to the largest sample in x
xgrid = torch.tensor(np.linspace(0, x.max(), nx))
# time axis: the first nt entries of t, and their spacing
tgrid = torch.tensor(t[:nt])
dt = tgrid[1] - tgrid[0]
# all (t, x) combinations produced by the project helper cartesian_data
X = cartesian_data(tgrid, xgrid)
# network prediction on every grid point
y_pred = nn_vanilla2d(X)

In [None]:
# visualize contour
import matplotlib
# Heat map of the learned coefficient over the (time, energy) grid.
plt.figure(1, figsize=(8, 6))
font = {'size': 16}
matplotlib.rc('font', **font)
# NOTE(review): reshape(nx, nt) assumes the rows of cartesian_data(tgrid, xgrid)
# run with x as the slow index; this only "works" unconditionally because
# nx == nt here -- confirm against cartesian_data's ordering.
y_pred2d = y_pred.detach().reshape(nx, nt).numpy().T
plt.pcolormesh(tgrid, xgrid, y_pred2d.T)
plt.colorbar()
plt.xlabel(r"Time $t$")
plt.ylabel(r"Energy $v$")
plt.title("Vanilla NN");

In [None]:
import time
import pylab as pl
from IPython import display
from IPython.display import clear_output
# Movie over time: network prediction, Fourier-network prediction, raw
# samples and a per-slice least-squares line, all against the energy grid.
for i in range(nt):
    plt.figure(1);
    plt.plot(xgrid, y_pred2d[i, :], color="black", label="NN");
    # NOTE(review): y_pred_fourier2d is not defined in the visible part of
    # this notebook -- it must come from a cell that is not shown here.
    plt.plot(xgrid, y_pred_fourier2d[i, :], color="blue", label="FNN")
    plt.scatter(x[:, i], y[:, i], s=1.5, color="green", label="");

    # also compute a linear regressor, for comparison: solve the normal
    # equations for [intercept, slope].  The design matrix gets its own name
    # so it no longer overwrites the global grid tensor X.
    X_design = np.concatenate([np.ones([len(x[:, i]), 1]), x[:, i].reshape(-1, 1)], axis=1)
    Y = y[:, i].reshape(-1, 1)
    w = np.linalg.solve((X_design.T@X_design), (X_design.T@Y))
    linear_regression = w[0]+w[1]*xgrid.detach().numpy()
    plt.plot(xgrid, linear_regression, color="purple", label="linear");

    plt.legend(loc=(1.04, 0));
    # Title must be set before the frame is displayed (it used to be set
    # right before clf and never showed).  Column i of the data belongs to
    # time tgrid[i], so label with that rather than dt*(i+1).
    plt.title(r"$t = {}$".format(tgrid[i].item()))
    display.clear_output(wait=True)
    display.display(pl.gcf())
    plt.clf();
    time.sleep(0.01);

## 2 spatial dimensions

In [None]:
# Load the two-energy Monte Carlo data set.
data_path = "../../../matlab/data/case9_mc.mat"
data = scipy.io.loadmat(data_path)
t = data["time"].flatten()
print("number of time points = {}".format(len(t)))
# energy data (nsamples x nt) and response data (nsamples x nt)
x1, x2 = data["mc_energy1"], data["mc_energy2"]
y = data["mc_condexp_target1"]
# Subsample: every x_skip-th sample and every t_skip-th time point.
# NOTE(review): the stop index -1 always excludes the final row/column,
# even when it falls on the stride -- confirm this is intended.
t_skip = 5
x_skip = 2
t = t[:-1:t_skip]
x1 = x1[:-1:x_skip, :-1:t_skip]
x2 = x2[:-1:x_skip, :-1:t_skip]
y = y[:-1:x_skip, :-1:t_skip]
print("data dimensions: ")
print("> input1 = {}".format(x1.shape))
print("> input2 = {}".format(x2.shape))
print("> output = {}".format(y.shape))
print("> total number of points for regression = {}".format(x1.size))

In [None]:
# Assemble the regression data set: each row of X_data is (t, x1, x2) and
# the matching row of y_data is the response at that sample and time.
nt = len(t)
tgrid = t[:nt]
nx = x1.shape[0]
X_data, y_data = [], []
for i, t_i in enumerate(tgrid):
    # constant time column, one entry per sample
    inputs_t = torch.tensor([t_i]).repeat(nx).reshape(-1, 1)
    # the i-th time column of both energies and of the response
    inputs_x1 = torch.tensor(x1[:, i]).reshape(-1, 1)
    inputs_x2 = torch.tensor(x2[:, i]).reshape(-1, 1)
    outputs = torch.tensor(y[:, i]).reshape(-1, 1)
    # collect this time slice
    X_i = torch.cat([inputs_t, inputs_x1, inputs_x2], dim=1)
    X_data.append(X_i)
    y_data.append(outputs)
# stack all time slices on top of each other
X_data = torch.cat(X_data)
y_data = torch.cat(y_data)

def train(inputs, outputs, model, optim, scheduler, batch_size, epochs, shuffle=True):
    """Fit ``model`` to ``(inputs, outputs)`` with a mini-batch mean-squared-error loss.

    Args:
        inputs: 2-d tensor of samples, one row per sample.
        outputs: 2-d tensor of targets with as many rows as ``inputs``.
        model: module evaluated as ``model.forward(batch)``.
        optim: optimizer whose ``step`` accepts a ``closure`` argument.
        scheduler: learning-rate scheduler stepped once per epoch, or a falsy
            value (e.g. ``None``) to disable scheduling.
        batch_size: number of rows per mini-batch (positive integer).
        epochs: number of passes over the data.
        shuffle: when true, rows are re-permuted with
            ``np.random.permutation`` at the start of every epoch.

    Returns:
        list[float]: the loss of every closure evaluation, in call order.
    """
    X, y = inputs, outputs
    nx = X.shape[0]
    # The last batch absorbs any remainder rows.  When there are fewer rows
    # than batch_size we still run a single (short) batch instead of
    # silently skipping training; an empty data set yields no batches.
    num_batches = max(1, nx // batch_size) if nx > 0 else 0
    all_losses = []
    for i in range(epochs):
        print("============================================================\n")
        print("Epoch = {}\n".format(i+1));
        print("============================================================\n")
        model.train()
        if shuffle:
            tmp = np.random.permutation(nx)
            X, y = X[tmp, :].detach().clone(), y[tmp, :].detach().clone()
        for idx in range(num_batches):
            if idx % 100 == 0:
                print("| => | Batch {} |\n".format(idx+1))
            start_idx = idx * batch_size
            # Slice up to nx (not -1) on the final batch: a stop index of -1
            # would exclude the very last sample from every epoch.
            end_idx = nx if idx + 1 == num_batches else start_idx + batch_size
            Xb = X[start_idx:end_idx, :].detach().clone()
            yb = y[start_idx:end_idx, :].detach().clone()
            # Kept from the original: the batch input tracks gradients.
            # NOTE(review): only needed if the model differentiates with
            # respect to its input inside forward -- confirm against DNN.
            Xb.requires_grad = True

            # closure definition: evaluated immediately by optim.step below,
            # so capturing the loop variables is safe
            def closure():
                optim.zero_grad()
                # make a prediction on the batch
                y_pred = model.forward(Xb)
                # compute L^2 loss
                loss = torch.mean((y_pred - yb)**2)
                # backpropagate
                loss.backward()
                if idx % 100 == 0:
                    print("==> Batch {} loss = {}".format(idx, loss.item()))
                all_losses.append(loss.item())
                return loss
            optim.step(closure=closure)
        if scheduler:
            # step scheduler after epoch if there is one
            scheduler.step()
            print("---------- \n")
            print("++ Learning rate reduced, now at = {}".format(scheduler.get_last_lr()[0]))
    return all_losses

In [None]:
# Train a plain fully connected network on the (t, x1, x2) -> response data:
# Adam at lr 8e-3, exponential decay stepped once per epoch, 30 epochs with
# mini-batches of 2**9 rows.
nn_vanilla2d = DNN(layers=[3, 150, 150, 1])
optim = torch.optim.Adam(nn_vanilla2d.parameters(), lr=8e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optim, gamma=0.9999)
loss_vanilla = train(
    X_data,
    y_data,
    nn_vanilla2d,
    optim,
    scheduler,
    batch_size=2**9,
    epochs=30,
    shuffle=True,
)

In [None]:
import time
import pylab as pl
from IPython import display
from IPython.display import clear_output

# Movie over time of the learned 2-d coefficient surface together with the
# raw Monte Carlo samples.
nx, ny = 500, 500
nt = 500
# predict the function we learned at regular grid
xgrid = np.linspace(0, x1.max(), nx)
ygrid = np.linspace(0, x2.max(), ny)
xgrid = torch.tensor(xgrid)
ygrid = torch.tensor(ygrid)
tgrid = torch.tensor(t[:nt])
# t was subsampled when loaded and may hold fewer than the requested 500
# points; clamp nt so the loop below never indexes past the data.
nt = len(tgrid)
dt = tgrid[1]-tgrid[0]
# cartesian grid in space, time is appended in loop
X = cartesian_data(xgrid, ygrid)
num_points = X.shape[0]
# NOTE(review): allocated but never filled or read in the visible code;
# kept in case a later cell relies on it.
all_y_pred = torch.zeros([nx, ny, nt])

# for plotting
xmesh, ymesh = np.meshgrid(xgrid, ygrid)
# plain NumPy views of the axes, used for the regression plane below
xgrid_np = xgrid.detach().numpy()
ygrid_np = ygrid.detach().numpy()
for i in range(nt):
    fig = plt.figure(1, figsize=(20, 10));
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # create the 3-d axes explicitly (the figure is cleared every frame).
    ax = fig.add_subplot(111, projection='3d')
    # get time
    tmp = tgrid[i]
    Xnew = torch.concat([tmp.repeat(X.shape[0]).reshape(-1, 1), X], dim=1)
    # predict - vanilla neural net
    tmp = nn_vanilla2d(Xnew)
    y_pred2d_vanilla = tmp.reshape([nx, ny]).detach().numpy()
    # NOTE(review): the surface is drawn on (ymesh, xmesh) while the samples
    # below are drawn on (x1, x2); whether the axes agree depends on the row
    # ordering of cartesian_data -- confirm.
    im = ax.plot_surface(ymesh, xmesh, y_pred2d_vanilla, cmap=matplotlib.cm.coolwarm, antialiased=False, alpha=0.7);
    cax = fig.add_axes([0.27, 0.8, 0.5, 0.05])
    fig.colorbar(im, cax=cax, orientation='horizontal')

    # predict - linear regression: least-squares plane w0 + w1*x1 + w2*x2
    Xlinear = np.concatenate([np.ones([len(x1[:, i]), 1]), x1[:, i].reshape(-1, 1), \
                             x2[:, i].reshape(-1, 1)], axis=1)
    Ylinear = y[:, i].reshape(-1, 1)
    wlinear = np.linalg.solve((Xlinear.T@Xlinear), (Xlinear.T@Ylinear))
    # evaluate the plane on the grid, shape (nx, ny); this used to be an
    # outer product of the two linear terms, which is not a plane
    linear_regression = (
        wlinear[0]
        + wlinear[1]*xgrid_np.reshape(-1, 1)
        + wlinear[2]*ygrid_np.reshape(1, -1)
    )
    #im2 = ax.plot_surface(ymesh, xmesh, linear_regression, cmap=matplotlib.cm.afmhot, antialiased=False);

    # visualize scatter data
    ax.scatter(x1[:, i], x2[:, i], y[:, i], s=4, color="green")

    # Title the 3-d axes (not the colorbar axes) before the frame is shown;
    # data column i belongs to time tgrid[i].
    ax.set_title(r"$t = {}$".format(tgrid[i].item()))

    # movie
    display.clear_output(wait=True)
    display.display(pl.gcf())
    plt.clf();

# 2d Regression

Try regressing all coefficients simultaneously, i.e. with a single network whose output is two-dimensional.

In [None]:
# Load the two-energy Monte Carlo data set with both regression targets.
data_path = "../../../matlab/data/case9_mc.mat"
data = scipy.io.loadmat(data_path)
t = data["time"].flatten()
print("number of time points = {}".format(len(t)))
# energy data (nsamples x nt)
x1, x2 = data["mc_energy1"], data["mc_energy2"]
# response data (nsamples x nt)
y1, y2 = data["mc_condexp_target1"], data["mc_condexp_target2"]
# Subsample: every x_skip-th sample and every t_skip-th time point.
# NOTE(review): the stop index -1 always excludes the final row/column,
# even when it falls on the stride -- confirm this is intended.
t_skip = 5
x_skip = 2
t = t[:-1:t_skip]
x1 = x1[:-1:x_skip, :-1:t_skip]
x2 = x2[:-1:x_skip, :-1:t_skip]
y1 = y1[:-1:x_skip, :-1:t_skip]
y2 = y2[:-1:x_skip, :-1:t_skip]
print("data dimensions: ")
print("> input1 = {}".format(x1.shape))
print("> input2 = {}".format(x2.shape))
print("> output1 = {}".format(y1.shape))
print("> output2 = {}".format(y2.shape))
print("> total number of points for regression = {}".format(x1.size))