# It trains MF-DeepONet for 2D time dependent Allen Cahn equation
### HF data size = 20 samples, with 50 time steps 1000 points

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

import matplotlib.pyplot as plt
from utils import *

from timeit import default_timer

In [2]:
torch.manual_seed(0)
np.random.seed(0)

# DeepONet

In [3]:
class Deeponet(nn.Module):
    def __init__(self, branchnet, trunknetdepth, width, space_dim):
        super(Deeponet, self).__init__()

        s_dim = space_dim
        tlayers = []
        blayers = [] 
        for i in range(len(branchnet)-2):
            blayers.append(nn.Conv3d(branchnet[i],branchnet[i+1],kernel_size=3,stride=2)) 
            blayers.append(nn.ReLU(inplace=True))
        blayers.append(nn.Conv3d(branchnet[-2],branchnet[-1],kernel_size=2,stride=2)) 
        blayers.append(nn.ReLU(inplace=True))
        blayers.append(nn.Flatten())
        blayers.append(nn.Linear(80,width))
        
        for i in range(trunknetdepth):
            tlayers.append(nn.Linear(s_dim,width)) 
            tlayers.append(nn.ReLU(inplace=True))
            s_dim = width
        
        self.branchnet = nn.Sequential(*blayers)
        self.trunknet = nn.Sequential(*tlayers)
        self.bias = nn.Linear(1,1)
  
    def hadprodsum(self, branch, trunk):
        return torch.einsum("ij,ij->i", branch, trunk)

    def forward(self, xb, xt):
        x1 = self.branchnet(xb)
        x2 = self.trunknet(xt)
        x  = self.hadprodsum(x1,x2)
        x = x.view(-1,1)
        x  = self.bias(x)      
        return x
        

# Training and Data

In [4]:
ntrain = 20
ntest = 1
nreliability = 2000
ntotal = ntrain + ntest
epochs = 50
lst = ntrain
batch_size = 100
side = 65

n_total = ntrain + ntest
learning_rate = 0.001

r = 6
s = int(side/r) + 1


In [5]:
s = int(side/r) + 1
# s = 13

r_test = 1
s_test = side
print(s, s_test)

11 65


In [6]:
path = 'data/ac2dlowhighres_1.mat'
reader = MatReader(path)
u_low = np.array(reader.read_field('ulr_nextstep'))
u_high = np.array(reader.read_field('uhr'))
x_coord, y_coord, t_coord = np.meshgrid( reader.read_field('x')[:, ::r], reader.read_field('y')[:, ::r], reader.read_field('time') )
time = reader.read_field('time')

x_coord_test, y_coord_test, t_coord_test = np.meshgrid( reader.read_field('x'), reader.read_field('y'), reader.read_field('time') )


In [7]:
print(u_low.shape, u_high.shape)

(4000, 50, 65, 65) (4000, 51, 65, 65)


In [8]:
coords = np.stack((x_coord[..., :50].flatten(), y_coord[..., :50].flatten(), t_coord[..., :50].flatten()), axis=-1)
print(coords.shape)

coords_test = np.stack((x_coord_test[..., :50].flatten(), y_coord_test[..., :50].flatten(), t_coord_test[..., :50].flatten()), axis=-1)
print(coords_test.shape)


(6050, 3)
(211250, 3)


In [9]:
x_or_h = u_high[:ntrain,:-1, ::r,::r] 
y_or_h = u_high[:ntrain,1:, ::r,::r] 
y_or_l = u_low[:ntrain, :, ::r, ::r] 

x_or_h_test_r = u_high[:ntest,:-1, ::r,::r] 
y_or_l_test_r = u_low[:ntest, :, ::r, ::r] 

y_or_h_test = u_high[:ntest, 1:,::r_test, ::r_test] 
y_or_l_test = u_low[:ntest, :, ::r_test, ::r_test] 

print(x_or_h.shape, y_or_h.shape, y_or_l.shape, y_or_h_test.shape, y_or_l_test.shape)

(20, 50, 11, 11) (20, 50, 11, 11) (20, 50, 11, 11) (1, 50, 65, 65) (1, 50, 65, 65)


In [10]:
# Create the input and output (residual) dataset
x_mf = np.stack((x_or_h, y_or_l), axis=-1)
y_mf = y_or_h - y_or_l.reshape((ntrain,time.shape[1]-1,s,s))

x_mf_test = np.stack((x_or_h_test_r, y_or_l_test_r), axis=-1)
y_mf_test = y_or_h_test - y_or_l_test.reshape((ntest,time.shape[1]-1,s_test,s_test))

print(x_mf.shape, y_mf.shape, x_mf_test.shape, y_mf_test.shape)

(20, 50, 11, 11, 2) (20, 50, 11, 11) (1, 50, 11, 11, 2) (1, 50, 65, 65)


In [11]:
# Split the training and testing datasets
xb_train_mf, y_train_mf = x_mf[:ntrain, ...].transpose(0,4,1,2,3), y_mf[:ntrain, ...] 
xb_test_mf, y_test_mf = x_mf_test[:ntest, ...].transpose(0,4,1,2,3), y_mf_test[:ntest, ...] 

print(xb_train_mf.shape, y_train_mf.shape, xb_test_mf.shape, y_test_mf.shape)


(20, 2, 50, 11, 11) (20, 50, 11, 11) (1, 2, 50, 11, 11) (1, 50, 65, 65)


In [12]:
# Read data:
xb_train_mf  = np.repeat(xb_train_mf.astype(np.float32), coords.shape[0], axis=0)
xt_train_mf =  np.tile(coords.astype(np.float32), (ntrain,1))
y_train_mf  =  y_train_mf.astype(np.float32).reshape(-1,1)

xb_test_mf  = np.repeat(xb_test_mf.astype(np.float32), coords_test.shape[0], axis=0)
xt_test_mf =  np.tile(coords_test.astype(np.float32), (ntest,1))
y_test_mf  =  y_test_mf.astype(np.float32).reshape(-1,1)

In [13]:
print(xb_train_mf.shape, xt_train_mf.shape, y_train_mf.shape, xb_test_mf.shape, xt_test_mf.shape, y_test_mf.shape)


(121000, 2, 50, 11, 11) (121000, 3) (121000, 1) (211250, 2, 50, 11, 11) (211250, 3) (211250, 1)


In [14]:
# Define the dataloaders

xb_train_mf = torch.from_numpy(xb_train_mf)
xt_train_mf = torch.from_numpy(xt_train_mf)
xb_test_mf = torch.from_numpy(xb_test_mf)
xt_test_mf = torch.from_numpy(xt_test_mf)
y_train_mf = torch.from_numpy(y_train_mf)
y_test_mf = torch.from_numpy(y_test_mf)

train_loader_mf = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(xb_train_mf, xt_train_mf, y_train_mf),
                                              batch_size=batch_size, shuffle=True)
test_loader_mf = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(xb_test_mf, xt_test_mf, y_test_mf),
                                             batch_size=batch_size, shuffle=False)


# MF Model

In [15]:
# model
# bdepth = 3
blayers = [xb_train_mf.shape[1], 16, 32, 16]
tdepth = 3
width = 20

inputsizeb = xb_train_mf.shape[-1]
spdim = xt_train_mf.shape[-1]

model = Deeponet(blayers, tdepth, width, spdim).cuda()

# if trained model is already available
# model = torch.load('model/deeponet_Allen_Cahn_10')

print(count_params(model))

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


21390


In [16]:
for ep in range(epochs):
    model.train()
    t1 = default_timer()
    train_mse = 0
    train_l2 = 0
    for xb,xt, y in train_loader_mf:
        xb,xt, y = xb.cuda(), xt.cuda(), y.cuda()
        
        optimizer.zero_grad()
        out = model(xb, xt)

        mse = F.mse_loss(out.view(out.shape[0], -1), y.view(out.shape[0], -1), reduction='mean')
        mse.backward() # use the l2 relative loss

        optimizer.step()
        train_mse += mse.item()

    model.eval()
    test_mse = 0
    with torch.no_grad():
        for xb,xt, y in test_loader_mf:
            xb,xt, y = xb.cuda(), xt.cuda(), y.cuda()

            out = model(xb, xt)
            tmse = F.mse_loss(out.view(out.shape[0], -1), y.view(out.shape[0], -1), reduction='mean')
            test_mse += tmse.item()

    train_mse /= len(train_loader_mf)
    test_mse /= len(test_loader_mf)
    t2 = default_timer()
    print(f'epoch {ep}, time_taken: {t2-t1}, train_mse: {train_mse},test_mse: {test_mse}')


epoch 0, time_taken: 21.727362303994596, train_mse: 0.006339156749987329,test_mse: 0.00591062257464002
epoch 1, time_taken: 19.473189159994945, train_mse: 0.0030183341871807064,test_mse: 0.0058127859615203685
epoch 2, time_taken: 20.345944190165028, train_mse: 0.0029975507919080577,test_mse: 0.005816880777217675
epoch 3, time_taken: 20.117357693146914, train_mse: 0.0029691137477535707,test_mse: 0.005796475022720996
epoch 4, time_taken: 19.81907417019829, train_mse: 0.002936429219205685,test_mse: 0.005795304217684883
epoch 5, time_taken: 20.163219870999455, train_mse: 0.002936152902526554,test_mse: 0.006400909321867476
epoch 6, time_taken: 17.878504429012537, train_mse: 0.0029080599777158525,test_mse: 0.005832636065737619
epoch 7, time_taken: 18.738379662157968, train_mse: 0.002888157828834845,test_mse: 0.006147619090369996
epoch 8, time_taken: 17.760732654947788, train_mse: 0.0028656935792422105,test_mse: 0.005889427847306297
epoch 9, time_taken: 19.04607144300826, train_mse: 0.0028443

KeyboardInterrupt: 

In [None]:
# Save the MF-DeepONet model

# torch.save(model, 'model/deeponet_Allen_Cahn_10')

In [None]:
# If the training is done, delete some variables for memory
del xb_train_mf, xt_train_mf, y_train_mf, xb_test_mf, xt_test_mf, y_test_mf


In [None]:
batch = 2
data_range = np.arange(0,nreliability,batch)


In [None]:
# Prediction:
pred_mf = [] 
actual = []

for i in range(len(data_range)):
    print('Data_range-{}'.format(i))

    x_or_h_test_r = u_high[data_range[i]:data_range[i]+batch,:-1, ::r,::r] 
    y_or_l_test_r = u_low[data_range[i]:data_range[i]+batch, :, ::r, ::r] 
    y_or_h_test = u_high[data_range[i]:data_range[i]+batch, 1:,::r_test, ::r_test] 
    y_or_l_test = u_low[data_range[i]:data_range[i]+batch, :, ::r_test, ::r_test] 

    # Create the input and output (residual) dataset
    x_mf_test = np.stack((x_or_h_test_r, y_or_l_test_r), axis=-1)
    y_test_mf = y_or_h_test - y_or_l_test.reshape((batch,time.shape[1]-1,s_test,s_test))

    # Split the training and testing datasets
    xb_test_mf = x_mf_test.transpose(0,4,1,2,3) 

    xb_test_mf  = np.repeat(xb_test_mf.astype(np.float32), coords_test.shape[0], axis=0)
    xt_test_mf =  np.tile(coords_test.astype(np.float32), (batch,1))
    y_test_mf  =  y_test_mf.astype(np.float32).reshape(-1,1)

    # Define the dataloaders
    xb_test_mf = torch.from_numpy(xb_test_mf)
    xt_test_mf = torch.from_numpy(xt_test_mf)
    y_test_mf = torch.from_numpy(y_test_mf)
    
    test_loader_mf = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(xb_test_mf, xt_test_mf, y_test_mf),
                                                 batch_size=batch_size, shuffle=False)

    with torch.no_grad():
        index = 0
        for xb, xt, y in test_loader_mf:
            tmse = 0
            xb, xt, y = xb.cuda(), xt.cuda(), y.cuda()
    
            out = model(xb, xt)
            tmse = F.mse_loss(out.view(out.shape[0], -1), y.view(out.shape[0], -1), reduction='mean').item()
    
            pred_mf.append( out.cpu() )
            actual.append( y.cpu() )
            # print("Data-Range-{}, Batch-{}, Test-loss-{:0.6f}".format( data_range[i], index, tmse ))
            index += 1
    
actual = torch.cat(( actual ))
pred_mf = torch.cat(( pred_mf ))
print('Mean mse_mf-{}'.format(F.mse_loss(actual, pred_mf).item()))


In [None]:
pred_mf = pred_mf.reshape(nreliability, s, s)
actual = actual.reshape(nreliability, s, s)

print(pred_mf.shape, actual.shape)

In [None]:
# Add the residual operator to LF-dataset 

real_mf = y_test_mf.reshape(ntest,50,s,s) + u_low 
output_mf = pred_mf.reshape(ntest,50,s,s) + u_low 


In [None]:
print(real_mf.shape, output_mf.shape)

In [None]:
y_test_mf.reshape(ntest, 50, 17, 17).shape

In [None]:
pred_mf.shape

In [None]:
mse_pred = F.mse_loss(output_mf, real_mf).item()
mse_LF = F.mse_loss(real_mf, torch.from_numpy(x_mf[-ntest:, ..., 1])).item()
mse_residual = F.mse_loss(y_test_mf, pred_mf)

print('MSE-Predicted solution-{:0.4f}, MSE-LF Data-{:0.4f}, MSE-Residual-{:0.4f}'
      .format(mse_pred, mse_LF, mse_residual))


In [None]:
fig4, axs = plt.subplots(nrows=3, ncols=5, figsize=(16, 6), facecolor='w', edgecolor='k')
fig4.subplots_adjust(hspace=0.35, wspace=0.2)

fig4.suptitle(f'Predictions MFWNO AC2d Size', fontsize=16)
sample = 0
index = 0 
for i in range(50):
    if i % 10 == 0:
        im = axs[0, index].imshow(real_mf[sample, i, :, :], cmap='jet', vmin=-1, vmax=1)
        plt.colorbar(im, ax=axs[0, index])
        im = axs[1, index].imshow(output_mf[sample, i, :, :], cmap='jet', vmin=-1, vmax=1)
        plt.colorbar(im, ax=axs[1, index])
        im = axs[2, index].imshow(torch.abs(real_mf[sample, i, :, :] - output_mf[sample, i, :, :]),
                                    cmap='jet')
        plt.colorbar(im, ax=axs[2, index])
        index += 1
        