In [15]:
import torch
import torch.nn as nn
from model import dataset
from torch.autograd import grad
import numpy as np
from model.model import PDETransformer
from collections import defaultdict
from bcics.boundary_conditions import DirichletBC
from bcics.initial_conditions import IC
from tqdm import tqdm

In [16]:
def relative_l2_error(A, B):
    l2_error = torch.norm(A - B)
    l2_norm_A = torch.norm(A)
    
    # To avoid division by zero, add a small constant (e.g., 1e-8)
    epsilon = 1e-8
    relative_error = l2_error / (l2_norm_A + epsilon)
    
    return relative_error

In [17]:
def gen_testdata():
    data = np.load("./data/Burgers.npz")
    t, x, exact = data["t"], data["x"], data["usol"].T
    xx, tt = np.meshgrid(x, t)
    X = np.vstack((np.ravel(xx), np.ravel(tt))).T
    y = exact.flatten()[:, None]
    return X, y

In [18]:
X, y = gen_testdata()

In [19]:
select_index = np.random.randint(X.shape[0], size=2048)
# X_train = torch.tensor(X[select_index, :],dtype=torch.float32)
# y_train = torch.tensor(y[select_index, :],dtype=torch.float32)

In [20]:
def pde(x, y):
    dy_x = grad(y, x, grad_outputs=torch.ones_like(y), retain_graph=True, create_graph=True)[0][:, :, 0:1]
    dy_t = grad(y, x, grad_outputs=torch.ones_like(y), retain_graph=True, create_graph=True)[0][:, :, 1:]
    dy_xx = grad(dy_x, x, grad_outputs=torch.ones_like(y), retain_graph=True, create_graph=True)[0][:, :, 0:1]
    return dy_t + y * dy_x - 0.01 / np.pi * dy_xx

In [21]:
geom = [(-1, 1), (0, 0.99)]

bc_1 = DirichletBC(geom, boundary_dim=0, boundary_point=-1, time_dim=True, func=lambda x: torch.zeros(list(x.shape[:-1])+[1]).to("cuda"))
bc_2 = DirichletBC(geom, boundary_dim=0, boundary_point=1, time_dim=True, func=lambda x: torch.zeros(list(x.shape[:-1])+[1]).to("cuda"))
ic = IC(geom, lambda x: -torch.sin(np.pi * x[:, :, 0:1]))

In [68]:
config = defaultdict(num_dim=2,
                         n_targets=1,
                         n_hidden=64,
                         num_feat_layers=3,
                         num_encoder_layers=2,
                         activation='tanh',
                         n_head=2,
                         dim_feedforward=64,
                         attention_type='fourier',  # no softmax
                         layer_norm=False,
                         attn_norm=False,
                         attn_norm_type='layer',
                         batch_norm=True,
                         spacial_residual=True,
                         num_regressor_layers=3,
                         spacial_fc=False,
                         return_attn_weight=True)

In [69]:
pinn_dataset = dataset.pinn_collect_dataset(num_collect=2048, geom=geom, time_dim=True,
                 distribution='random', given_data=False)
collect_data = pinn_dataset.prepare_collection_data()

In [70]:
collect_data = pinn_dataset.prepare_collection_data()

boundary_data_1 = np.random.uniform(0, 1, (80, 1))
start, end= geom[1]
boundary_data_1[:, 0] = boundary_data_1[:, 0] * (end - start) + start
new_col = np.full((boundary_data_1.shape[0], 1), -1.)
boundary_data_1 = np.hstack((new_col, boundary_data_1))
boundary_data_1 = torch.tensor(boundary_data_1, dtype=torch.float32)

boundary_data_2 = np.random.uniform(0, 1, (80, 1))
start, end= geom[1]
boundary_data_2[:, 0] = boundary_data_2[:, 0] * (end - start) + start
new_col = np.full((boundary_data_2.shape[0], 1), 1.)
boundary_data_2 = np.hstack((new_col, boundary_data_2))
boundary_data_2 = torch.tensor(boundary_data_2, dtype=torch.float32)

initial_data = np.random.uniform(0, 1, (160, 1))
start, end= geom[0]
initial_data[:, 0] = initial_data[:, 0] * (end - start) + start
new_col = np.full((initial_data.shape[0], 1), 0.)
initial_data = np.hstack((initial_data, new_col))
initial_data = torch.tensor(initial_data, dtype=torch.float32)

data_list = [collect_data,boundary_data_1, boundary_data_2, initial_data]
# data_list = [X_train,boundary_data_1, boundary_data_2, initial_data]
all_data = torch.cat(data_list, dim=0)


sizes = [tensor.size(0) for tensor in data_list]
begin_indices = [sum(sizes[:i]) for i in range(len(sizes))]
end_indices = [sum(sizes[:i+1]) for i in range(len(sizes))]

In [71]:
boundary_data_2 = np.random.uniform(0, 1, (80, 1))
start, end= geom[1]
boundary_data_2[:, 0] = boundary_data_2[:, 0] * (end - start) + start
new_col = np.full((boundary_data_2.shape[0], 1), 1.)
boundary_data_2 = np.hstack((new_col, boundary_data_2))
boundary_data_2 = torch.tensor(boundary_data_2, dtype=torch.float32)

In [72]:
initial_data = np.random.uniform(0, 1, (160, 1))
start, end= geom[0]
initial_data[:, 0] = initial_data[:, 0] * (end - start) + start
new_col = np.full((initial_data.shape[0], 1), 0.)
initial_data = np.hstack((initial_data, new_col))
initial_data = torch.tensor(initial_data, dtype=torch.float32)

In [73]:
data_list = [collect_data,boundary_data_1, boundary_data_2, initial_data]
# data_list = [X_train,boundary_data_1, boundary_data_2, initial_data]
all_data = torch.cat(data_list, dim=0)

In [74]:


sizes = [tensor.size(0) for tensor in data_list]
begin_indices = [sum(sizes[:i]) for i in range(len(sizes))]
end_indices = [sum(sizes[:i+1]) for i in range(len(sizes))]

In [75]:
begin_indices

[0, 2048, 2128, 2208]

In [83]:
net = PDETransformer(**config)
net.to("cuda:0")
max_iter_adam = 2000
lr = 1e-2
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
MSE = nn.MSELoss()

In [84]:
from torch.optim.lr_scheduler import CosineAnnealingLR, ExponentialLR
scheduler = CosineAnnealingLR(optimizer, max_iter_adam, 1e-5)

In [85]:
def get_data(batch=8):
    all_data_list = []
    for b in range(batch):
        collect_data = pinn_dataset.prepare_collection_data()

        boundary_data_1 = np.random.uniform(0, 1, (80, 1))
        start, end= geom[1]
        boundary_data_1[:, 0] = boundary_data_1[:, 0] * (end - start) + start
        new_col = np.full((boundary_data_1.shape[0], 1), -1.)
        boundary_data_1 = np.hstack((new_col, boundary_data_1))
        boundary_data_1 = torch.tensor(boundary_data_1, dtype=torch.float32)

        boundary_data_2 = np.random.uniform(0, 1, (80, 1))
        start, end= geom[1]
        boundary_data_2[:, 0] = boundary_data_2[:, 0] * (end - start) + start
        new_col = np.full((boundary_data_2.shape[0], 1), 1.)
        boundary_data_2 = np.hstack((new_col, boundary_data_2))
        boundary_data_2 = torch.tensor(boundary_data_2, dtype=torch.float32)

        initial_data = np.random.uniform(0, 1, (160, 1))
        start, end= geom[0]
        initial_data[:, 0] = initial_data[:, 0] * (end - start) + start
        new_col = np.full((initial_data.shape[0], 1), 0.)
        initial_data = np.hstack((initial_data, new_col))
        initial_data = torch.tensor(initial_data, dtype=torch.float32)

        data_list = [collect_data,boundary_data_1, boundary_data_2, initial_data]
        # data_list = [X_train,boundary_data_1, boundary_data_2, initial_data]
        all_data = torch.cat(data_list, dim=0).unsqueeze(0)
        
        all_data_list.append(all_data)
    
    batch_data = torch.concat(all_data_list, dim=0)
    sizes = [tensor.size(0) for tensor in data_list]
    begin_indices = [sum(sizes[:i]) for i in range(len(sizes))]
    end_indices = [sum(sizes[:i+1]) for i in range(len(sizes))]
    return batch_data

In [79]:
for i in range(1, max_iter_adam+1):
    
    batch_data = get_data(batch=4)
    
    input_data = batch_data.to('cuda:0') 
    indices = torch.arange(input_data.size(1))
    shuffled_indices = indices[torch.randperm(len(indices))]
    input_data = input_data[:, shuffled_indices]
    
    input_data.requires_grad=True
    output, weight_list = net(input_data)
    # print(output.shape)
    
#     loss_map=0
    
#     for weight in weight_list:
#         weight=weight.squeeze(0)
#         for index in range(weight.size(0)):
#             loss_tmp = MSE(output.squeeze(0), torch.mm(weight[index], output.squeeze(0)))
#             loss_map+=loss_tmp
    
    
    
    pde_results= pde(input_data, output)
    loss_res = MSE(pde_results, torch.zeros_like(pde_results))
    
    output=output[:,shuffled_indices.argsort()]
    input_data=input_data[:,shuffled_indices.argsort()]
    
    bc_error_1 = bc_1.error(input_data[:, begin_indices[1]:end_indices[1]],output[:, begin_indices[1]:end_indices[1]])
    
    
    loss_bc_1 = MSE(bc_error_1, torch.zeros_like(bc_error_1))
    
    bc_error_2 = bc_2.error(input_data[:, begin_indices[2]:end_indices[2]],output[:, begin_indices[2]:end_indices[2]])
    loss_bc_2 = MSE(bc_error_2, torch.zeros_like(bc_error_2))
    
    ic_error = ic.error(input_data[:, begin_indices[3]:end_indices[3]],output[:, begin_indices[3]:end_indices[3]])
    loss_ic = MSE(ic_error, torch.zeros_like(ic_error))
    
    loss= loss_res+loss_bc_1+loss_bc_2+loss_ic #+ loss_map
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
    if i%10 ==0:
        print('Epoch:%d \t Loss:%.8f \t Loss_pde:%.8f \t Loss_bc_1:%.8f \t Loss_bc_2:%.8f \t Loss_ic:%.8f'%(i,loss.detach().cpu().numpy(), loss_res.detach().cpu().numpy(), loss_bc_1.detach().cpu().numpy(), loss_bc_2.detach().cpu().numpy(), loss_ic.detach().cpu().numpy()))

Epoch:10 	 Loss:0.23006508 	 Loss_pde:0.01010154 	 Loss_bc_1:0.01668563 	 Loss_bc_2:0.01792203 	 Loss_ic:0.18535587
Epoch:20 	 Loss:0.15993375 	 Loss_pde:0.02500059 	 Loss_bc_1:0.01351917 	 Loss_bc_2:0.01396872 	 Loss_ic:0.10744525
Epoch:30 	 Loss:0.08302228 	 Loss_pde:0.02471663 	 Loss_bc_1:0.00737864 	 Loss_bc_2:0.00379564 	 Loss_ic:0.04713137
Epoch:40 	 Loss:0.04270671 	 Loss_pde:0.01993411 	 Loss_bc_1:0.00247461 	 Loss_bc_2:0.00993042 	 Loss_ic:0.01036757
Epoch:50 	 Loss:0.02109241 	 Loss_pde:0.01085589 	 Loss_bc_1:0.00392638 	 Loss_bc_2:0.00147624 	 Loss_ic:0.00483390
Epoch:60 	 Loss:0.00919404 	 Loss_pde:0.00544563 	 Loss_bc_1:0.00056832 	 Loss_bc_2:0.00188784 	 Loss_ic:0.00129226
Epoch:70 	 Loss:0.00676458 	 Loss_pde:0.00325604 	 Loss_bc_1:0.00171937 	 Loss_bc_2:0.00101040 	 Loss_ic:0.00077877
Epoch:80 	 Loss:0.00356414 	 Loss_pde:0.00213791 	 Loss_bc_1:0.00038582 	 Loss_bc_2:0.00069345 	 Loss_ic:0.00034695
Epoch:90 	 Loss:0.00601090 	 Loss_pde:0.00200363 	 Loss_bc_1:0.00142950 

KeyboardInterrupt: 

In [86]:
def fixed_data(batch=8):
    all_data_list = []
    indices = torch.arange(X.shape[0])
    shuffled_indices = indices[torch.randperm(len(indices))]
    X_tensor = torch.tensor(X,dtype=torch.float32)[shuffled_indices]
    all_collect_data = X_tensor.unsqueeze(0).reshape(batch, 3200,-1)
    
    for b in range(batch):   
        collect_data = all_collect_data[b]
        
        boundary_data_1 = np.random.uniform(0, 1, (80, 1))
        start, end= geom[1]
        boundary_data_1[:, 0] = boundary_data_1[:, 0] * (end - start) + start
        new_col = np.full((boundary_data_1.shape[0], 1), -1.)
        boundary_data_1 = np.hstack((new_col, boundary_data_1))
        boundary_data_1 = torch.tensor(boundary_data_1, dtype=torch.float32)

        boundary_data_2 = np.random.uniform(0, 1, (80, 1))
        start, end= geom[1]
        boundary_data_2[:, 0] = boundary_data_2[:, 0] * (end - start) + start
        new_col = np.full((boundary_data_2.shape[0], 1), 1.)
        boundary_data_2 = np.hstack((new_col, boundary_data_2))
        boundary_data_2 = torch.tensor(boundary_data_2, dtype=torch.float32)

        initial_data = np.random.uniform(0, 1, (160, 1))
        start, end= geom[0]
        initial_data[:, 0] = initial_data[:, 0] * (end - start) + start
        new_col = np.full((initial_data.shape[0], 1), 0.)
        initial_data = np.hstack((initial_data, new_col))
        initial_data = torch.tensor(initial_data, dtype=torch.float32)

        data_list = [collect_data, boundary_data_1, boundary_data_2, initial_data]
        # data_list = [X_train,boundary_data_1, boundary_data_2, initial_data]
        
        all_data = torch.cat(data_list, dim=0).unsqueeze(0)
        

        all_data_list.append(all_data)
    
    batch_data = torch.concat(all_data_list, dim=0)
    sizes = [tensor.size(0) for tensor in data_list]
    begin_indices = [sum(sizes[:i]) for i in range(len(sizes))]
    end_indices = [sum(sizes[:i+1]) for i in range(len(sizes))]
    return batch_data, shuffled_indices, begin_indices, end_indices

In [87]:
batch_data, shuffled_indices, begin_indices, end_indices =fixed_data(batch=8)

for i in range(1, max_iter_adam+1):
    
    input_data = batch_data.to('cuda:0') 
    input_data.requires_grad=True
    output, weight_list = net(input_data)
    # print(output.shape)
    
#     loss_map=0
    
#     for weight in weight_list:
#         weight=weight.squeeze(0)
#         for index in range(weight.size(0)):
#             loss_tmp = MSE(output.squeeze(0), torch.mm(weight[index], output.squeeze(0)))
#             loss_map+=loss_tmp
    
    
    
    pde_results= pde(input_data, output)
    loss_res = MSE(pde_results, torch.zeros_like(pde_results))
    
    # output=output[:,shuffled_indices.argsort()]
    # input_data=input_data[:,shuffled_indices.argsort()]
    
    bc_error_1 = bc_1.error(input_data[:, begin_indices[1]:end_indices[1]],output[:, begin_indices[1]:end_indices[1]])
    
    
    loss_bc_1 = MSE(bc_error_1, torch.zeros_like(bc_error_1))
    
    bc_error_2 = bc_2.error(input_data[:, begin_indices[2]:end_indices[2]],output[:, begin_indices[2]:end_indices[2]])
    loss_bc_2 = MSE(bc_error_2, torch.zeros_like(bc_error_2))
    
    ic_error = ic.error(input_data[:, begin_indices[3]:end_indices[3]],output[:, begin_indices[3]:end_indices[3]])
    loss_ic = MSE(ic_error, torch.zeros_like(ic_error))
    
    loss= loss_res+loss_bc_1+loss_bc_2+loss_ic #+ loss_map
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
    if i%10 ==0:
        print('Epoch:%d \t Loss:%.8f \t Loss_pde:%.8f \t Loss_bc_1:%.8f \t Loss_bc_2:%.8f \t Loss_ic:%.8f'%(i,loss.detach().cpu().numpy(), loss_res.detach().cpu().numpy(), loss_bc_1.detach().cpu().numpy(), loss_bc_2.detach().cpu().numpy(), loss_ic.detach().cpu().numpy()))

Epoch:10 	 Loss:0.37086141 	 Loss_pde:0.02937155 	 Loss_bc_1:0.02224155 	 Loss_bc_2:0.04467606 	 Loss_ic:0.27457225
Epoch:20 	 Loss:0.29234993 	 Loss_pde:0.01519601 	 Loss_bc_1:0.02117074 	 Loss_bc_2:0.04551912 	 Loss_ic:0.21046405
Epoch:30 	 Loss:0.22837245 	 Loss_pde:0.01384033 	 Loss_bc_1:0.00459437 	 Loss_bc_2:0.04949094 	 Loss_ic:0.16044681


KeyboardInterrupt: 

In [90]:
torch.save(net.state_dict(), './net_adam.pt')

In [91]:
net.eval()
y_tensor = torch.tensor(y,dtype=torch.float32)[shuffled_indices]
test_label = y_tensor.unsqueeze(0).reshape(8, 3200,-1)

with torch.no_grad():
    test_pred,_ = net(batch_data.to("cuda:0"))


In [92]:
    
results = relative_l2_error(test_label, test_pred[:, :3200].squeeze(0).cpu())

In [93]:
results

tensor(1.0380)

In [94]:
net_lbfgs = PDETransformer(**config)
net_lbfgs.load_state_dict(torch.load('./net_adam.pt'))
net_lbfgs.to("cuda:0")
optimizer_lbfgs = torch.optim.LBFGS(
            net_lbfgs.parameters(), 
            lr=1, 
            max_iter=100,  
            history_size=10,
            tolerance_grad=1e-8, 
            tolerance_change=0,
            # line_search_fn='strong_wolfe',
        )
lbfgs_iter = 10000
net_lbfgs.train()

PDETransformer(
  (feat_extract): Sequential(
    (0): Linear(in_features=2, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): Tanh()
    (4): Linear(in_features=64, out_features=64, bias=True)
    (5): Tanh()
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): Tanh()
  )
  (encoder_layers): ModuleList(
    (0-1): 2 x PDETransformerEncoderLayer(
      (attn): Attention(
        (linears): ModuleList(
          (0-2): 3 x Linear(in_features=64, out_features=64, bias=True)
        )
      )
      (ff): FeedForward(
        (lr1): Linear(in_features=64, out_features=64, bias=True)
        (activation): Tanh()
        (bn): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (lr2): Linear(in_features=64, out_features=64, bias=True)
      )
    )
  )
  (regressor): PointwiseRegressor(
    (ff): ModuleList(
      (0-2): 3 x Sequential(
        (0): Linear(in_features=64, out_fe

In [95]:
prev_n_iter = 0
epoch = 0

while prev_n_iter < lbfgs_iter:

    def closure():
        input_data = batch_data.to('cuda:0') 
        input_data.requires_grad=True
        output, weight_list = net_lbfgs(input_data)
        optimizer_lbfgs.zero_grad()
        output, _ = net_lbfgs(input_data)


        pde_results= pde(input_data, output)
        loss_res = MSE(pde_results, torch.zeros_like(pde_results))

        # output=output[:,shuffled_indices.argsort()]
        # input_data=input_data[:,shuffled_indices.argsort()]

        bc_error_1 = bc_1.error(input_data[:, begin_indices[1]:end_indices[1]],output[:, begin_indices[1]:end_indices[1]])

        loss_bc_1 = MSE(bc_error_1, torch.zeros_like(bc_error_1))

        bc_error_2 = bc_2.error(input_data[:, begin_indices[2]:end_indices[2]],output[:, begin_indices[2]:end_indices[2]])
        loss_bc_2 = MSE(bc_error_2, torch.zeros_like(bc_error_2))

        ic_error = ic.error(input_data[:, begin_indices[3]:end_indices[3]],output[:, begin_indices[3]:end_indices[3]])
        loss_ic = MSE(ic_error, torch.zeros_like(ic_error))

        loss= loss_res+loss_bc_1+loss_bc_2+loss_ic
        loss.backward()
        return loss

    optimizer_lbfgs.step(closure)
    n_iter = optimizer_lbfgs.state_dict()["state"][0]['n_iter']
    if prev_n_iter == n_iter:
        break
    epoch += n_iter - prev_n_iter
    prev_n_iter = n_iter
    loss, loss_res, loss_bc_1, loss_bc_2, loss_ic = test()
    print('Epoch: %d, Loss:%.8f \t Loss_pde:%.8f \t Loss_bc_1:%.8f \t Loss_bc_2:%.8f \t Loss_ic:%.8f'%(prev_n_iter, loss.detach().cpu().numpy(), loss_res.detach().cpu().numpy(), loss_bc_1.detach().cpu().numpy(), loss_bc_2.detach().cpu().numpy(), loss_ic.detach().cpu().numpy()))


NameError: name 'test' is not defined

In [267]:
test_data.shape

torch.Size([4, 6400, 2])

In [266]:
net.eval()
indices = torch.arange(X.shape[0])
shuffled_indices = indices[torch.randperm(len(indices))]

test_data = torch.tensor(X,dtype=torch.float32).to('cuda:0').unsqueeze(0).reshape(4, 6400,-1)
test_label = torch.tensor(y,dtype=torch.float32).to('cuda:0').unsqueeze(0).reshape(4, 6400,-1)

In [269]:
with torch.no_grad():
    test_pred,_ = net_lbfgs(test_data)

In [270]:
test_label.shape

torch.Size([4, 6400, 1])

In [271]:
# test_pred = test_pred.view(100, 256,1).unsqueeze(0)
# test_label = test_label.view(100, 256,1).unsqueeze(0)
results = relative_l2_error(test_label, test_pred.squeeze(0))

In [272]:
results

tensor(0.6387, device='cuda:0')

In [256]:
net_lbfgs = PDETransformer(**config)
net_lbfgs.load_state_dict(torch.load('./net_adam.pt'))
net_lbfgs.to("cuda:0")
optimizer_lbfgs = torch.optim.LBFGS(
            net_lbfgs.parameters(), 
            lr=1, 
            max_iter=100,  
            history_size=10,
            tolerance_grad=1e-8, 
            tolerance_change=0,
            # line_search_fn='strong_wolfe',
        )
lbfgs_iter = 10000
net_lbfgs.train()

PDETransformer(
  (feat_extract): Sequential(
    (0): Linear(in_features=2, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): Tanh()
    (4): Linear(in_features=64, out_features=64, bias=True)
    (5): Tanh()
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): Tanh()
  )
  (encoder_layers): ModuleList(
    (0-1): 2 x PDETransformerEncoderLayer(
      (attn): Attention(
        (linears): ModuleList(
          (0-2): 3 x Linear(in_features=64, out_features=64, bias=True)
        )
      )
      (layer_norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (layer_norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ff): FeedForward(
        (lr1): Linear(in_features=64, out_features=64, bias=True)
        (activation): Tanh()
        (lr2): Linear(in_features=64, out_features=64, bias=True)
      )
    )
  )
  (regressor): PointwiseRegressor(
    (fc): Linear(in_features=66, out_fe

In [257]:
def test():
    net_lbfgs.eval()
    
    batch_data = get_data(batch=1)
    
    input_data = batch_data.to('cuda:0') 
    indices = torch.arange(input_data.size(1))
    shuffled_indices = indices[torch.randperm(len(indices))]
    input_data = input_data[:, shuffled_indices]
    
    input_data.requires_grad=True
    output, _ = net_lbfgs(input_data)
    
    pde_results= pde(input_data, output)
    loss_res = MSE(pde_results, torch.zeros_like(pde_results))
    
    
    output=output[:,shuffled_indices.argsort()]
    input_data=input_data[:,shuffled_indices.argsort()]
    
    bc_error_1 = bc_1.error(input_data[:, begin_indices[1]:end_indices[1]],output[:, begin_indices[1]:end_indices[1]])
    
    
    loss_bc_1 = MSE(bc_error_1, torch.zeros_like(bc_error_1))
    
    bc_error_2 = bc_2.error(input_data[:, begin_indices[2]:end_indices[2]],output[:, begin_indices[2]:end_indices[2]])
    loss_bc_2 = MSE(bc_error_2, torch.zeros_like(bc_error_2))
    
    ic_error = ic.error(input_data[:, begin_indices[3]:end_indices[3]],output[:, begin_indices[3]:end_indices[3]])
    loss_ic = MSE(ic_error, torch.zeros_like(ic_error))

    loss= loss_res+loss_bc_1+loss_bc_2+loss_ic
    return loss, loss_res, loss_bc_1, loss_bc_2, loss_ic

In [258]:
prev_n_iter = 0
epoch = 0

while prev_n_iter < lbfgs_iter:
    batch_data = get_data(batch=4) 
    def closure():
        input_data = batch_data.to('cuda:0') 
        indices = torch.arange(input_data.size(1))
        shuffled_indices = indices[torch.randperm(len(indices))]
        input_data = input_data[:, shuffled_indices]

        input_data.requires_grad=True
        optimizer_lbfgs.zero_grad()
        output, _ = net_lbfgs(input_data)


        pde_results= pde(input_data, output)
        loss_res = MSE(pde_results, torch.zeros_like(pde_results))

        output=output[:,shuffled_indices.argsort()]
        input_data=input_data[:,shuffled_indices.argsort()]

        bc_error_1 = bc_1.error(input_data[:, begin_indices[1]:end_indices[1]],output[:, begin_indices[1]:end_indices[1]])

        loss_bc_1 = MSE(bc_error_1, torch.zeros_like(bc_error_1))

        bc_error_2 = bc_2.error(input_data[:, begin_indices[2]:end_indices[2]],output[:, begin_indices[2]:end_indices[2]])
        loss_bc_2 = MSE(bc_error_2, torch.zeros_like(bc_error_2))

        ic_error = ic.error(input_data[:, begin_indices[3]:end_indices[3]],output[:, begin_indices[3]:end_indices[3]])
        loss_ic = MSE(ic_error, torch.zeros_like(ic_error))

        loss= loss_res+loss_bc_1+loss_bc_2+loss_ic
        loss.backward()
        return loss

    optimizer_lbfgs.step(closure)
    n_iter = optimizer_lbfgs.state_dict()["state"][0]['n_iter']
    if prev_n_iter == n_iter:
        break
    epoch += n_iter - prev_n_iter
    prev_n_iter = n_iter
    loss, loss_res, loss_bc_1, loss_bc_2, loss_ic = test()
    print('Epoch: %d, Loss:%.8f \t Loss_pde:%.8f \t Loss_bc_1:%.8f \t Loss_bc_2:%.8f \t Loss_ic:%.8f'%(prev_n_iter, loss.detach().cpu().numpy(), loss_res.detach().cpu().numpy(), loss_bc_1.detach().cpu().numpy(), loss_bc_2.detach().cpu().numpy(), loss_ic.detach().cpu().numpy()))


Epoch: 100, Loss:0.00127215 	 Loss_pde:0.00021410 	 Loss_bc_1:0.00034431 	 Loss_bc_2:0.00027663 	 Loss_ic:0.00043711
Epoch: 200, Loss:0.00225896 	 Loss_pde:0.00024492 	 Loss_bc_1:0.00070259 	 Loss_bc_2:0.00042404 	 Loss_ic:0.00088741
Epoch: 300, Loss:0.00082792 	 Loss_pde:0.00007870 	 Loss_bc_1:0.00024741 	 Loss_bc_2:0.00016381 	 Loss_ic:0.00033800
Epoch: 400, Loss:0.00028635 	 Loss_pde:0.00008336 	 Loss_bc_1:0.00008416 	 Loss_bc_2:0.00004706 	 Loss_ic:0.00007177
Epoch: 500, Loss:0.00031579 	 Loss_pde:0.00008105 	 Loss_bc_1:0.00008163 	 Loss_bc_2:0.00005435 	 Loss_ic:0.00009877
Epoch: 600, Loss:0.00017697 	 Loss_pde:0.00008675 	 Loss_bc_1:0.00000355 	 Loss_bc_2:0.00005524 	 Loss_ic:0.00003142
Epoch: 700, Loss:0.00029267 	 Loss_pde:0.00005691 	 Loss_bc_1:0.00008175 	 Loss_bc_2:0.00004795 	 Loss_ic:0.00010605
Epoch: 800, Loss:0.00071410 	 Loss_pde:0.00007356 	 Loss_bc_1:0.00025051 	 Loss_bc_2:0.00011804 	 Loss_ic:0.00027198
Epoch: 900, Loss:0.00012797 	 Loss_pde:0.00004456 	 Loss_bc_1:0.

KeyboardInterrupt: 