In [None]:
import torch
import numpy as np
import os
# GPU selection. These variables are read when CUDA is first initialised, so
# they are set here, before any tensor or module is moved to the GPU.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",   # enumerate GPUs by PCI bus id
    CUDA_VISIBLE_DEVICES="4, 5",      # expose only physical GPUs 4 and 5
    KMP_DUPLICATE_LIB_OK="TRUE",      # tolerate a duplicate OpenMP runtime
)
device = torch.device("cuda")
# The two visible GPUs are re-indexed from zero inside this process.
device_ids = list(range(2))

from timeit import default_timer
from torch.nn import DataParallel
from util.loss_func import LpLoss
from util.HNO import UNO3D
from util.load_data import get_chunk_data

# Seed the NumPy and torch global generators so repeated runs start from the
# same random state.
np.random.seed(0)
torch.manual_seed(0)

# ---- time axis --------------------------------------------------------------
fs = 20          # sampling rate (samples per second)
dt = 1 / fs      # sample spacing in seconds
fn = fs / 2      # Nyquist frequency
start_time_in_seconds, end_time_in_seconds = -0.5, 2.0
# Number of samples in the window, counting both end points.
T = round((end_time_in_seconds - start_time_in_seconds) / dt + 1)
n_after_padding = T  # no additional padding is applied here

# ---- frequency axis ---------------------------------------------------------
n_bins = n_after_padding // 2 + 1
# NOTE(review): bin spacing is fs / (n - 1), i.e. 1 / window duration, rather
# than the usual DFT spacing fs / n -- confirm this matches how the data were
# transformed.
freqs = torch.arange(n_bins) * fs / (n_after_padding - 1)
ws = 2 * torch.pi * freqs  # angular frequencies
# Select non-trivial frequencies and eliminate those with little energy: keep
# bin 5 up to and including the first bin whose frequency is >= 6.
bins_at_or_above_cutoff = torch.where(freqs >= 6)[0]
freq_to_keep = list(range(5, bins_at_or_above_cutoff[0].item() + 1))
NF = len(freq_to_keep)

# ---- grid and dataset size --------------------------------------------------
nx = ny = nz = 64
# Number of time-domain instances for training / validation; the number of
# data points is this value * NF.
nstrain = nsvalid = 1
# Offset passed to the loader for the validation chunk.
# NOTE(review): equals nstrain today; keep the two in step if nstrain changes
# so that validation does not overlap training -- confirm offset semantics.
offset_valid = 1

# ---- model / optimisation hyper-parameters ----------------------------------
width = 32
batch_size = 32
in_channels, out_channels = 5, 6
epochs = 100

In [None]:
# Build the network and place it on the primary CUDA device.
# NOTE(review): the first argument is in_channels + 3; presumably the three
# extra channels are spatial coordinates appended to the input -- confirm
# against UNO3D.
model = UNO3D(in_channels + 3, width, pad=0).to(device)

# Optimizer and scheduler are created from the bare module; the DataParallel
# wrapper applied afterwards holds the same module, so its parameters are the
# ones being optimised.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-5,
)
# Halve the learning rate every 30 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=30,
    gamma=0.5,
)

# Replicate the module across the GPUs listed in device_ids.
model = DataParallel(model, device_ids=device_ids)

In [None]:
def _load_split(offset, chunk_size, shuffle):
    """Load one chunk of samples and wrap it in a batched DataLoader.

    Args:
        offset: forwarded to ``get_chunk_data`` as ``offset``.
        chunk_size: forwarded to ``get_chunk_data`` as ``chunk_size``.
        shuffle: whether the DataLoader reshuffles the samples every epoch.

    Returns:
        ``(inputs, outputs, loader)`` where ``inputs`` / ``outputs`` are the
        two values returned by ``get_chunk_data`` and ``loader`` iterates over
        them in batches of ``batch_size``.
    """
    inputs, outputs = get_chunk_data(
        offset=offset,
        chunk_size=chunk_size,
        NF=NF,
        nx=nx,
        ny=ny,
        nz=nz,
        in_channels=in_channels,
        out_channels=out_channels,
        inputpath="../data/input_S",
        outputpath="../data/output_S",
    )
    dataset = torch.utils.data.TensorDataset(inputs, outputs)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )
    return inputs, outputs, loader


# Training split: starts at offset 0 and is reshuffled every epoch.
train_in, train_out, train_loader = _load_split(0, nstrain, shuffle=True)

# Validation split: starts at offset_valid and is iterated in a fixed order.
valid_in, valid_out, valid_loader = _load_split(offset_valid, nsvalid, shuffle=False)

In [None]:
# Loss criteria. With size_average=False each call presumably returns the sum
# over the batch (TODO confirm against LpLoss); the epoch totals below are
# divided by the dataset size to obtain a per-sample average.
L2 = LpLoss(p=2, size_average=False)
L1 = LpLoss(p=1, size_average=False)


def _blended_loss(pred, target):
    """Return the objective 0.9 * L1(pred, target) + 0.1 * L2(pred, target).

    Shared by the training and validation passes so both always report the
    same quantity.
    """
    return 0.9 * L1(pred, target) + 0.1 * L2(pred, target)


for ep in range(epochs):
    t1 = default_timer()

    # ---- training pass: one optimizer step per mini-batch ----
    model.train()
    train_loss = 0.0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = _blended_loss(out, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Batch losses were accumulated as sums, so normalise by sample count.
    train_loss /= len(train_loader.dataset)

    # ---- validation pass: no gradients, no parameter updates ----
    model.eval()
    valid_loss = 0.0
    with torch.no_grad():
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            loss = _blended_loss(out, y)
            valid_loss += loss.item()

    valid_loss /= len(valid_loader.dataset)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    t2 = default_timer()

    # Columns: epoch index, elapsed wall time in hours (seconds / 60 / 60),
    # mean training loss, mean validation loss.
    print(ep, (t2 - t1) / 60 / 60, train_loss, valid_loss)

    # Release cached GPU memory blocks held by the allocator.
    torch.cuda.empty_cache()
