In [1]:
from pathlib import Path
import math
import time

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from tqdm.auto import tqdm

In [2]:
path = Path("./data/128x128/clean/E_clean_TG_Re6.0_NX128_NY128_NT101.npz")
data = np.load(path)

x = data["x"]
y = data["y"]
t = data["t"]

U = data["U"]
V = data["V"]
P = data["P"]

Nx, Ny, Nt = len(x), len(y), len(t)
Re = 6.0

assert U.shape == (Nt, Ny, Nx)
assert V.shape == (Nt, Ny, Nx)
assert P.shape == (Nt, Ny, Nx)

Tg, Yg, Xg = np.meshgrid(t, y, x, indexing="ij") # (Nt, Ny, Nx)

x_flat = Xg.ravel()
y_flat = Yg.ravel()
t_flat = Tg.ravel()

u_flat = U.ravel()
v_flat = V.ravel()
p_flat = P.ravel()

N_total = x_flat.size

rng = np.random.RandomState(0)
perm = rng.permutation(N_total)

n_train = int(0.7 * N_total)
n_val = int(0.15 * N_total)
n_test   = int(0.15 * N_total)

idx_train = perm[:n_train]
idx_val = perm[n_train:n_train+n_val]
idx_test  = perm[n_train+n_val:]

train = {k: v[idx_train] for k, v in {
    "x": x_flat, "y": y_flat, "tau": t_flat,
    "u": u_flat, "v": v_flat, "p": p_flat
}.items()}
val = {k: v[idx_val] for k, v in {
    "x": x_flat, "y": y_flat, "tau": t_flat,
    "u": u_flat, "v": v_flat, "p": p_flat
}.items()}
test = {k: v[idx_test] for k, v in {
    "x": x_flat, "y": y_flat, "tau": t_flat,
    "u": u_flat, "v": v_flat, "p": p_flat
}.items()}

print("train:", train["x"].shape[0],
      "val:", val["x"].shape[0],
      "test:", test["x"].shape[0])

train: 1158348 val: 248217 test: 248219


In [3]:
class TGDataset(Dataset):
    def __init__(self, data_dict):
        self.x = torch.from_numpy(data_dict["x"]).float()
        self.y = torch.from_numpy(data_dict["y"]).float()
        self.tau = torch.from_numpy(data_dict["tau"]).float()
        self.u = torch.from_numpy(data_dict["u"]).float()
        self.v = torch.from_numpy(data_dict["v"]).float()
        self.p = torch.from_numpy(data_dict["p"]).float()

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return {
            "x": self.x[idx],
            "y": self.y[idx],
            "tau": self.tau[idx],
            "u": self.u[idx],
            "v": self.v[idx],
            "p": self.p[idx],
        }


train_ds = TGDataset(train)
val_ds  = TGDataset(val)
test_ds  = TGDataset(test)

BATCH_SIZE = 4_096 
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
val_loader  = DataLoader(val_ds,  batch_size=BATCH_SIZE, shuffle=False, drop_last=False)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, drop_last=False)


batch = next(iter(train_loader))
print({k: v.shape for k, v in batch.items()})

{'x': torch.Size([4096]), 'y': torch.Size([4096]), 'tau': torch.Size([4096]), 'u': torch.Size([4096]), 'v': torch.Size([4096]), 'p': torch.Size([4096])}


In [4]:
class Sine(nn.Module):
    def __init__(self, w0=1.0):
        super().__init__()
        self.w0 = w0

    def forward(self, x):
        return torch.sin(self.w0 * x)


class TaylorGreenMLP(nn.Module):
    def __init__(self, in_dim=3, hidden_dim=128, n_hidden=6, w0=1.0):
        super().__init__()
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.n_hidden = n_hidden
        self.w0 = w0
        layers = []
        layers.append(nn.Linear(in_dim, hidden_dim))
        layers.append(Sine(w0))
        for _ in range(n_hidden - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(Sine(w0))
        self.backbone = nn.Sequential(*layers)
        self.head_psi = nn.Linear(hidden_dim, 1)
        self.head_p = nn.Linear(hidden_dim, 1)
        self._init_weights_siren()

    def _init_weights_siren(self):
        first_done = False
        for m in self.backbone:
            if isinstance(m, nn.Linear):
                if not first_done:
                    nn.init.uniform_(m.weight, -1.0/self.in_dim, 1.0/self.in_dim)
                    nn.init.zeros_(m.bias)
                    first_done = True
                else:
                    fan_in = m.in_features
                    bound = math.sqrt(6.0/fan_in)/self.w0
                    nn.init.uniform_(m.weight, -bound, bound)
                    nn.init.zeros_(m.bias)
        nn.init.kaiming_uniform_(self.head_psi.weight, a=0.0, nonlinearity="linear")
        nn.init.zeros_(self.head_psi.bias)
        nn.init.kaiming_uniform_(self.head_p.weight, a=0.0, nonlinearity="linear")
        nn.init.zeros_(self.head_p.bias)

    def forward(self, x):
        h = self.backbone(x)
        psi_hat = self.head_psi(h)
        p_hat = self.head_p(h)
        return psi_hat, p_hat


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TaylorGreenMLP(
    in_dim=3,
    hidden_dim=64, # нейроны
    n_hidden=8, # слои
    w0=1.0, # частота
).to(device)

# batch = next(iter(train_loader))
# inp = torch.stack(
#     [batch["x"], batch["y"], batch["tau"]],
#     dim=1
# ).to(device)
# psi_pred, p_pred = model(inp)
# print(psi_pred.shape, p_pred.shape)

In [5]:
a1 = 1.0
a2 = 1e-3
a3 = 0

# a2 = 1e-4
# a3 = 3e-7

MAX_A2 = 1e-2
MAX_A3 = 1e-5

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.5)
num_epochs = 200

x_grid = torch.from_numpy(x).float().to(device)
y_grid = torch.from_numpy(y).float().to(device)
t_grid = torch.from_numpy(t).float().to(device)

Nx = x_grid.shape[0]
Ny = y_grid.shape[0]
Nt = t_grid.shape[0]

dx = float(x_grid[1] - x_grid[0])
dy = float(y_grid[1] - y_grid[0])
dt = float(t_grid[1] - t_grid[0])

# стена сдвигов, соответствующая дискретному выводу
# индекс 0 обязательно центр (i,j,k)
STENCIL = torch.tensor([
    [0, 0, 0],   # 0 центр
    [1, 0, 0],   # 1  (i+1, j, k)
    [-1, 0, 0],  # 2  (i-1, j, k)
    [2, 0, 0],   # 3  (i+2, j, k)
    [-2, 0, 0],  # 4  (i-2, j, k)
    [0, 1, 0],   # 5  (i, j+1, k)
    [0, -1, 0],  # 6  (i, j-1, k)
    [0, 2, 0],   # 7  (i, j+2, k)
    [0, -2, 0],  # 8  (i, j-2, k)
    [1, 1, 0],   # 9  (i+1, j+1, k)
    [1, -1, 0],  # 10 (i+1, j-1, k)
    [-1, 1, 0],  # 11 (i-1, j+1, k)
    [-1, -1, 0], # 12 (i-1, j-1, k)
    [0, 0, 1],   # 13 (i, j, k+1)
    [0, 0, -1],  # 14 (i, j, k-1)
    [1, 0, 1],   # 15 (i+1, j, k+1)
    [-1, 0, 1],  # 16 (i-1, j, k+1)
    [0, 1, 1],   # 17 (i, j+1, k+1)
    [0, -1, 1],  # 18 (i, j-1, k+1)
    [1, 0, -1],  # 19 (i+1, j, k-1)
    [-1, 0, -1], # 20 (i-1, j, k-1)
    [0, 1, -1],  # 21 (i, j+1, k-1)
    [0, -1, -1], # 22 (i, j-1, k-1)
], device=device)


# def pack_inputs(xb, yb, tb):
#     return torch.stack([xb, yb, tb], dim=1)


def coords_from_batch(xb, yb, tb):
    """
    xb, yb, tb: (B,)
    возвращает целочисленные индексы i,j,k по равномерной сетке
    """
    i = torch.round(xb / dx).long()
    j = torch.round(yb / dy).long()
    k = torch.round((tb - t_grid[0]) / dt).long()
    i = i.remainder(Nx)
    j = j.remainder(Ny)
    k = torch.clamp(k, 0, Nt - 1)
    return i, j, k


def eval_stencil(model, i, j, k):
    B = i.shape[0]
    S = STENCIL.shape[0]
    offs = STENCIL.unsqueeze(0).expand(B, -1, -1)  # (B, S, 3)
    i_off = (i.unsqueeze(1) + offs[:, :, 0]).remainder(Nx)
    j_off = (j.unsqueeze(1) + offs[:, :, 1]).remainder(Ny)
    k_off = (k.unsqueeze(1) + offs[:, :, 2])
    k_off = torch.clamp(k_off, 0, Nt - 1)
    x_off = x_grid[i_off]
    y_off = y_grid[j_off]
    t_off = t_grid[k_off]
    x_norm = x_off / (2 * math.pi)
    y_norm = y_off / (2 * math.pi)
    t_norm = (t_off - t_grid[0]) / (t_grid[-1] - t_grid[0])
    pts = torch.stack([x_norm, y_norm, t_norm], dim=2).view(B * S, 3)
    psi_all, p_all = model(pts)
    psi_all = psi_all.view(B, S)
    p_all = p_all.view(B, S)
    return psi_all, p_all, k_off


def time_derivative(f, f_tp, f_tm, k_int):
    """
    центральная разность по времени с односторонними на концах
    f, f_tp, f_tm: (B,)
    k_int: (B,) индексы времени текущих точек
    """
    df = (f_tp - f_tm) / (2.0 * dt)
    at_start = (k_int == 0)
    if at_start.any():
        df[at_start] = (f_tp[at_start] - f[at_start]) / dt
    at_end = (k_int == Nt - 1)
    if at_end.any():
        df[at_end] = (f[at_end] - f_tm[at_end]) / dt
    return df


def physics_forward(model, xb, yb, tb, u_true=None, v_true=None, details: bool = False):
    # индексы по сетке
    i, j, k_int = coords_from_batch(xb, yb, tb)
    # x_in = xb / (2 * math.pi)
    # y_in = yb / (2 * math.pi)
    #t_in = (tb - t_grid[0]) / (t_grid[-1] - t_grid[0])
    # inp = torch.stack([x_in, y_in, t_in], dim=1)
    # psi_here, p_here = model(inp)
    # ψ и p во всех нужных окрестностях
    psi_all, p_all, k_all = eval_stencil(model, i, j, k_int)
    # if first_batch:
    #     with torch.no_grad():
    #         print("psi min/max:", psi_all.min().item(), psi_all.max().item())
    #         print("p   min/max:", p_all.min().item(), p_all.max().item())
    #     first_batch = False
    # читаем по индексам (см. порядок в STENCIL)
    psi_c  = psi_all[:, 0]   # (i,j,k)
    psi_xp = psi_all[:, 1]   # (i+1,j,k)
    psi_xm = psi_all[:, 2]   # (i-1,j,k)
    psi_xpp = psi_all[:, 3]  # (i+2,j,k)
    psi_xmm = psi_all[:, 4]  # (i-2,j,k)
    psi_yp = psi_all[:, 5]
    psi_ym = psi_all[:, 6]
    psi_ypp = psi_all[:, 7]
    psi_ymm = psi_all[:, 8]
    psi_xpyp = psi_all[:, 9]
    psi_xpym = psi_all[:, 10]
    psi_xmyp = psi_all[:, 11]
    psi_xmym = psi_all[:, 12]
    psi_tp = psi_all[:, 13]
    psi_tm = psi_all[:, 14]
    psi_xp_tp = psi_all[:, 15]
    psi_xm_tp = psi_all[:, 16]
    psi_yp_tp = psi_all[:, 17]
    psi_ym_tp = psi_all[:, 18]
    psi_xp_tm = psi_all[:, 19]
    psi_xm_tm = psi_all[:, 20]
    psi_yp_tm = psi_all[:, 21]
    psi_ym_tm = psi_all[:, 22]
    p_c  = p_all[:, 0]
    p_xp = p_all[:, 1]
    p_xm = p_all[:, 2]
    p_yp = p_all[:, 5]
    p_ym = p_all[:, 6]
    # 1) скорости из ψ
    u_data = (-psi_ypp + 8*psi_yp - 8*psi_ym + psi_ymm) / (12.0 * dy)
    v_data = -(-psi_xpp + 8*psi_xp - 8*psi_xm + psi_xmm) / (12.0 * dx)
    u_pde = (psi_yp - psi_ym) / (2.0 * dy)
    v_pde = -(psi_xp - psi_xm) / (2.0 * dx)
    # соседи скоростей для НС
    u_xp = (psi_xpyp - psi_xpym) / (2.0 * dy)
    u_xm = (psi_xmyp - psi_xmym) / (2.0 * dy)
    u_yp = (psi_ypp - psi_c) / (2.0 * dy)
    u_ym = (psi_c - psi_ymm) / (2.0 * dy)
    v_xp = -(psi_xpp - psi_c) / (2.0 * dx)
    v_xm = -(psi_c - psi_xmm) / (2.0 * dx)
    v_yp = -(psi_xpyp - psi_xmyp) / (2.0 * dx)
    v_ym = -(psi_xpym - psi_xmym) / (2.0 * dx)
    # по времени
    u_tp = (psi_yp_tp - psi_ym_tp) / (2.0 * dy)
    u_tm = (psi_yp_tm - psi_ym_tm) / (2.0 * dy)
    v_tp = -(psi_xp_tp - psi_xm_tp) / (2.0 * dx)
    v_tm = -(psi_xp_tm - psi_xm_tm) / (2.0 * dx)
    du_dt = time_derivative(u_pde, u_tp, u_tm, k_int)
    dv_dt = time_derivative(v_pde, v_tp, v_tm, k_int)
    # пространственные производные
    du_dx = (u_xp - u_xm) / (2.0 * dx)
    du_dy = (u_yp - u_ym) / (2.0 * dy)
    dv_dx = (v_xp - v_xm) / (2.0 * dx)
    dv_dy = (v_yp - v_ym) / (2.0 * dy)
    lap_u = (u_xp - 2.0 * u_pde + u_xm) / (dx * dx) + (u_yp - 2.0 * u_pde + u_ym) / (dy * dy)
    lap_v = (v_xp - 2.0 * v_pde + v_xm) / (dx * dx) + (v_yp - 2.0 * v_pde + v_ym) / (dy * dy)
    dp_dx = (p_xp - p_xm) / (2.0 * dx)
    dp_dy = (p_yp - p_ym) / (2.0 * dy)
    # 2) невязки НС
    f_x = du_dt + u_pde * du_dx + v_pde * du_dy + dp_dx - (1.0 / Re) * lap_u
    f_y = dv_dt + u_pde * dv_dx + v_pde * dv_dy + dp_dy - (1.0 / Re) * lap_v
    # 3) вихрь ω = Δψ
    omega_c = -(
        (psi_xp - 2.0 * psi_c + psi_xm) / (dx * dx) 
        + (psi_yp - 2.0 * psi_c + psi_ym) / (dy * dy))
    omega_xp = -(
        (psi_xpp - 2.0 * psi_xp + psi_c) / (dx * dx) 
        + (psi_xpyp - 2.0 * psi_xp + psi_xpym) / (dy * dy))
    omega_xm = -(
        (psi_c - 2.0 * psi_xm + psi_xmm) / (dx * dx) 
        + (psi_xmyp - 2.0 * psi_xm + psi_xmym) / (dy * dy))
    omega_yp = -(
        (psi_xpyp - 2.0 * psi_yp + psi_xmyp) / (dx * dx) 
        + (psi_ypp - 2.0 * psi_yp + psi_c) / (dy * dy))
    omega_ym = -(
        (psi_xpym - 2.0 * psi_ym + psi_xmym) / (dx * dx) 
        + (psi_c - 2.0 * psi_ym + psi_ymm) / (dy * dy))
    # по времени для ω
    omega_tp = -(
        (psi_xp_tp - 2.0 * psi_tp + psi_xm_tp) / (dx * dx) 
        + (psi_yp_tp - 2.0 * psi_tp + psi_ym_tp) / (dy * dy))
    omega_tm = -(
        (psi_xp_tm - 2.0 * psi_tm + psi_xm_tm) / (dx * dx) 
        + (psi_yp_tm - 2.0 * psi_tm + psi_ym_tm) / (dy * dy))
    domega_dt = time_derivative(omega_c, omega_tp, omega_tm, k_int)
    domega_dx = (omega_xp - omega_xm) / (2.0 * dx)
    domega_dy = (omega_yp - omega_ym) / (2.0 * dy)
    lap_omega = (omega_xp - 2.0 * omega_c + omega_xm) / (dx * dx) + (omega_yp - 2.0 * omega_c + omega_ym) / (dy * dy)
    g_vort = domega_dt + u_pde * domega_dx + v_pde * domega_dy - (1.0 / Re) * lap_omega
    # сами потери
    L1 = F.mse_loss(u_data, u_true) + F.mse_loss(v_data, v_true)
    L2 = torch.mean(f_x ** 2 + f_y ** 2)
    L3 = torch.mean(g_vort ** 2)
    result = {
        "L1": L1,
        "L2": L2,
        "L3": L3,
    }
    if details:
        result.update({
            "u_data": u_data,
            "v_data": v_data,
            "u_pde": u_pde,
            "v_pde": v_pde,
            "p_pred": p_c,
            "f_x": f_x,
            "f_y": f_y,
            "g_vort": g_vort,})
    return result

In [6]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_L1 = 0.0
    running_L2 = 0.0
    running_L3 = 0.0
    n_seen = 0
    t0 = time.time()
    # прогресс по батчам
    pbar = tqdm(
        train_loader,
        total=len(train_loader),
        desc=f"epoch {epoch+1}/{num_epochs}",
        leave=False
    )
    first_batch = (epoch == 0)
    for batch in pbar:
        xb = batch["x"].to(device)
        yb = batch["y"].to(device)
        tb = batch["tau"].to(device)
        u_true = batch["u"].to(device)
        v_true = batch["v"].to(device)
        out = physics_forward(model, xb, yb, tb, u_true, v_true)
        loss = a1 * out["L1"] + a2 * out["L2"] + a3 * out["L3"]
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        bs = batch["x"].size(0)
        running_loss += loss.item() * bs
        running_L1 += out["L1"].item() * bs
        running_L2 += out["L2"].item() * bs
        running_L3 += out["L3"].item() * bs
        n_seen += bs
        pbar.set_postfix({
            "loss": f"{loss.item():.3e}",
            "L1": f"{out['L1'].item():.3e}",
            "wL2": f"{(a2*out['L2']).item():.3e}",
            "wL3": f"{(a3*out['L3']).item():.3e}",
            "L2": f"{out['L2'].item():.3e}",
            "L3": f"{out['L3'].item():.3e}",})
    # итоги по эпохе
    epoch_loss = running_loss / n_seen
    epoch_L1   = running_L1 / n_seen
    epoch_L2   = running_L2 / n_seen
    epoch_L3   = running_L3 / n_seen
    elapsed = time.time() - t0
    print(
        f"==> epoch {epoch+1:03d} done in {elapsed:.1f}s | "
        f"loss={epoch_loss:.6e} | L1={epoch_L1:.3e} | wL2={a2*epoch_L2:.3e} | wL3={a3*epoch_L3:.3e}")
    # если физика всё ещё тише, чем 1e-3, можно усилить
    if (epoch + 1 >= 5) in [10, 20, 40, 80, 120, 160]:
        if a2*epoch_L2 < 1e-3:
            a2 = min(a2 * 2.0, MAX_A2)
        # вихрь разгоняем реже и мягче
        if (epoch + 1) >= 40 and a3 * epoch_L3 < 1e-3:
            a3 = min(a3 * 2.0, MAX_A3)
    model.eval()
    val_L1 = 0.0
    n_val = 0
    with torch.no_grad():
        for batch in val_loader:
            xb = batch["x"].to(device)
            yb = batch["y"].to(device)
            tb = batch["tau"].to(device)
            u_true = batch["u"].to(device)
            v_true = batch["v"].to(device)
            out = physics_forward(model, xb, yb, tb, u_true, v_true)
            val_L1 += out["L1"].item() * xb.size(0)
            n_val += xb.size(0)
    val_L1 /= n_val
    print(f"val L1 = {val_L1:.3e}; a2={a2:.1e} | a3={a3:.1e}")


epoch 1/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 001 done in 166.9s | loss=5.900918e-01 | L1=3.628e-01 | wL2=2.273e-01 | wL3=0.000e+00
val L1 = 1.475e-01; a2=1.0e-03 | a3=0.0e+00


epoch 2/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 002 done in 195.4s | loss=1.194534e-01 | L1=5.971e-02 | wL2=5.974e-02 | wL3=0.000e+00
val L1 = 2.975e-02; a2=1.0e-03 | a3=0.0e+00


epoch 3/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 003 done in 195.1s | loss=6.235086e-02 | L1=2.200e-02 | wL2=4.035e-02 | wL3=0.000e+00
val L1 = 1.469e-02; a2=1.0e-03 | a3=0.0e+00


epoch 4/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 004 done in 196.0s | loss=4.086809e-02 | L1=1.292e-02 | wL2=2.795e-02 | wL3=0.000e+00
val L1 = 1.041e-02; a2=1.0e-03 | a3=0.0e+00


epoch 5/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 005 done in 195.2s | loss=3.179584e-02 | L1=9.201e-03 | wL2=2.259e-02 | wL3=0.000e+00
val L1 = 7.929e-03; a2=1.0e-03 | a3=0.0e+00


epoch 6/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 006 done in 194.6s | loss=2.373487e-02 | L1=6.661e-03 | wL2=1.707e-02 | wL3=0.000e+00
val L1 = 5.003e-03; a2=1.0e-03 | a3=0.0e+00


epoch 7/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 007 done in 195.8s | loss=2.247008e-02 | L1=5.806e-03 | wL2=1.666e-02 | wL3=0.000e+00
val L1 = 3.934e-03; a2=1.0e-03 | a3=0.0e+00


epoch 8/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 008 done in 194.8s | loss=2.332423e-02 | L1=6.258e-03 | wL2=1.707e-02 | wL3=0.000e+00
val L1 = 4.975e-03; a2=1.0e-03 | a3=0.0e+00


epoch 9/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 009 done in 194.7s | loss=1.241980e-02 | L1=3.021e-03 | wL2=9.399e-03 | wL3=0.000e+00
val L1 = 1.991e-03; a2=1.0e-03 | a3=0.0e+00


epoch 10/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 010 done in 195.6s | loss=1.411052e-02 | L1=3.530e-03 | wL2=1.058e-02 | wL3=0.000e+00
val L1 = 2.337e-03; a2=1.0e-03 | a3=0.0e+00


epoch 11/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 011 done in 193.4s | loss=1.452061e-02 | L1=3.544e-03 | wL2=1.098e-02 | wL3=0.000e+00
val L1 = 3.194e-03; a2=1.0e-03 | a3=0.0e+00


epoch 12/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 012 done in 192.6s | loss=1.895244e-02 | L1=4.870e-03 | wL2=1.408e-02 | wL3=0.000e+00
val L1 = 5.006e-03; a2=1.0e-03 | a3=0.0e+00


epoch 13/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 013 done in 195.4s | loss=2.066159e-02 | L1=5.846e-03 | wL2=1.482e-02 | wL3=0.000e+00
val L1 = 5.168e-03; a2=1.0e-03 | a3=0.0e+00


epoch 14/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 014 done in 188.6s | loss=1.353780e-02 | L1=3.275e-03 | wL2=1.026e-02 | wL3=0.000e+00
val L1 = 1.127e-02; a2=1.0e-03 | a3=0.0e+00


epoch 15/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 015 done in 194.9s | loss=1.215065e-02 | L1=3.254e-03 | wL2=8.897e-03 | wL3=0.000e+00
val L1 = 1.330e-03; a2=1.0e-03 | a3=0.0e+00


epoch 16/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 016 done in 197.9s | loss=1.200348e-02 | L1=3.052e-03 | wL2=8.952e-03 | wL3=0.000e+00
val L1 = 2.126e-03; a2=1.0e-03 | a3=0.0e+00


epoch 17/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 017 done in 194.4s | loss=1.242669e-02 | L1=3.427e-03 | wL2=9.000e-03 | wL3=0.000e+00
val L1 = 2.161e-03; a2=1.0e-03 | a3=0.0e+00


epoch 18/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 018 done in 195.2s | loss=7.974569e-03 | L1=1.928e-03 | wL2=6.046e-03 | wL3=0.000e+00
val L1 = 1.268e-03; a2=1.0e-03 | a3=0.0e+00


epoch 19/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 019 done in 228.5s | loss=8.861179e-03 | L1=2.214e-03 | wL2=6.647e-03 | wL3=0.000e+00
val L1 = 2.039e-03; a2=1.0e-03 | a3=0.0e+00


epoch 20/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 020 done in 205.0s | loss=8.566251e-03 | L1=2.032e-03 | wL2=6.535e-03 | wL3=0.000e+00
val L1 = 7.787e-04; a2=1.0e-03 | a3=0.0e+00


epoch 21/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 021 done in 193.5s | loss=1.491089e-02 | L1=3.723e-03 | wL2=1.119e-02 | wL3=0.000e+00
val L1 = 4.941e-03; a2=1.0e-03 | a3=0.0e+00


epoch 22/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 022 done in 193.7s | loss=1.043610e-02 | L1=2.670e-03 | wL2=7.766e-03 | wL3=0.000e+00
val L1 = 2.193e-03; a2=1.0e-03 | a3=0.0e+00


epoch 23/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 023 done in 193.1s | loss=9.852516e-03 | L1=3.007e-03 | wL2=6.846e-03 | wL3=0.000e+00
val L1 = 1.312e-03; a2=1.0e-03 | a3=0.0e+00


epoch 24/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 024 done in 192.1s | loss=1.384965e-02 | L1=3.291e-03 | wL2=1.056e-02 | wL3=0.000e+00
val L1 = 4.716e-03; a2=1.0e-03 | a3=0.0e+00


epoch 25/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 025 done in 192.3s | loss=1.120602e-02 | L1=2.528e-03 | wL2=8.678e-03 | wL3=0.000e+00
val L1 = 1.486e-03; a2=1.0e-03 | a3=0.0e+00


epoch 26/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 026 done in 194.0s | loss=8.386833e-03 | L1=2.102e-03 | wL2=6.285e-03 | wL3=0.000e+00
val L1 = 3.315e-03; a2=1.0e-03 | a3=0.0e+00


epoch 27/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 027 done in 195.1s | loss=7.670047e-03 | L1=2.179e-03 | wL2=5.491e-03 | wL3=0.000e+00
val L1 = 4.516e-03; a2=1.0e-03 | a3=0.0e+00


epoch 28/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 028 done in 194.8s | loss=7.190318e-03 | L1=1.538e-03 | wL2=5.652e-03 | wL3=0.000e+00
val L1 = 2.803e-03; a2=1.0e-03 | a3=0.0e+00


epoch 29/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 029 done in 194.1s | loss=8.873885e-03 | L1=2.446e-03 | wL2=6.427e-03 | wL3=0.000e+00
val L1 = 8.396e-04; a2=1.0e-03 | a3=0.0e+00


epoch 30/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 030 done in 195.9s | loss=8.156683e-03 | L1=1.904e-03 | wL2=6.253e-03 | wL3=0.000e+00
val L1 = 2.398e-03; a2=1.0e-03 | a3=0.0e+00


epoch 31/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 031 done in 194.6s | loss=6.426047e-03 | L1=1.552e-03 | wL2=4.875e-03 | wL3=0.000e+00
val L1 = 6.125e-04; a2=1.0e-03 | a3=0.0e+00


epoch 32/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 032 done in 193.6s | loss=8.450538e-03 | L1=1.940e-03 | wL2=6.510e-03 | wL3=0.000e+00
val L1 = 1.524e-03; a2=1.0e-03 | a3=0.0e+00


epoch 33/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 033 done in 194.7s | loss=8.812683e-03 | L1=1.765e-03 | wL2=7.048e-03 | wL3=0.000e+00
val L1 = 8.269e-03; a2=1.0e-03 | a3=0.0e+00


epoch 34/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 034 done in 194.1s | loss=9.806587e-03 | L1=3.089e-03 | wL2=6.718e-03 | wL3=0.000e+00
val L1 = 1.427e-03; a2=1.0e-03 | a3=0.0e+00


epoch 35/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 035 done in 193.0s | loss=7.773750e-03 | L1=1.574e-03 | wL2=6.200e-03 | wL3=0.000e+00
val L1 = 5.593e-04; a2=1.0e-03 | a3=0.0e+00


epoch 36/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 036 done in 195.3s | loss=3.925947e-03 | L1=7.959e-04 | wL2=3.130e-03 | wL3=0.000e+00
val L1 = 5.261e-04; a2=1.0e-03 | a3=0.0e+00


epoch 37/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 037 done in 193.0s | loss=4.034455e-03 | L1=9.531e-04 | wL2=3.081e-03 | wL3=0.000e+00
val L1 = 2.065e-03; a2=1.0e-03 | a3=0.0e+00


epoch 38/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 038 done in 193.1s | loss=5.911982e-03 | L1=1.827e-03 | wL2=4.084e-03 | wL3=0.000e+00
val L1 = 2.540e-04; a2=1.0e-03 | a3=0.0e+00


epoch 39/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 039 done in 194.8s | loss=5.351653e-03 | L1=1.163e-03 | wL2=4.189e-03 | wL3=0.000e+00
val L1 = 6.397e-04; a2=1.0e-03 | a3=0.0e+00


epoch 40/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 040 done in 195.4s | loss=3.568530e-03 | L1=6.637e-04 | wL2=2.905e-03 | wL3=0.000e+00
val L1 = 5.031e-04; a2=1.0e-03 | a3=0.0e+00


epoch 41/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 041 done in 194.9s | loss=2.128301e-03 | L1=6.082e-04 | wL2=1.520e-03 | wL3=0.000e+00
val L1 = 4.872e-04; a2=1.0e-03 | a3=0.0e+00


epoch 42/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 042 done in 194.6s | loss=6.398549e-03 | L1=1.703e-03 | wL2=4.696e-03 | wL3=0.000e+00
val L1 = 2.966e-03; a2=1.0e-03 | a3=0.0e+00


epoch 43/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 043 done in 194.7s | loss=3.836947e-03 | L1=9.508e-04 | wL2=2.886e-03 | wL3=0.000e+00
val L1 = 9.513e-04; a2=1.0e-03 | a3=0.0e+00


epoch 44/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 044 done in 194.9s | loss=3.148291e-03 | L1=7.502e-04 | wL2=2.398e-03 | wL3=0.000e+00
val L1 = 7.388e-04; a2=1.0e-03 | a3=0.0e+00


epoch 45/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 045 done in 193.5s | loss=5.018263e-03 | L1=1.247e-03 | wL2=3.771e-03 | wL3=0.000e+00
val L1 = 4.598e-04; a2=1.0e-03 | a3=0.0e+00


epoch 46/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 046 done in 193.6s | loss=3.110804e-03 | L1=6.830e-04 | wL2=2.428e-03 | wL3=0.000e+00
val L1 = 1.123e-03; a2=1.0e-03 | a3=0.0e+00


epoch 47/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 047 done in 195.0s | loss=5.604160e-03 | L1=1.278e-03 | wL2=4.326e-03 | wL3=0.000e+00
val L1 = 5.782e-04; a2=1.0e-03 | a3=0.0e+00


epoch 48/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 048 done in 193.5s | loss=2.550658e-03 | L1=5.004e-04 | wL2=2.050e-03 | wL3=0.000e+00
val L1 = 6.393e-04; a2=1.0e-03 | a3=0.0e+00


epoch 49/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 049 done in 193.9s | loss=7.962227e-03 | L1=3.566e-03 | wL2=4.396e-03 | wL3=0.000e+00
val L1 = 8.163e-04; a2=1.0e-03 | a3=0.0e+00


epoch 50/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 050 done in 194.4s | loss=2.703976e-03 | L1=5.909e-04 | wL2=2.113e-03 | wL3=0.000e+00
val L1 = 1.867e-04; a2=1.0e-03 | a3=0.0e+00


epoch 51/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 051 done in 194.6s | loss=3.223023e-03 | L1=6.072e-04 | wL2=2.616e-03 | wL3=0.000e+00
val L1 = 3.365e-03; a2=1.0e-03 | a3=0.0e+00


epoch 52/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 052 done in 194.5s | loss=2.473469e-03 | L1=5.266e-04 | wL2=1.947e-03 | wL3=0.000e+00
val L1 = 1.297e-04; a2=1.0e-03 | a3=0.0e+00


epoch 53/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 053 done in 195.2s | loss=5.401714e-03 | L1=1.209e-03 | wL2=4.193e-03 | wL3=0.000e+00
val L1 = 4.226e-04; a2=1.0e-03 | a3=0.0e+00


epoch 54/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 054 done in 194.1s | loss=3.221111e-03 | L1=8.485e-04 | wL2=2.373e-03 | wL3=0.000e+00
val L1 = 4.174e-03; a2=1.0e-03 | a3=0.0e+00


epoch 55/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 055 done in 194.3s | loss=2.967558e-03 | L1=8.167e-04 | wL2=2.151e-03 | wL3=0.000e+00
val L1 = 1.260e-03; a2=1.0e-03 | a3=0.0e+00


epoch 56/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 056 done in 195.5s | loss=3.836831e-03 | L1=9.038e-04 | wL2=2.933e-03 | wL3=0.000e+00
val L1 = 6.120e-03; a2=1.0e-03 | a3=0.0e+00


epoch 57/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 057 done in 193.6s | loss=3.297772e-03 | L1=7.226e-04 | wL2=2.575e-03 | wL3=0.000e+00
val L1 = 4.314e-05; a2=1.0e-03 | a3=0.0e+00


epoch 58/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 058 done in 193.1s | loss=1.636972e-03 | L1=4.126e-04 | wL2=1.224e-03 | wL3=0.000e+00
val L1 = 2.520e-03; a2=1.0e-03 | a3=0.0e+00


epoch 59/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 059 done in 193.9s | loss=2.443388e-03 | L1=5.382e-04 | wL2=1.905e-03 | wL3=0.000e+00
val L1 = 9.321e-04; a2=1.0e-03 | a3=0.0e+00


epoch 60/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 060 done in 192.8s | loss=4.894177e-03 | L1=1.276e-03 | wL2=3.618e-03 | wL3=0.000e+00
val L1 = 1.827e-04; a2=1.0e-03 | a3=0.0e+00


epoch 61/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 061 done in 193.7s | loss=2.935586e-03 | L1=5.647e-04 | wL2=2.371e-03 | wL3=0.000e+00
val L1 = 5.148e-04; a2=1.0e-03 | a3=0.0e+00


epoch 62/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 062 done in 193.1s | loss=2.100355e-03 | L1=4.739e-04 | wL2=1.626e-03 | wL3=0.000e+00
val L1 = 2.636e-05; a2=1.0e-03 | a3=0.0e+00


epoch 63/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 063 done in 193.4s | loss=7.912729e-04 | L1=1.777e-04 | wL2=6.136e-04 | wL3=0.000e+00
val L1 = 5.038e-04; a2=1.0e-03 | a3=0.0e+00


epoch 64/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 064 done in 193.5s | loss=2.953864e-03 | L1=7.943e-04 | wL2=2.160e-03 | wL3=0.000e+00
val L1 = 6.133e-05; a2=1.0e-03 | a3=0.0e+00


epoch 65/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 065 done in 192.4s | loss=1.887273e-03 | L1=3.548e-04 | wL2=1.533e-03 | wL3=0.000e+00
val L1 = 1.200e-04; a2=1.0e-03 | a3=0.0e+00


epoch 66/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 066 done in 191.2s | loss=3.288085e-03 | L1=1.178e-03 | wL2=2.110e-03 | wL3=0.000e+00
val L1 = 4.851e-04; a2=1.0e-03 | a3=0.0e+00


epoch 67/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 067 done in 194.5s | loss=1.916847e-03 | L1=5.501e-04 | wL2=1.367e-03 | wL3=0.000e+00
val L1 = 1.472e-03; a2=1.0e-03 | a3=0.0e+00


epoch 68/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 068 done in 193.2s | loss=3.075568e-03 | L1=7.300e-04 | wL2=2.346e-03 | wL3=0.000e+00
val L1 = 2.719e-04; a2=1.0e-03 | a3=0.0e+00


epoch 69/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 069 done in 193.6s | loss=3.600775e-03 | L1=7.310e-04 | wL2=2.870e-03 | wL3=0.000e+00
val L1 = 5.481e-04; a2=1.0e-03 | a3=0.0e+00


epoch 70/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 070 done in 193.8s | loss=1.411204e-03 | L1=2.755e-04 | wL2=1.136e-03 | wL3=0.000e+00
val L1 = 1.700e-05; a2=1.0e-03 | a3=0.0e+00


epoch 71/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 071 done in 195.3s | loss=1.205267e-03 | L1=2.243e-04 | wL2=9.809e-04 | wL3=0.000e+00
val L1 = 3.015e-04; a2=1.0e-03 | a3=0.0e+00


epoch 72/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 072 done in 194.0s | loss=7.468821e-04 | L1=1.790e-04 | wL2=5.678e-04 | wL3=0.000e+00
val L1 = 1.621e-05; a2=1.0e-03 | a3=0.0e+00


epoch 73/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 073 done in 194.6s | loss=3.733577e-03 | L1=1.690e-03 | wL2=2.044e-03 | wL3=0.000e+00
val L1 = 4.868e-04; a2=1.0e-03 | a3=0.0e+00


epoch 74/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 074 done in 194.6s | loss=4.551145e-03 | L1=9.622e-04 | wL2=3.589e-03 | wL3=0.000e+00
val L1 = 4.037e-04; a2=1.0e-03 | a3=0.0e+00


epoch 75/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 075 done in 217.5s | loss=2.089824e-03 | L1=4.207e-04 | wL2=1.669e-03 | wL3=0.000e+00
val L1 = 9.380e-04; a2=1.0e-03 | a3=0.0e+00


epoch 76/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 076 done in 210.9s | loss=7.066585e-04 | L1=1.558e-04 | wL2=5.508e-04 | wL3=0.000e+00
val L1 = 2.809e-05; a2=1.0e-03 | a3=0.0e+00


epoch 77/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 077 done in 195.1s | loss=2.219703e-03 | L1=5.477e-04 | wL2=1.672e-03 | wL3=0.000e+00
val L1 = 1.825e-04; a2=1.0e-03 | a3=0.0e+00


epoch 78/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 078 done in 195.0s | loss=2.490347e-03 | L1=4.997e-04 | wL2=1.991e-03 | wL3=0.000e+00
val L1 = 6.860e-05; a2=1.0e-03 | a3=0.0e+00


epoch 79/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 079 done in 194.7s | loss=7.337374e-05 | L1=2.236e-05 | wL2=5.101e-05 | wL3=0.000e+00
val L1 = 2.840e-05; a2=1.0e-03 | a3=0.0e+00


epoch 80/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 080 done in 194.5s | loss=3.076082e-03 | L1=1.367e-03 | wL2=1.709e-03 | wL3=0.000e+00
val L1 = 1.963e-03; a2=1.0e-03 | a3=0.0e+00


epoch 81/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 081 done in 193.9s | loss=5.370070e-03 | L1=1.206e-03 | wL2=4.164e-03 | wL3=0.000e+00
val L1 = 5.775e-04; a2=1.0e-03 | a3=0.0e+00


epoch 82/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 082 done in 195.5s | loss=3.129340e-03 | L1=5.983e-04 | wL2=2.531e-03 | wL3=0.000e+00
val L1 = 3.013e-04; a2=1.0e-03 | a3=0.0e+00


epoch 83/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 083 done in 194.0s | loss=1.465294e-04 | L1=4.959e-05 | wL2=9.694e-05 | wL3=0.000e+00
val L1 = 1.981e-05; a2=1.0e-03 | a3=0.0e+00


epoch 84/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 084 done in 194.5s | loss=2.965815e-03 | L1=9.177e-04 | wL2=2.048e-03 | wL3=0.000e+00
val L1 = 2.597e-05; a2=1.0e-03 | a3=0.0e+00


epoch 85/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 085 done in 193.5s | loss=3.432072e-04 | L1=6.749e-05 | wL2=2.757e-04 | wL3=0.000e+00
val L1 = 1.703e-04; a2=1.0e-03 | a3=0.0e+00


epoch 86/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 086 done in 195.2s | loss=1.279158e-03 | L1=3.107e-04 | wL2=9.684e-04 | wL3=0.000e+00
val L1 = 1.929e-04; a2=1.0e-03 | a3=0.0e+00


epoch 87/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 087 done in 195.3s | loss=9.485118e-04 | L1=2.087e-04 | wL2=7.399e-04 | wL3=0.000e+00
val L1 = 7.890e-05; a2=1.0e-03 | a3=0.0e+00


epoch 88/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 088 done in 193.5s | loss=2.635983e-03 | L1=7.628e-04 | wL2=1.873e-03 | wL3=0.000e+00
val L1 = 1.083e-03; a2=1.0e-03 | a3=0.0e+00


epoch 89/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 089 done in 194.2s | loss=7.536653e-04 | L1=2.317e-04 | wL2=5.220e-04 | wL3=0.000e+00
val L1 = 1.287e-05; a2=1.0e-03 | a3=0.0e+00


epoch 90/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 090 done in 193.9s | loss=3.159891e-04 | L1=6.026e-05 | wL2=2.557e-04 | wL3=0.000e+00
val L1 = 2.066e-04; a2=1.0e-03 | a3=0.0e+00


epoch 91/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 091 done in 194.2s | loss=3.990756e-03 | L1=9.923e-04 | wL2=2.998e-03 | wL3=0.000e+00
val L1 = 3.770e-04; a2=1.0e-03 | a3=0.0e+00


epoch 92/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 092 done in 195.2s | loss=7.070423e-04 | L1=1.443e-04 | wL2=5.627e-04 | wL3=0.000e+00
val L1 = 4.796e-04; a2=1.0e-03 | a3=0.0e+00


epoch 93/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 093 done in 192.6s | loss=4.429976e-03 | L1=1.051e-03 | wL2=3.379e-03 | wL3=0.000e+00
val L1 = 7.463e-04; a2=1.0e-03 | a3=0.0e+00


epoch 94/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 094 done in 193.1s | loss=2.149350e-03 | L1=6.044e-04 | wL2=1.545e-03 | wL3=0.000e+00
val L1 = 5.459e-05; a2=1.0e-03 | a3=0.0e+00


epoch 95/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 095 done in 193.1s | loss=1.683750e-04 | L1=3.421e-05 | wL2=1.342e-04 | wL3=0.000e+00
val L1 = 3.713e-05; a2=1.0e-03 | a3=0.0e+00


epoch 96/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 096 done in 193.2s | loss=5.318587e-03 | L1=1.412e-03 | wL2=3.907e-03 | wL3=0.000e+00
val L1 = 6.700e-04; a2=1.0e-03 | a3=0.0e+00


epoch 97/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 097 done in 193.0s | loss=2.099983e-03 | L1=4.900e-04 | wL2=1.610e-03 | wL3=0.000e+00
val L1 = 1.176e-05; a2=1.0e-03 | a3=0.0e+00


epoch 98/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 098 done in 193.8s | loss=1.657341e-04 | L1=3.410e-05 | wL2=1.316e-04 | wL3=0.000e+00
val L1 = 2.600e-04; a2=1.0e-03 | a3=0.0e+00


epoch 99/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 099 done in 194.7s | loss=1.881079e-03 | L1=3.758e-04 | wL2=1.505e-03 | wL3=0.000e+00
val L1 = 7.949e-04; a2=1.0e-03 | a3=0.0e+00


epoch 100/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 100 done in 193.7s | loss=2.807745e-03 | L1=7.268e-04 | wL2=2.081e-03 | wL3=0.000e+00
val L1 = 3.839e-04; a2=1.0e-03 | a3=0.0e+00


epoch 101/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 101 done in 195.2s | loss=3.626457e-04 | L1=7.708e-05 | wL2=2.856e-04 | wL3=0.000e+00
val L1 = 6.761e-06; a2=1.0e-03 | a3=0.0e+00


epoch 102/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 102 done in 195.6s | loss=1.168845e-04 | L1=2.518e-05 | wL2=9.170e-05 | wL3=0.000e+00
val L1 = 5.393e-06; a2=1.0e-03 | a3=0.0e+00


epoch 103/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 103 done in 194.0s | loss=1.902208e-03 | L1=5.269e-04 | wL2=1.375e-03 | wL3=0.000e+00
val L1 = 2.253e-03; a2=1.0e-03 | a3=0.0e+00


epoch 104/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 104 done in 194.1s | loss=7.072461e-04 | L1=2.979e-04 | wL2=4.093e-04 | wL3=0.000e+00
val L1 = 1.220e-05; a2=1.0e-03 | a3=0.0e+00


epoch 105/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 105 done in 194.6s | loss=9.781655e-04 | L1=1.983e-04 | wL2=7.798e-04 | wL3=0.000e+00
val L1 = 5.988e-05; a2=1.0e-03 | a3=0.0e+00


epoch 106/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 106 done in 193.2s | loss=3.128015e-04 | L1=5.996e-05 | wL2=2.528e-04 | wL3=0.000e+00
val L1 = 1.586e-04; a2=1.0e-03 | a3=0.0e+00


epoch 107/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 107 done in 194.4s | loss=4.393105e-03 | L1=1.466e-03 | wL2=2.928e-03 | wL3=0.000e+00
val L1 = 4.351e-04; a2=1.0e-03 | a3=0.0e+00


epoch 108/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 108 done in 194.4s | loss=8.764887e-04 | L1=1.853e-04 | wL2=6.912e-04 | wL3=0.000e+00
val L1 = 1.657e-05; a2=1.0e-03 | a3=0.0e+00


epoch 109/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 109 done in 194.4s | loss=4.898925e-04 | L1=1.090e-04 | wL2=3.809e-04 | wL3=0.000e+00
val L1 = 1.003e-04; a2=1.0e-03 | a3=0.0e+00


epoch 110/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 110 done in 193.4s | loss=5.634337e-04 | L1=1.131e-04 | wL2=4.504e-04 | wL3=0.000e+00
val L1 = 1.217e-04; a2=1.0e-03 | a3=0.0e+00


epoch 111/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 111 done in 192.4s | loss=2.929364e-03 | L1=8.679e-04 | wL2=2.061e-03 | wL3=0.000e+00
val L1 = 2.601e-05; a2=1.0e-03 | a3=0.0e+00


epoch 112/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 112 done in 195.1s | loss=1.110841e-03 | L1=2.097e-04 | wL2=9.012e-04 | wL3=0.000e+00
val L1 = 2.866e-05; a2=1.0e-03 | a3=0.0e+00


epoch 113/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 113 done in 193.7s | loss=1.312824e-04 | L1=2.572e-05 | wL2=1.056e-04 | wL3=0.000e+00
val L1 = 8.650e-04; a2=1.0e-03 | a3=0.0e+00


epoch 114/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 114 done in 194.5s | loss=1.872753e-03 | L1=4.444e-04 | wL2=1.428e-03 | wL3=0.000e+00
val L1 = 1.163e-05; a2=1.0e-03 | a3=0.0e+00


epoch 115/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 115 done in 194.2s | loss=1.308549e-03 | L1=6.314e-04 | wL2=6.771e-04 | wL3=0.000e+00
val L1 = 7.664e-05; a2=1.0e-03 | a3=0.0e+00


epoch 116/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 116 done in 194.0s | loss=1.233108e-04 | L1=2.694e-05 | wL2=9.637e-05 | wL3=0.000e+00
val L1 = 9.833e-05; a2=1.0e-03 | a3=0.0e+00


epoch 117/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 117 done in 195.1s | loss=1.380400e-03 | L1=2.648e-04 | wL2=1.116e-03 | wL3=0.000e+00
val L1 = 7.919e-06; a2=1.0e-03 | a3=0.0e+00


epoch 118/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 118 done in 193.6s | loss=1.287132e-04 | L1=2.726e-05 | wL2=1.014e-04 | wL3=0.000e+00
val L1 = 1.176e-04; a2=1.0e-03 | a3=0.0e+00


epoch 119/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 119 done in 194.0s | loss=3.839112e-04 | L1=8.042e-05 | wL2=3.035e-04 | wL3=0.000e+00
val L1 = 3.172e-05; a2=1.0e-03 | a3=0.0e+00


epoch 120/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 120 done in 194.4s | loss=4.066377e-03 | L1=1.505e-03 | wL2=2.561e-03 | wL3=0.000e+00
val L1 = 1.037e-03; a2=1.0e-03 | a3=0.0e+00


epoch 121/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 121 done in 194.9s | loss=4.605110e-03 | L1=9.501e-04 | wL2=3.655e-03 | wL3=0.000e+00
val L1 = 1.133e-03; a2=1.0e-03 | a3=0.0e+00


epoch 122/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 122 done in 194.0s | loss=1.745782e-03 | L1=4.105e-04 | wL2=1.335e-03 | wL3=0.000e+00
val L1 = 3.327e-05; a2=1.0e-03 | a3=0.0e+00


epoch 123/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 123 done in 194.6s | loss=2.162868e-05 | L1=1.043e-05 | wL2=1.120e-05 | wL3=0.000e+00
val L1 = 7.349e-06; a2=1.0e-03 | a3=0.0e+00


epoch 124/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 124 done in 193.7s | loss=2.350982e-04 | L1=4.459e-05 | wL2=1.905e-04 | wL3=0.000e+00
val L1 = 6.021e-05; a2=1.0e-03 | a3=0.0e+00


epoch 125/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 125 done in 195.6s | loss=2.485345e-04 | L1=4.771e-05 | wL2=2.008e-04 | wL3=0.000e+00
val L1 = 2.899e-05; a2=1.0e-03 | a3=0.0e+00


epoch 126/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 126 done in 193.2s | loss=1.669549e-03 | L1=6.663e-04 | wL2=1.003e-03 | wL3=0.000e+00
val L1 = 3.101e-03; a2=1.0e-03 | a3=0.0e+00


epoch 127/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 127 done in 193.6s | loss=4.840925e-03 | L1=1.300e-03 | wL2=3.541e-03 | wL3=0.000e+00
val L1 = 2.523e-05; a2=1.0e-03 | a3=0.0e+00


epoch 128/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 128 done in 195.3s | loss=1.438304e-03 | L1=3.073e-04 | wL2=1.131e-03 | wL3=0.000e+00
val L1 = 1.129e-05; a2=1.0e-03 | a3=0.0e+00


epoch 129/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 129 done in 194.4s | loss=3.021769e-03 | L1=1.266e-03 | wL2=1.756e-03 | wL3=0.000e+00
val L1 = 5.081e-04; a2=1.0e-03 | a3=0.0e+00


epoch 130/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 130 done in 194.6s | loss=1.107454e-04 | L1=3.928e-05 | wL2=7.146e-05 | wL3=0.000e+00
val L1 = 3.302e-05; a2=1.0e-03 | a3=0.0e+00


epoch 131/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 131 done in 194.4s | loss=9.784182e-04 | L1=1.961e-04 | wL2=7.823e-04 | wL3=0.000e+00
val L1 = 6.767e-06; a2=1.0e-03 | a3=0.0e+00


epoch 132/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 132 done in 194.6s | loss=5.851795e-04 | L1=1.131e-04 | wL2=4.721e-04 | wL3=0.000e+00
val L1 = 1.278e-05; a2=1.0e-03 | a3=0.0e+00


epoch 133/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 133 done in 195.3s | loss=9.752877e-05 | L1=1.920e-05 | wL2=7.833e-05 | wL3=0.000e+00
val L1 = 1.899e-05; a2=1.0e-03 | a3=0.0e+00


epoch 134/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 134 done in 194.5s | loss=3.310861e-03 | L1=1.328e-03 | wL2=1.983e-03 | wL3=0.000e+00
val L1 = 1.715e-04; a2=1.0e-03 | a3=0.0e+00


epoch 135/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 135 done in 195.5s | loss=1.516101e-04 | L1=3.682e-05 | wL2=1.148e-04 | wL3=0.000e+00
val L1 = 1.257e-05; a2=1.0e-03 | a3=0.0e+00


epoch 136/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 136 done in 193.4s | loss=6.556671e-04 | L1=1.379e-04 | wL2=5.178e-04 | wL3=0.000e+00
val L1 = 2.049e-04; a2=1.0e-03 | a3=0.0e+00


epoch 137/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 137 done in 193.4s | loss=1.538050e-03 | L1=3.359e-04 | wL2=1.202e-03 | wL3=0.000e+00
val L1 = 2.938e-04; a2=1.0e-03 | a3=0.0e+00


epoch 138/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 138 done in 194.3s | loss=3.815936e-04 | L1=8.168e-05 | wL2=2.999e-04 | wL3=0.000e+00
val L1 = 1.951e-04; a2=1.0e-03 | a3=0.0e+00


epoch 139/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 139 done in 194.4s | loss=1.255226e-04 | L1=2.761e-05 | wL2=9.792e-05 | wL3=0.000e+00
val L1 = 2.609e-05; a2=1.0e-03 | a3=0.0e+00


epoch 140/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 140 done in 195.7s | loss=2.448033e-03 | L1=9.600e-04 | wL2=1.488e-03 | wL3=0.000e+00
val L1 = 6.414e-03; a2=1.0e-03 | a3=0.0e+00


epoch 141/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 141 done in 194.7s | loss=1.849653e-03 | L1=6.163e-04 | wL2=1.233e-03 | wL3=0.000e+00
val L1 = 1.412e-05; a2=1.0e-03 | a3=0.0e+00


epoch 142/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 142 done in 195.2s | loss=5.237209e-04 | L1=1.213e-04 | wL2=4.024e-04 | wL3=0.000e+00
val L1 = 3.143e-04; a2=1.0e-03 | a3=0.0e+00


epoch 143/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 143 done in 192.5s | loss=4.601409e-04 | L1=8.882e-05 | wL2=3.713e-04 | wL3=0.000e+00
val L1 = 6.258e-06; a2=1.0e-03 | a3=0.0e+00


epoch 144/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 144 done in 193.6s | loss=1.284815e-03 | L1=3.048e-04 | wL2=9.800e-04 | wL3=0.000e+00
val L1 = 8.036e-04; a2=1.0e-03 | a3=0.0e+00


epoch 145/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 145 done in 194.2s | loss=2.605428e-03 | L1=1.070e-03 | wL2=1.535e-03 | wL3=0.000e+00
val L1 = 1.531e-03; a2=1.0e-03 | a3=0.0e+00


epoch 146/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 146 done in 190.0s | loss=1.160793e-03 | L1=2.816e-04 | wL2=8.792e-04 | wL3=0.000e+00
val L1 = 1.519e-05; a2=1.0e-03 | a3=0.0e+00


epoch 147/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 147 done in 186.3s | loss=2.835444e-05 | L1=1.229e-05 | wL2=1.607e-05 | wL3=0.000e+00
val L1 = 8.426e-06; a2=1.0e-03 | a3=0.0e+00


epoch 148/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 148 done in 194.0s | loss=9.497174e-04 | L1=3.424e-04 | wL2=6.073e-04 | wL3=0.000e+00
val L1 = 1.447e-05; a2=1.0e-03 | a3=0.0e+00


epoch 149/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 149 done in 195.9s | loss=4.054643e-04 | L1=7.519e-05 | wL2=3.303e-04 | wL3=0.000e+00
val L1 = 7.157e-06; a2=1.0e-03 | a3=0.0e+00


epoch 150/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 150 done in 195.2s | loss=1.262843e-03 | L1=3.770e-04 | wL2=8.858e-04 | wL3=0.000e+00
val L1 = 8.019e-03; a2=1.0e-03 | a3=0.0e+00


epoch 151/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 151 done in 196.1s | loss=2.244018e-03 | L1=8.201e-04 | wL2=1.424e-03 | wL3=0.000e+00
val L1 = 1.718e-05; a2=1.0e-03 | a3=0.0e+00


epoch 152/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 152 done in 195.7s | loss=1.887332e-03 | L1=4.816e-04 | wL2=1.406e-03 | wL3=0.000e+00
val L1 = 5.796e-04; a2=1.0e-03 | a3=0.0e+00


epoch 153/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 153 done in 194.9s | loss=2.189274e-03 | L1=4.749e-04 | wL2=1.714e-03 | wL3=0.000e+00
val L1 = 1.288e-05; a2=1.0e-03 | a3=0.0e+00


epoch 154/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 154 done in 194.6s | loss=1.003864e-03 | L1=2.063e-04 | wL2=7.975e-04 | wL3=0.000e+00
val L1 = 4.698e-05; a2=1.0e-03 | a3=0.0e+00


epoch 155/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 155 done in 196.0s | loss=1.924923e-05 | L1=7.758e-06 | wL2=1.149e-05 | wL3=0.000e+00
val L1 = 4.920e-06; a2=1.0e-03 | a3=0.0e+00


epoch 156/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 156 done in 194.2s | loss=3.039937e-03 | L1=1.071e-03 | wL2=1.969e-03 | wL3=0.000e+00
val L1 = 1.893e-05; a2=1.0e-03 | a3=0.0e+00


epoch 157/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 157 done in 195.2s | loss=1.123621e-03 | L1=2.505e-04 | wL2=8.731e-04 | wL3=0.000e+00
val L1 = 7.880e-05; a2=1.0e-03 | a3=0.0e+00


epoch 158/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 158 done in 195.6s | loss=1.371844e-03 | L1=5.820e-04 | wL2=7.898e-04 | wL3=0.000e+00
val L1 = 5.874e-05; a2=1.0e-03 | a3=0.0e+00


epoch 159/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 159 done in 195.1s | loss=1.268606e-04 | L1=2.810e-05 | wL2=9.876e-05 | wL3=0.000e+00
val L1 = 5.081e-05; a2=1.0e-03 | a3=0.0e+00


epoch 160/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 160 done in 195.7s | loss=1.007610e-03 | L1=1.851e-04 | wL2=8.225e-04 | wL3=0.000e+00
val L1 = 5.014e-06; a2=1.0e-03 | a3=0.0e+00


epoch 161/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 161 done in 194.6s | loss=1.174832e-05 | L1=4.632e-06 | wL2=7.117e-06 | wL3=0.000e+00
val L1 = 3.109e-06; a2=1.0e-03 | a3=0.0e+00


epoch 162/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 162 done in 195.2s | loss=2.186244e-03 | L1=1.032e-03 | wL2=1.154e-03 | wL3=0.000e+00
val L1 = 3.446e-03; a2=1.0e-03 | a3=0.0e+00


epoch 163/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 163 done in 205.9s | loss=6.544126e-04 | L1=2.187e-04 | wL2=4.357e-04 | wL3=0.000e+00
val L1 = 6.631e-05; a2=1.0e-03 | a3=0.0e+00


epoch 164/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 164 done in 222.1s | loss=2.014249e-04 | L1=4.081e-05 | wL2=1.606e-04 | wL3=0.000e+00
val L1 = 8.618e-05; a2=1.0e-03 | a3=0.0e+00


epoch 165/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 165 done in 196.0s | loss=9.067408e-04 | L1=1.744e-04 | wL2=7.324e-04 | wL3=0.000e+00
val L1 = 1.741e-04; a2=1.0e-03 | a3=0.0e+00


epoch 166/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 166 done in 197.1s | loss=1.276441e-03 | L1=2.460e-04 | wL2=1.030e-03 | wL3=0.000e+00
val L1 = 5.941e-06; a2=1.0e-03 | a3=0.0e+00


epoch 167/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 167 done in 195.7s | loss=1.146422e-05 | L1=5.068e-06 | wL2=6.396e-06 | wL3=0.000e+00
val L1 = 5.524e-06; a2=1.0e-03 | a3=0.0e+00


epoch 168/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 168 done in 194.6s | loss=2.860868e-04 | L1=6.251e-05 | wL2=2.236e-04 | wL3=0.000e+00
val L1 = 4.128e-04; a2=1.0e-03 | a3=0.0e+00


epoch 169/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 169 done in 196.6s | loss=2.828486e-03 | L1=1.547e-03 | wL2=1.281e-03 | wL3=0.000e+00
val L1 = 1.763e-05; a2=1.0e-03 | a3=0.0e+00


epoch 170/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 170 done in 195.6s | loss=3.032796e-03 | L1=6.549e-04 | wL2=2.378e-03 | wL3=0.000e+00
val L1 = 1.425e-03; a2=1.0e-03 | a3=0.0e+00


epoch 171/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 171 done in 196.7s | loss=8.918747e-04 | L1=2.249e-04 | wL2=6.670e-04 | wL3=0.000e+00
val L1 = 9.543e-05; a2=1.0e-03 | a3=0.0e+00


epoch 172/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 172 done in 194.6s | loss=1.046263e-03 | L1=1.947e-04 | wL2=8.515e-04 | wL3=0.000e+00
val L1 = 8.174e-06; a2=1.0e-03 | a3=0.0e+00


epoch 173/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 173 done in 196.0s | loss=1.042415e-05 | L1=5.048e-06 | wL2=5.376e-06 | wL3=0.000e+00
val L1 = 3.851e-06; a2=1.0e-03 | a3=0.0e+00


epoch 174/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 174 done in 196.5s | loss=3.348282e-03 | L1=1.540e-03 | wL2=1.809e-03 | wL3=0.000e+00
val L1 = 5.292e-03; a2=1.0e-03 | a3=0.0e+00


epoch 175/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 175 done in 196.2s | loss=1.844060e-03 | L1=5.640e-04 | wL2=1.280e-03 | wL3=0.000e+00
val L1 = 9.603e-06; a2=1.0e-03 | a3=0.0e+00


epoch 176/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 176 done in 196.0s | loss=1.064596e-04 | L1=2.163e-05 | wL2=8.483e-05 | wL3=0.000e+00
val L1 = 6.015e-05; a2=1.0e-03 | a3=0.0e+00


epoch 177/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 177 done in 195.6s | loss=1.021597e-03 | L1=2.128e-04 | wL2=8.088e-04 | wL3=0.000e+00
val L1 = 1.859e-03; a2=1.0e-03 | a3=0.0e+00


epoch 178/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 178 done in 195.3s | loss=2.284140e-03 | L1=6.034e-04 | wL2=1.681e-03 | wL3=0.000e+00
val L1 = 5.951e-04; a2=1.0e-03 | a3=0.0e+00


epoch 179/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 179 done in 195.4s | loss=3.079931e-03 | L1=6.756e-04 | wL2=2.404e-03 | wL3=0.000e+00
val L1 = 1.836e-05; a2=1.0e-03 | a3=0.0e+00


epoch 180/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 180 done in 194.6s | loss=2.387828e-05 | L1=9.044e-06 | wL2=1.483e-05 | wL3=0.000e+00
val L1 = 5.104e-06; a2=1.0e-03 | a3=0.0e+00


epoch 181/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 181 done in 194.9s | loss=4.983526e-04 | L1=1.183e-04 | wL2=3.800e-04 | wL3=0.000e+00
val L1 = 5.491e-06; a2=1.0e-03 | a3=0.0e+00


epoch 182/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 182 done in 195.9s | loss=2.067354e-05 | L1=6.049e-06 | wL2=1.462e-05 | wL3=0.000e+00
val L1 = 4.947e-06; a2=1.0e-03 | a3=0.0e+00


epoch 183/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 183 done in 198.2s | loss=8.269179e-04 | L1=2.799e-04 | wL2=5.470e-04 | wL3=0.000e+00
val L1 = 1.255e-03; a2=1.0e-03 | a3=0.0e+00


epoch 184/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 184 done in 202.1s | loss=2.971676e-04 | L1=8.541e-05 | wL2=2.118e-04 | wL3=0.000e+00
val L1 = 5.527e-06; a2=1.0e-03 | a3=0.0e+00


epoch 185/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 185 done in 196.2s | loss=1.147647e-04 | L1=2.395e-05 | wL2=9.081e-05 | wL3=0.000e+00
val L1 = 7.532e-05; a2=1.0e-03 | a3=0.0e+00


epoch 186/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 186 done in 196.2s | loss=3.131245e-04 | L1=6.349e-05 | wL2=2.496e-04 | wL3=0.000e+00
val L1 = 3.378e-05; a2=1.0e-03 | a3=0.0e+00


epoch 187/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 187 done in 196.3s | loss=1.035540e-03 | L1=3.946e-04 | wL2=6.409e-04 | wL3=0.000e+00
val L1 = 2.277e-03; a2=1.0e-03 | a3=0.0e+00


epoch 188/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 188 done in 195.5s | loss=1.297904e-03 | L1=3.688e-04 | wL2=9.291e-04 | wL3=0.000e+00
val L1 = 8.694e-06; a2=1.0e-03 | a3=0.0e+00


epoch 189/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 189 done in 197.6s | loss=2.987763e-05 | L1=8.964e-06 | wL2=2.091e-05 | wL3=0.000e+00
val L1 = 1.887e-05; a2=1.0e-03 | a3=0.0e+00


epoch 190/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 190 done in 124.8s | loss=2.403983e-04 | L1=4.670e-05 | wL2=1.937e-04 | wL3=0.000e+00
val L1 = 3.015e-05; a2=1.0e-03 | a3=0.0e+00


epoch 191/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 191 done in 98.1s | loss=1.511439e-03 | L1=5.070e-04 | wL2=1.004e-03 | wL3=0.000e+00
val L1 = 4.840e-06; a2=1.0e-03 | a3=0.0e+00


epoch 192/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 192 done in 91.4s | loss=9.799738e-06 | L1=3.915e-06 | wL2=5.885e-06 | wL3=0.000e+00
val L1 = 3.689e-06; a2=1.0e-03 | a3=0.0e+00


epoch 193/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 193 done in 110.5s | loss=1.897501e-04 | L1=3.563e-05 | wL2=1.541e-04 | wL3=0.000e+00
val L1 = 8.232e-06; a2=1.0e-03 | a3=0.0e+00


epoch 194/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 194 done in 105.4s | loss=4.112945e-04 | L1=8.214e-05 | wL2=3.292e-04 | wL3=0.000e+00
val L1 = 1.034e-04; a2=1.0e-03 | a3=0.0e+00


epoch 195/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 195 done in 98.9s | loss=3.107657e-03 | L1=1.361e-03 | wL2=1.747e-03 | wL3=0.000e+00
val L1 = 6.231e-04; a2=1.0e-03 | a3=0.0e+00


epoch 196/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 196 done in 108.0s | loss=1.411130e-03 | L1=3.063e-04 | wL2=1.105e-03 | wL3=0.000e+00
val L1 = 1.600e-05; a2=1.0e-03 | a3=0.0e+00


epoch 197/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 197 done in 108.7s | loss=2.422687e-05 | L1=1.159e-05 | wL2=1.263e-05 | wL3=0.000e+00
val L1 = 7.703e-06; a2=1.0e-03 | a3=0.0e+00


epoch 198/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 198 done in 108.8s | loss=2.121510e-04 | L1=4.081e-05 | wL2=1.713e-04 | wL3=0.000e+00
val L1 = 2.649e-05; a2=1.0e-03 | a3=0.0e+00


epoch 199/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 199 done in 109.1s | loss=1.609384e-03 | L1=6.405e-04 | wL2=9.689e-04 | wL3=0.000e+00
val L1 = 4.148e-03; a2=1.0e-03 | a3=0.0e+00


epoch 200/200:   0%|          | 0/283 [00:00<?, ?it/s]

==> epoch 200 done in 71.0s | loss=1.205703e-03 | L1=3.507e-04 | wL2=8.550e-04 | wL3=0.000e+00
val L1 = 3.516e-05; a2=1.0e-03 | a3=0.0e+00


In [7]:
ckpt_dir = Path("./checkpoints")
ckpt_dir.mkdir(parents=True, exist_ok=True)

def save_checkpoint(model, optimizer, scheduler, epoch, path):
    state = {
        "epoch": epoch,
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "scheduler": scheduler.state_dict() if scheduler is not None else None,
    }
    torch.save(state, path)
    print(f"saved to {path}")
    
save_checkpoint(model, optimizer, scheduler, epoch+1, ckpt_dir / "tgMLP_128x128_epoch=200_a1_a2.pth",)

saved to checkpoints\tgMLP_128x128_epoch=200_a1_a2.pth


In [None]:
def load_checkpoint(path, model, optimizer=None, scheduler=None, map_location="cpu"):
    ckpt = torch.load(path, map_location=map_location)
    model.load_state_dict(ckpt["model"])
    if optimizer is not None and "optimizer" in ckpt and ckpt["optimizer"] is not None:
        optimizer.load_state_dict(ckpt["optimizer"])
    if scheduler is not None and "scheduler" in ckpt and ckpt["scheduler"] is not None:
        scheduler.load_state_dict(ckpt["scheduler"])
    print(f"loaded from {path}, epoch={ckpt.get('epoch', '?')}")
    return ckpt.get("epoch", None)

state_dict = torch.load("tgMLP_64x64_epoch=200.pth", map_location="cpu")
model.load_state_dict(state_dict)
model.to(device)
model.eval()

In [8]:
model.eval()

sum_u_mse = 0.0
sum_v_mse = 0.0
sum_p_mse_raw = 0.0
sum_u_true_sq = 0.0
sum_v_true_sq = 0.0

sum_dp = 0.0 # Σ (p_pred - p_true)
sum_dp2 = 0.0 # Σ (p_pred - p_true)^2

sum_fx_mse = 0.0
sum_fy_mse = 0.0
sum_g_mse  = 0.0

max_fx = 0.0
max_fy = 0.0
max_g  = 0.0

n_points = 0

with torch.no_grad():
    for batch in tqdm(test_loader, desc="eval on test", leave=False):
        xb = batch["x"].to(device)
        yb = batch["y"].to(device)
        tb = batch["tau"].to(device)
        u_true = batch["u"].to(device)
        v_true = batch["v"].to(device)
        p_true = batch["p"].to(device)
        B = xb.size(0)
        out = physics_forward(
            model,
            xb, yb, tb,
            u_true=u_true,
            v_true=v_true,
            details=True)
        # данные
        u_pred = out["u_data"] # 4-й порядок, как в train
        v_pred = out["v_data"]
        p_pred = out["p_pred"]
        # физика
        f_x = out["f_x"]
        f_y = out["f_y"]
        g_vort = out["g_vort"]
        # метрики по данным
        mse_u = torch.mean((u_pred - u_true) ** 2)
        mse_v = torch.mean((v_pred - v_true) ** 2)
        mse_p_raw = torch.mean((p_pred - p_true) ** 2)
        sum_u_mse += mse_u.item() * B
        sum_v_mse += mse_v.item() * B
        sum_p_mse_raw += mse_p_raw.item() * B
        # для относительных норм
        sum_u_true_sq += torch.mean(u_true ** 2).item() * B
        sum_v_true_sq += torch.mean(v_true ** 2).item() * B
        # shift-invariant давление
        # d = p_pred - p_true
        dp = (p_pred - p_true)
        sum_dp  += dp.sum().item()
        sum_dp2 += (dp * dp).sum().item()
        # метрики по физике
        mse_fx = torch.mean(f_x ** 2)
        mse_fy = torch.mean(f_y ** 2)
        mse_g  = torch.mean(g_vort ** 2)
        sum_fx_mse += mse_fx.item() * B
        sum_fy_mse += mse_fy.item() * B
        sum_g_mse  += mse_g.item()  * B
        max_fx = max(max_fx, torch.max(torch.abs(f_x)).item())
        max_fy = max(max_fy, torch.max(torch.abs(f_y)).item())
        max_g  = max(max_g,  torch.max(torch.abs(g_vort)).item())
        n_points += B

# усреднение
mean_mse_u = sum_u_mse / n_points
mean_mse_v = sum_v_mse / n_points
mean_mse_p_raw = sum_p_mse_raw / n_points

rel_l2_u = math.sqrt(mean_mse_u) / math.sqrt(sum_u_true_sq / n_points + 1e-12)
rel_l2_v = math.sqrt(mean_mse_v) / math.sqrt(sum_v_true_sq / n_points + 1e-12)

mean_mse_fx = sum_fx_mse / n_points
mean_mse_fy = sum_fy_mse / n_points
mean_mse_g  = sum_g_mse  / n_points

# shift-invariant давление
# d̄ = mean(p_pred - p_true)
mean_dp  = sum_dp / n_points
mean_dp2 = sum_dp2 / n_points
# MSE после вычитания оптимальной константы
mse_p_shift = mean_dp2 - mean_dp ** 2
mse_p_shift = max(mse_p_shift, 0.0) # на всякий случай от численных артефактов


print("=== Test metrics ===")
print(f"MSE(u): {mean_mse_u:.6e}")
print(f"MSE(v): {mean_mse_v:.6e}")
print(f"MSE(p̂ vs p) RAW: {mean_mse_p_raw:.6e}")
print(f"MSE(p̂ vs p) shift-invariant: {mse_p_shift:.6e}")
print(f"rel L2(u): {rel_l2_u:.6e}")
print(f"rel L2(v): {rel_l2_v:.6e}")
print()
print("Physics residuals (RMS):")
print(f"NS fx RMS: {math.sqrt(mean_mse_fx):.6e}")
print(f"NS fy RMS: {math.sqrt(mean_mse_fy):.6e}")
print(f"Vorticity RMS: {math.sqrt(mean_mse_g):.6e}")
print()
print("Physics residuals (max over test):")
print(f"max |fx|: {max_fx:.6e}")
print(f"max |fy|: {max_fy:.6e}")
print(f"max |g_vort|: {max_g:.6e}")
print(f"optimal pressure shift c* = {-mean_dp:.6e}")

eval on test:   0%|          | 0/61 [00:00<?, ?it/s]

=== Test metrics ===
MSE(u): 1.538861e-05
MSE(v): 1.998927e-05
MSE(p̂ vs p) RAW: 2.018511e-01
MSE(p̂ vs p) shift-invariant: 2.237757e-04
rel L2(u): 9.187068e-03
rel L2(v): 1.045586e-02

Physics residuals (RMS):
NS fx RMS: 9.679624e-02
NS fy RMS: 1.771966e-01
Vorticity RMS: 1.720022e+01

Physics residuals (max over test):
max |fx|: 1.344740e+00
max |fy|: 2.575957e+00
max |g_vort|: 2.565495e+02
optimal pressure shift c* = -4.490294e-01
