In [1]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F
from torch.distributions import Uniform
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR, OneCycleLR
from torch_poly_lr_decay import PolynomialLRDecay

import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import WandbLogger
from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pandas as pd
import matplotlib.colors as colors

import wandb

PATH_DATASETS = "."
AVAIL_GPUS = min(1, torch.cuda.device_count())
BATCH_SIZE = 256 if AVAIL_GPUS else 64
# BATCH_SIZE=1

plt.rc('font', size=20)        # 기본 폰트 크기
plt.rc('axes', labelsize=20)   # x,y축 label 폰트 크기
plt.rc('xtick', labelsize=20)  # x축 눈금 폰트 크기 
plt.rc('ytick', labelsize=20)  # y축 눈금 폰트 크기
plt.rc('legend', fontsize=20)  # 범례 폰트 크기
plt.rc('figure', titlesize=20) # figure title 폰트 크기

In [2]:
pl.seed_everything(8407)

Global seed set to 8407


8407

In [3]:
class ToyData(Dataset):
    def __init__(self, p_A, p_a, p_B, p_b, q_C, q_c, y):
        self.X = torch.column_stack([p_A, p_a, p_B, p_b])
        self.y = y
        self.q_C = q_C
        self.q_c = q_c
    
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.q_C[idx], self.q_c[idx], self.y[idx]

## Data Loading

In [4]:
data_1 = np.load('./data/toy_array_new.npz')
data_2 = np.load('./data/1200900700_array.npz')

In [5]:
def np_mass_sq(p):
    return p[:,0]**2 - p[:,1]**2 - p[:,2]**2 - p[:,3]**2

g = np.array([1,-1,-1,-1])
def Mass(p, ax=1):
    return torch.sqrt(p[:,0]**2 - p[:,1]**2 - p[:,2]**2 - p[:,3]**2)

def npMass(p, ax=1):
    return np.sqrt(p[:,0]**2 - p[:,1]**2 - p[:,2]**2 - p[:,3]**2)

In [6]:
scaler = 1000
def pre_process(data_1, data_2, scaler):
    N = min(data_1['b1'].shape[0], data_2['b1'].shape[0])
    rand_idx_1 = np.random.randint(data_1['b1'].shape[0], size=N)
    rand_idx_2 = np.random.randint(data_2['b1'].shape[0], size=N)
    
    pa11 = data_1['b1'][rand_idx_1,:] / scaler
    pa12 = data_1['b2'][rand_idx_1,:] / scaler
    pb11 = data_1['l2'][rand_idx_1,:] / scaler
    pb12 = data_1['l1'][rand_idx_1,:] / scaler
    qc11 = data_1['nu1'][rand_idx_1,:] / scaler
    qc12 = data_1['nu2'][rand_idx_1,:] / scaler
    y1 = np.repeat(0, pa11.shape[0]).reshape((-1, 1))
    
    pa21 = data_2['b1'][rand_idx_2,:] / scaler
    pa22 = data_2['b2'][rand_idx_2,:] / scaler
    pb21 = data_2['l2'][rand_idx_2,:] / scaler
    pb22 = data_2['l1'][rand_idx_2,:] / scaler
    qc21 = data_2['nu1'][rand_idx_2,:] / scaler
    qc22 = data_2['nu2'][rand_idx_2,:] / scaler
    y2 = np.repeat(1, pa21.shape[0]).reshape((-1, 1))
    
    np_momenta_1 = [pa11, pa12, pb11, pb12, qc11, qc12, y1]
    np_momenta_2 = [pa21, pa22, pb21, pb22, qc21, qc22, y2]
    
    X = list(map(lambda x: np.row_stack((x[0],x[1])), zip(np_momenta_1, np_momenta_2)))
    
    torch_momenta = list(map(lambda x: torch.tensor(x, dtype=torch.float32), X))
    
    ds = ToyData(*torch_momenta)
    
    return ds

In [7]:
ds = pre_process(data_1, data_2, scaler)

In [8]:
len(ds)

20000

In [9]:
ds[1]

(tensor([ 0.2411, -0.0433, -0.1255,  0.2012,  0.2963, -0.0293,  0.1904,  0.2251,
          0.2542,  0.1916, -0.0917,  0.1398,  0.0466, -0.0209,  0.0414,  0.0045]),
 tensor([ 1.3007,  0.6305, -0.1054,  0.8906]),
 tensor([ 2.2242, -0.7285,  0.0907,  1.9794]),
 tensor([0.]))

## Network

In [10]:
class ToyNet(pl.LightningModule):
    def __init__(self, hparams=None):
        super().__init__()
        
        hidden_layer       = hparams["hidden_layer"]
        hidden_depth       = hparams["hidden_depth"]
        learning_rate      = hparams["learning_rate"]
        batch_size         = hparams["batch_size"]
        
        self.hidden_layer  = hidden_layer
        self.hidden_depth  = hidden_depth
        self.learning_rate = learning_rate
        self.batch_size    = batch_size
        self.epochs        = hparams["epochs"]
        self.gamma        = hparams["gamma"]
        self.max_lr        = hparams['max_lr']
        self.learn_mode    = hparams['learn_mode'] # for pT mC loss on off
        self.learn_mode_sq = hparams['learn_mode_sq'] # sq or sqrt
        
        m1_C = torch.tensor(hparams["m1_C_init"])
        m1_B = m1_C + torch.tensor(hparams["m1_B_add"])
        m1_A = m1_B + torch.tensor(hparams["m1_A_add"])
        
        m2_C = torch.tensor(hparams["m2_C_init"])
        m2_B = m2_C + torch.tensor(hparams["m2_B_add"])
        m2_A = m2_B + torch.tensor(hparams["m2_A_add"])
        
        if self.learn_mode_sq == 'sq':
            m1_C = m1_C ** 2
            m1_B = m1_B ** 2
            m1_A = m1_A ** 2
            m2_C = m2_C ** 2
            m2_B = m2_B ** 2
            m2_A = m2_A ** 2
        
        self.m1_C = m1_C
        self.m1_B = nn.Parameter(m1_B, requires_grad=True)
        self.m1_A = nn.Parameter(m1_A, requires_grad=True)
        self.m2_C = m2_C
        self.m2_B = nn.Parameter(m2_B, requires_grad=True)
        self.m2_A = nn.Parameter(m2_A, requires_grad=True)
        
        layers = [nn.Linear(16, hidden_layer), nn.ReLU(inplace=True), nn.BatchNorm1d(hidden_layer)]
        for i in range(hidden_depth):
            layers.extend([
                nn.Linear(hidden_layer, hidden_layer),
                nn.ReLU(inplace=True),
                nn.BatchNorm1d(hidden_layer)
            ])
            
        if self.learn_mode == 'pt_mc':
            layers.append(nn.Linear(hidden_layer, 8))
        elif self.learn_mode in ['pt', 'mc']:
            layers.append(nn.Linear(hidden_layer, 6))
        elif self.learn_mode == None:
            layers.append(nn.Linear(hidden_layer, 4))
                
        self.net = nn.Sequential(*layers)
        
        self.save_hyperparameters(hparams)
        
        self.ds = None
        
    def forward(self, x):
        return self.net(x)
            
    def training_step(self, batch, batch_idx):
        x, _, _, y = batch
        pa1 = x[:,0:4]
        pa2 = x[:,4:8]
        pb1 = x[:,8:12]
        pb2 = x[:,12:16]
        
        y_0_ics = y == 0
        y_1_ics = y == 1
        
        m_C = torch.zeros_like(y)
        m_C[y_0_ics] = self.m1_C
        m_C[y_1_ics] = self.m2_C
        
        q = self(x)
        if self.learn_mode == 'pt_mc':
            qc1 = q[:,0:4]
            qc2 = q[:,4:8]
        elif self.learn_mode == 'pt':
            qx1 = q[:,0:1] 
            qy1 = q[:,1:2]
            qx2 = q[:,2:3] 
            qy2 = q[:,3:4]
            qz1 = q[:,4:5]   
            qz2 = q[:,5:6]          
            
            Eq1 = torch.sqrt(m_C**2 + qx1**2 + qy1**2 + qz1**2)
            Eq2 = torch.sqrt(m_C**2 + qx2**2 + qy2**2 + qz2**2)    
            
            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1) 
            
        elif self.learn_mode == 'mc':
            qx1 = q[:,0:1] 
            qy2 = q[:,1:2]
            qz1 = q[:,2:3]   
            qz2 = q[:,3:4]
            Eq1 = q[:,4:5]
            Eq2 = q[:,5:6]            

            pTx = x[:,1:2]+x[:,5:6]+x[:,9:10]+x[:,13:14]
            pTy = x[:,2:3]+x[:,6:7]+x[:,10:11]+x[:,14:15]

            qx2 = -pTx-qx1
            qy1 = -pTy-qy2

            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1)    
            
        elif self.learn_mode == None:
            qx1 = q[:,0:1] 
            qy2 = q[:,1:2]
            qz1 = q[:,2:3]   
            qz2 = q[:,3:4]

            pTx = x[:,1:2]+x[:,5:6]+x[:,9:10]+x[:,13:14]
            pTy = x[:,2:3]+x[:,6:7]+x[:,10:11]+x[:,14:15]

            qx2 = -pTx-qx1
            qy1 = -pTy-qy2

            Eq1 = torch.sqrt(m_C**2 + qx1**2 + qy1**2 + qz1**2)
            Eq2 = torch.sqrt(m_C**2 + qx2**2 + qy2**2 + qz2**2)

            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1)        
        
        pB1 = pb1 + qc1
        pB2 = pb2 + qc2
        pA1 = pa1 + pB1
        pA2 = pa2 + pB2
        pT = (pA1 + pA2)[:,1:3]

        if self.learn_mode_sq == 'sq':
            mC1_sq = np_mass_sq(qc1)
            mC2_sq = np_mass_sq(qc2)
            mB1_sq = np_mass_sq(pB1)
            mB2_sq = np_mass_sq(pB2)
            mA1_sq = np_mass_sq(pA1)
            mA2_sq = np_mass_sq(pA2)

        elif self.learn_mode_sq == 'sqrt':
            mC1_sq = Mass(qc1)
            mC2_sq = Mass(qc2)
            mB1_sq = Mass(pB1)
            mB2_sq = Mass(pB2)
            mA1_sq = Mass(pA1)
            mA2_sq = Mass(pA2)
        
        mCs = torch.ones_like(mC1_sq)
        mBs = torch.ones_like(mB1_sq)
        mAs = torch.ones_like(mA1_sq)
        mCs[y_0_ics[:,0]] *= self.m1_C
        mBs[y_0_ics[:,0]] *= self.m1_B
        mAs[y_0_ics[:,0]] *= self.m1_A
        mCs[y_1_ics[:,0]] *= self.m2_C
        mBs[y_1_ics[:,0]] *= self.m2_B
        mAs[y_1_ics[:,0]] *= self.m2_A

        loss_C = torch.abs(mC1_sq - mC2_sq) + torch.abs(mC1_sq - mCs) + torch.abs(mC2_sq - mCs)
        loss_B = torch.abs(mB1_sq - mB2_sq) + torch.abs(mB1_sq - mBs) + torch.abs(mB2_sq - mBs)
        loss_A = torch.abs(mA1_sq - mA2_sq) + torch.abs(mA1_sq - mAs) + torch.abs(mA2_sq - mAs)
        
        loss_pT = pT[:,0]**2 + pT[:,1]**2            
        
        loss_C = loss_C 
        loss_B = loss_B 
        loss_A = loss_A 
        
        if self.learn_mode == 'pt_mc':
            loss = (loss_A + loss_B + loss_C).mean() + loss_pT.mean()
        elif self.learn_mode == 'pt':
            loss = (loss_A + loss_B).mean() + loss_pT.mean()
        elif self.learn_mode == 'mc':
            loss = (loss_A + loss_B + loss_C).mean() 
        elif self.learn_mode == None:        
            loss = (loss_A + loss_B).mean() 
                
        return loss
    
    
    def validation_step(self, batch, batch_idx):
        x, _, _, y = batch
        pa1 = x[:,0:4]
        pa2 = x[:,4:8]
        pb1 = x[:,8:12]
        pb2 = x[:,12:16]
        
        y_0_ics = y == 0
        y_1_ics = y == 1
        
        m_C = torch.ones_like(y)
        m_C[y_0_ics] *= self.m1_C
        m_C[y_1_ics] *= self.m2_C
        
        q = self(x)
        if self.learn_mode == 'pt_mc':
            qc1 = q[:,0:4]
            qc2 = q[:,4:8]
        elif self.learn_mode == 'pt':
            qx1 = q[:,0:1] 
            qy1 = q[:,1:2]
            qx2 = q[:,2:3] 
            qy2 = q[:,3:4]
            qz1 = q[:,4:5]   
            qz2 = q[:,5:6]          
            
            Eq1 = torch.sqrt(m_C**2 + qx1**2 + qy1**2 + qz1**2)
            Eq2 = torch.sqrt(m_C**2 + qx2**2 + qy2**2 + qz2**2)    
            
            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1) 
            
        elif self.learn_mode == 'mc':
            qx1 = q[:,0:1] 
            qy2 = q[:,1:2]
            qz1 = q[:,2:3]   
            qz2 = q[:,3:4]
            Eq1 = q[:,4:5]
            Eq2 = q[:,5:6]            

            pTx = x[:,1:2]+x[:,5:6]+x[:,9:10]+x[:,13:14]
            pTy = x[:,2:3]+x[:,6:7]+x[:,10:11]+x[:,14:15]

            qx2 = -pTx-qx1
            qy1 = -pTy-qy2

            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1)    
            
        elif self.learn_mode == None:
            qx1 = q[:,0:1] 
            qy2 = q[:,1:2]
            qz1 = q[:,2:3]   
            qz2 = q[:,3:4]

            pTx = x[:,1:2]+x[:,5:6]+x[:,9:10]+x[:,13:14]
            pTy = x[:,2:3]+x[:,6:7]+x[:,10:11]+x[:,14:15]

            qx2 = -pTx-qx1
            qy1 = -pTy-qy2

            Eq1 = torch.sqrt(m_C**2 + qx1**2 + qy1**2 + qz1**2)
            Eq2 = torch.sqrt(m_C**2 + qx2**2 + qy2**2 + qz2**2)

            qc1  = torch.cat([Eq1,qx1,qy1,qz1], 1)
            qc2  = torch.cat([Eq2,qx2,qy2,qz2], 1)        
        
        pB1 = pb1 + qc1
        pB2 = pb2 + qc2
        pA1 = pa1 + pB1
        pA2 = pa2 + pB2
        pT = (pA1 + pA2)[:,1:3]

        if self.learn_mode_sq == 'sq':
            mC1_sq = np_mass_sq(qc1)
            mC2_sq = np_mass_sq(qc2)
            mB1_sq = np_mass_sq(pB1)
            mB2_sq = np_mass_sq(pB2)
            mA1_sq = np_mass_sq(pA1)
            mA2_sq = np_mass_sq(pA2)

        elif self.learn_mode_sq == 'sqrt':
            mC1_sq = Mass(qc1)
            mC2_sq = Mass(qc2)
            mB1_sq = Mass(pB1)
            mB2_sq = Mass(pB2)
            mA1_sq = Mass(pA1)
            mA2_sq = Mass(pA2)
        
        mCs = torch.ones_like(mC1_sq)
        mBs = torch.ones_like(mB1_sq)
        mAs = torch.ones_like(mA1_sq)
        mCs[y_0_ics[:,0]] *= self.m1_C
        mBs[y_0_ics[:,0]] *= self.m1_B
        mAs[y_0_ics[:,0]] *= self.m1_A
        mCs[y_1_ics[:,0]] *= self.m2_C
        mBs[y_1_ics[:,0]] *= self.m2_B
        mAs[y_1_ics[:,0]] *= self.m2_A

        loss_C = torch.abs(mC1_sq - mC2_sq) + torch.abs(mC1_sq - mCs) + torch.abs(mC2_sq - mCs)
        loss_B = torch.abs(mB1_sq - mB2_sq) + torch.abs(mB1_sq - mBs) + torch.abs(mB2_sq - mBs)
        loss_A = torch.abs(mA1_sq - mA2_sq) + torch.abs(mA1_sq - mAs) + torch.abs(mA2_sq - mAs)
        
        loss_pT = pT[:,0]**2 + pT[:,1]**2            
        
        loss_C = loss_C 
        loss_B = loss_B 
        loss_A = loss_A 
        
        if self.learn_mode == 'pt_mc':
            loss = (loss_A + loss_B + loss_C).mean() + loss_pT.mean()
        elif self.learn_mode == 'pt':
            loss = (loss_A + loss_B).mean() + loss_pT.mean()
        elif self.learn_mode == 'mc':
            loss = (loss_A + loss_B + loss_C).mean() 
        elif self.learn_mode == None:        
            loss = (loss_A + loss_B).mean() 

        loss_C = torch.abs(mC1_sq - mC2_sq) + torch.abs(mC1_sq - mCs) + torch.abs(mC2_sq - mCs)
        loss_B = torch.abs(mB1_sq - mB2_sq) + torch.abs(mB1_sq - mBs) + torch.abs(mB2_sq - mBs)
        loss_A = torch.abs(mA1_sq - mA2_sq) + torch.abs(mA1_sq - mAs) + torch.abs(mA2_sq - mAs)
        
        loss_pT = pT[:,0]**2 + pT[:,1]**2            
        
        loss_C = loss_C 
        loss_B = loss_B 
        loss_A = loss_A 
        
        if self.learn_mode == 'pt_mc':
            loss = (loss_A + loss_B + loss_C).mean() + loss_pT.mean()
        elif self.learn_mode == 'pt':
            loss = (loss_A + loss_B).mean() + loss_pT.mean()
        elif self.learn_mode == 'mc':
            loss = (loss_A + loss_B + loss_C).mean() 
        elif self.learn_mode == None:        
            loss = (loss_A + loss_B).mean() 
        
        self.log('val_loss', loss)
        self.log('loss_A', loss_A)
        self.log('loss_B', loss_B)
        self.log('loss_C', loss_C)
        self.log('loss_pT', loss_pT)
        
        m1_A1 = mA1_sq[y_0_ics[:,0]].mean()
        m1_A2 = mA2_sq[y_0_ics[:,0]].mean()
        m1_B1 = mB1_sq[y_0_ics[:,0]].mean()
        m1_B2 = mB2_sq[y_0_ics[:,0]].mean()
        m1_C1 = mC1_sq[y_0_ics[:,0]].mean()
        m1_C2 = mC2_sq[y_0_ics[:,0]].mean()
        m2_A1 = mA1_sq[y_1_ics[:,0]].mean()
        m2_A2 = mA2_sq[y_1_ics[:,0]].mean()
        m2_B1 = mB1_sq[y_1_ics[:,0]].mean()
        m2_B2 = mB2_sq[y_1_ics[:,0]].mean()
        m2_C1 = mC1_sq[y_1_ics[:,0]].mean()
        m2_C2 = mC2_sq[y_1_ics[:,0]].mean()
        
        self.log('m1_A', self.m1_A)
        self.log('m1_B', self.m1_B)
        self.log('m1_C', self.m1_C)
        self.log('m1_A1', m1_A1)
        self.log('m1_A2', m1_A2)
        self.log('m1_B1', m1_B1)
        self.log('m1_B2', m1_B2)
        self.log('m1_C1', m1_C1)
        self.log('m1_C2', m1_C2)

        self.log('m2_A', self.m2_A)
        self.log('m2_B', self.m2_B)
        self.log('m2_C', self.m2_C)
        self.log('m2_A1', m2_A1)
        self.log('m2_A2', m2_A2)
        self.log('m2_B1', m2_B1)
        self.log('m2_B2', m2_B2)
        self.log('m2_C1', m2_C1)
        self.log('m2_C2', m2_C2)
        
        return loss
    
    def configure_optimizers(self):
        optimizer = optim.Adam(
            self.parameters(),
            lr=self.learning_rate,
#             betas=(0.99, 0.9999),
#             weight_decay=0.1
        )
        
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": OneCycleLR(
                    optimizer, 
                    max_lr=self.max_lr,
                    steps_per_epoch=len(self.ds_train) // self.batch_size + 1,
                    epochs = self.epochs,
                ),
                "interval": "step",
                "monitor": "val_loss",
                "strict": True,
            }
        } 

        
    def prepare_data(self):
        self.ds = ds
        self.N = len(self.ds)
        
    def setup(self, stage=None):
        N_train = self.N // 10 * 7
        N_val = self.N - N_train
        if stage == "fit" or stage is None:
            self.ds_train, self.ds_val = random_split(self.ds, [N_train, N_val])
        if stage == "test" or stage is None:
            _, self.ds_test = random_split(self.ds, [N_train, N_val])
    
    def train_dataloader(self):
        return DataLoader(self.ds_train, batch_size=self.batch_size)
    
    def val_dataloader(self):
        return DataLoader(self.ds_val, batch_size=self.batch_size)
    
    def test_dataloader(self):
        return DataLoader(self.ds_test, batch_size=self.batch_size)

In [11]:
pl.seed_everything(8407)

Global seed set to 8407


8407

## Hyper-parameter Setting

In [12]:
mc = 700/scaler 

hparams = {
    "hidden_layer": 256,
    "hidden_depth": 5,
    "learning_rate": 1e-3,
    "batch_size": BATCH_SIZE,
    "m1_C_init": mc,
    "m1_B_add": 0.3,
    "m1_A_add": 0.3,
    "m2_C_init": mc,
    "m2_B_add": 0.3,
    "m2_A_add": 0.3,
    "max_lr": 1e-4,
    "epochs": 300,
    "gamma": 0.9,
    "learn_mode": None, # 'pt_mc', 'mc', 'pt', None
    "learn_mode_sq":'sqrt', # 'sq' , 'sqrt' // Note: For 'sqrt', ONLY 'pt' and None are available for the physical reason.
}

In [13]:
model = ToyNet(
    hparams=hparams
)

wandb_logger = WandbLogger(
    project='Auxiliary_Mass'
)

trainer = Trainer(
    logger=wandb_logger,
    max_epochs=hparams["epochs"],
    gpus=AVAIL_GPUS,
    enable_progress_bar=False,
    callbacks=[
#         EarlyStopping(monitor="val_loss", patience=20, mode="min"),
        LearningRateMonitor(logging_interval="step")
    ]
)

[34m[1mwandb[0m: Currently logged in as: [33maxect[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


## Learning

In [14]:
trainer.fit(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 337 K 
------------------------------------
337 K     Trainable params
0         Non-trainable params
337 K     Total params
1.350     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


In [15]:
trainer.save_checkpoint('Aux_Toy.pth')
wandb.save('Aux_Toy.pth')

['/home/xteca/Documents/Project/Research/NewMissing/wandb/run-20221008_153342-fe0159q9/files/Aux_Toy.pth']

In [16]:
wandb.finish()

VBox(children=(Label(value='3.910 MB of 3.910 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss_A,█▇▆▅▄▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss_B,█▇▇▆▅▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss_C,██▇▇▆▆▆▅▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁
loss_pT,█▇▇▇▆▆▆▅▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁
lr-Adam,▁▂▂▃▃▄▅▆▇▇███████▇▇▇▇▆▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
m1_A,█████▇▇▆▆▅▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
m1_A1,▅▆▇████▆▆▆▅▄▃▃▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
m1_A2,▅▆▇███▇▇▇▅▅▄▄▂▂▂▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
m1_B,████▇▇▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,299.0
loss_A,0.15263
loss_B,0.09612
loss_C,0.0
loss_pT,0.0
lr-Adam,0.0
m1_A,0.99968
m1_A1,1.00925
m1_A2,1.00777
m1_B,0.80059
