In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Show what the competition input directory contains (files and sub-folders).
os.listdir('../input/cassava-leaf-disease-classification/')

['train_tfrecords',
 'sample_submission.csv',
 'test_tfrecords',
 'label_num_to_disease_map.json',
 'train_images',
 'train.csv',
 'test_images']

In [3]:
# Table from the merged dataset. NOTE: `train` is rebound further down (in the
# "CV split" section) to the out-of-fold feature table used by the stacking model.
train = pd.read_csv('../input/cassava-leaf-disease-merged/merged.csv')
# Sample submission; main() reads its 'image_id' column and writes 'label' into it.
# NOTE(review): the path contains a doubled '/', which POSIX path resolution tolerates.
test = pd.read_csv('../input/cassava-leaf-disease-classification//sample_submission.csv')
# Label-number -> disease-name map, loaded with the JSON keys as the row index.
label_map = pd.read_json('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json', orient='index')

## Directory settings

In [4]:
# Destination for the log file, per-fold checkpoints and the OOF csv.
OUTPUT_DIR = './'
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Image folders read by TrainDataset / TestDataset.
TRAIN_PATH = '../input/cassava-leaf-disease-merged/train'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

## CFG

In [5]:
class CFG:
    """Run configuration; every setting is read as a class attribute (CFG.<name>)."""

    # --- run mode -----------------------------------------------------------
    debug = False
    train = True
    inference = False
    apex = False
    seed = 42

    # --- progress printing / data loading ------------------------------------
    print_freq = 100
    num_workers = 4

    # --- model and target ----------------------------------------------------
    model_name = 'mlp'  # also used as the checkpoint file-name prefix
    size = 256          # image side length used by get_transforms
    target_size = 5
    target_col = 'label'

    # --- optimisation --------------------------------------------------------
    epochs = 30
    batch_size = 256
    lr = 1e-3
    min_lr = 1e-6
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    scheduler = 'CosineAnnealingWarmRestarts'
    T_0 = 10

    # --- cross-validation ----------------------------------------------------
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]


# Debug runs: a single epoch on a 1000-row random sample of `train`.
if CFG.debug:
    CFG.epochs = 1
    train = train.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)

## Library

In [6]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm

import warnings 
# Silence every Python warning from here on.
warnings.filterwarnings('ignore')

# apex is imported only when mixed precision is switched on in CFG.
if CFG.apex:
    from apex import amp

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Utils

In [7]:
def get_score(y_true, y_pred):
    """Return the accuracy of `y_pred` measured against `y_true`."""
    score = accuracy_score(y_true, y_pred)
    return score

@contextmanager
def timer(name):
    """Context manager that logs a start line, then the elapsed seconds once the body finishes.

    The closing line is only logged when the body completes without raising.
    """
    began = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - began
    LOGGER.info(f'[{name}] done in {elapsed:.0f}')
    
def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Return the module logger, writing bare messages to the console and to `log_file`.

    Safe to call repeatedly (e.g. when the notebook cell is re-run): handlers left
    over from an earlier call are removed and closed first, so each message is
    emitted once per destination instead of once per call.

    Parameters
    ----------
    log_file : str
        Path of the log file. The default is built from OUTPUT_DIR when this
        function is defined.

    Returns
    -------
    logging.Logger
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger(__name__) hands back the same object on every call, so without
    # this cleanup each call would stack another pair of handlers on it.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Shared logger used by timer(), train_loop() and main().
LOGGER = init_logger()

def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs with `seed`.

    Also exports PYTHONHASHSEED and asks cuDNN for deterministic algorithms.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The generators are independent, so they can be seeded in one pass.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True

# Apply the configured seed before any data splitting or model construction below.
seed_torch(seed=CFG.seed)

## CV split

In [8]:
# Out-of-fold prediction tables written by two earlier notebooks.
oof_B3ns = pd.read_csv('../input/041-create-oof-efficientnet-b3ns-data/oof_df_0.csv')
oof_se = pd.read_csv('../input/nb023-data/oof_df.csv')

# Columns '0'..'4' of each table become the stacking features; the second table's
# columns are renamed to '5'..'9' so the two sets do not collide.
# rename() returns a new frame, so no column labels are assigned on a slice of oof_se.
prob_cols = ['0', '1', '2', '3', '4']
oof_B3ns_ = oof_B3ns[prob_cols]
oof_se_ = oof_se[prob_cols].rename(columns=dict(zip(prob_cols, ['5', '6', '7', '8', '9'])))

# NOTE(review): the column-wise concat pairs rows by index, and the label comes from
# oof_se only. This presumes both files list the same samples in the same order - confirm.
train = pd.concat([oof_B3ns_, oof_se_, oof_se[['label']]], axis=1)

# `train` is rebound here, so the debug subsample taken in the CFG cell (which acted
# on the previous `train`) would otherwise be lost. Re-apply it to the feature table.
if CFG.debug:
    train = train.sample(n=min(1000, len(train)), random_state=CFG.seed).reset_index(drop=True)

train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,label
0,3.826102e-15,4.340454e-16,5.234761e-11,1.000000e+00,4.762953e-14,1.522047e-14,9.014521e-16,1.139739e-09,1.000000e+00,3.116005e-13,3
1,9.538939e-01,1.037140e-02,1.156314e-06,8.508536e-07,3.573275e-02,9.599438e-01,7.439279e-03,1.127475e-06,8.668555e-07,3.261490e-02,4
2,4.627016e-18,1.426970e-14,1.898384e-14,1.000000e+00,4.604767e-15,3.705072e-17,5.054389e-14,2.114423e-13,1.000000e+00,2.999360e-14,3
3,4.606794e-07,1.124587e-05,1.638323e-02,9.827052e-01,8.998843e-04,3.833728e-07,9.185316e-06,1.194510e-02,9.875053e-01,5.400496e-04,3
4,6.382671e-06,1.690052e-05,4.985041e-04,1.611800e-05,9.994621e-01,4.209951e-06,7.615774e-06,2.362547e-04,8.609924e-06,9.997434e-01,2
...,...,...,...,...,...,...,...,...,...,...,...
26332,8.475993e-07,9.982687e-01,2.881717e-07,1.730106e-03,8.013854e-08,9.047382e-07,9.980873e-01,3.099606e-07,1.911314e-03,6.628071e-08,1
26333,4.107651e-02,9.103698e-01,1.831035e-02,2.917714e-03,2.732570e-02,2.457832e-02,9.492393e-01,9.367346e-03,1.033589e-03,1.578141e-02,1
26334,3.930135e-09,1.206950e-05,3.702277e-11,9.999880e-01,3.137060e-13,6.578338e-09,1.752745e-05,4.747458e-11,9.999825e-01,7.934944e-13,3
26335,3.089139e-07,2.266481e-06,4.496734e-04,9.995383e-01,9.492393e-06,1.146029e-07,8.421153e-07,5.965448e-04,9.994010e-01,1.521211e-06,3


In [9]:
# Work on a copy so `train` itself does not gain the 'fold' column.
folds = train.copy()
# Stratify on the target so each fold keeps the overall class proportions.
Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds[CFG.target_col])):
    # val_index holds row positions; using it with .loc relies on `folds`
    # carrying a default 0..n-1 index.
    folds.loc[val_index, 'fold'] = int(n)
# Cast the fold ids to int once every row has been assigned.
folds['fold'] = folds['fold'].astype(int)
# Per-fold class counts, to eyeball the stratification.
print(folds.groupby(['fold', CFG.target_col]).size())

fold  label
0     0         299
      1         695
      2         603
      3        3093
      4         578
1     0         299
      1         695
      2         603
      3        3093
      4         578
2     0         298
      1         695
      2         604
      3        3092
      4         578
3     0         298
      1         695
      2         604
      3        3092
      4         578
4     0         298
      1         696
      2         603
      3        3092
      4         578
dtype: int64


## Dataset

In [10]:
class TrainDataset(Dataset):
    """Image dataset yielding (image, label) pairs for files under TRAIN_PATH.

    `df` must provide 'image_id' (file name) and 'label' columns. `transform`,
    when given, is called as transform(image=...) and its 'image' entry is used.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        self.file_names = df['image_id'].values
        self.labels = df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = f'{TRAIN_PATH}/{self.file_names[idx]}'
        # OpenCV decodes to BGR; convert to RGB channel order.
        image = cv2.cvtColor(cv2.imread(file_path), cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label
    
class StackingDataset(Dataset):
    """Tabular dataset yielding (features, label) pairs for the stacking model.

    Features are columns '0'..'9' of `df`; the label is the 'label' column.
    `transform` is accepted for signature parity with the image datasets and
    is not used.
    """

    def __init__(self, df, transform=None):
        self.df = df[['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']]
        self.labels = df['label'].values
        # Materialise the feature matrix once. Handing a pandas Series to
        # torch.tensor() relies on positional integer lookup into a
        # string-labelled Series (deprecated in pandas) and rebuilds a Series
        # for every item.
        self.features = self.df.values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return row `idx` as a float32 feature tensor and an int64 label tensor."""
        X = torch.tensor(self.features[idx]).float()
        y = torch.tensor(self.labels[idx]).long()
        return X, y
    
class TestDataset(Dataset):
    """Image dataset yielding unlabelled images for files under TEST_PATH.

    `df` must provide an 'image_id' column. `transform`, when given, is called
    as transform(image=...) and its 'image' entry is used.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        self.file_names = df['image_id'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = f'{TEST_PATH}/{self.file_names[idx]}'
        # OpenCV decodes to BGR; convert to RGB channel order.
        image = cv2.cvtColor(cv2.imread(file_path), cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

In [11]:
# Smoke test: wrap the full feature table and print the first feature vector.
train_dataset = StackingDataset(train, transform=None)

for i in range(1):
    X, y = train_dataset[i]
    print(X)

tensor([3.8261e-15, 4.3405e-16, 5.2348e-11, 1.0000e+00, 4.7630e-14, 1.5220e-14,
        9.0145e-16, 1.1397e-09, 1.0000e+00, 3.1160e-13])


## Transforms

In [12]:
def get_transforms(*, data):
    """Build the albumentations pipeline for the given split.

    Parameters
    ----------
    data : str
        'train' for the augmenting pipeline, 'valid' for resize-only.

    Returns
    -------
    Compose or None
        None when `data` is neither 'train' nor 'valid'.
    """
    if data == 'train':
        steps = [
            RandomResizedCrop(CFG.size, CFG.size),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
        ]
    elif data == 'valid':
        steps = [Resize(CFG.size, CFG.size)]
    else:
        return None

    # Both pipelines end the same way: normalise with fixed per-channel
    # mean/std, then convert to a tensor.
    steps.append(Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ))
    steps.append(ToTensorV2())
    return Compose(steps)

In [13]:
# train_dataset = TrainDataset(train, transform=get_transforms(data='train'))

# for i in range(1):
#     image, label = train_dataset[i]
#     plt.imshow(image[0])
#     plt.title(f'label: {label}')
#     plt.show()

## MODEL

In [14]:
class CustomResNext(nn.Module):
    """timm backbone whose `fc` head is replaced by a CFG.target_size-way linear layer."""

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Swap the classifier head, keeping its input width.
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG.target_size)
        self.model = backbone

    def forward(self, x):
        return self.model(x)

In [15]:
class StackingModel(nn.Module):
    """Two-hidden-layer MLP: (Linear -> ReLU -> Dropout) x 2 followed by a linear output layer.

    forward() returns raw logits of shape (batch, num_targets); no softmax is applied.
    """

    def __init__(self, num_features, num_targets, hidden_size_1, hidden_size_2, dropout_ratio):
        super().__init__()

        # Attribute names double as state_dict keys, so they must stay as they are.
        # first hidden block
        self.Linear1 = nn.Linear(in_features=num_features, out_features=hidden_size_1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_ratio)

        # second hidden block
        self.Linear2 = nn.Linear(in_features=hidden_size_1, out_features=hidden_size_2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_ratio)

        # output layer
        self.Linear3 = nn.Linear(in_features=hidden_size_2, out_features=num_targets)

    def forward(self, x):
        hidden = self.dropout1(self.relu1(self.Linear1(x)))
        hidden = self.dropout2(self.relu2(self.Linear2(hidden)))
        return self.Linear3(hidden)

In [16]:
# model = StackingModel(num_features=10, num_targets=CFG.target_size, hidden_size_1=16, hidden_size_2=8, dropout_ratio=0.2)
# train_dataset = StackingDataset(train)
# train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, 
#                           num_workers=4, pin_memory=True, drop_last=True)

# for X, y in train_loader:
#     print(X.size())
#     output = model(X)
#     print(output)
#     break

In [17]:
class AverageMeter(object):
    """Keeps the most recent value together with a count-weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, the running sum, the count and the average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Report the elapsed time and a linear estimate of the time remaining.

    Parameters
    ----------
    since : float
        Start time, as returned by time.time().
    percent : float
        Fraction of the work completed so far (must be non-zero).

    Returns
    -------
    str
        '<elapsed> (remain <remaining>)', both formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction done so far.
    projected_total = elapsed / percent
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

def train_fn(train_loader, model, criterion, optimizer, epoch, shechduler, device):
    """Run one training epoch and return the sample-weighted mean loss.

    Parameters
    ----------
    train_loader : iterable
        Yields (X, y) batches; must support len().
    model, criterion, optimizer
        Network, loss function and optimizer being trained.
    epoch : int
        Zero-based epoch index, printed as epoch+1.
    shechduler
        Not used in this function. The misspelled name is kept so existing
        positional and keyword calls keep working.
    device : torch.device
        Device the batches are moved to.

    Returns
    -------
    float
        Mean of the per-batch losses, weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    for step, (X, y) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        X = X.to(device)
        y = y.to(device)
        batch_size = y.size(0)
        y_preds = model(X)
        loss = criterion(y_preds, y)
        # record the loss before it is scaled down for gradient accumulation
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        # step only once every `gradient_accumulation_steps` batches
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            # Each fragment ends with a space so the fields do not run together
            # (same layout as the EVAL line printed by valid_fn).
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(epoch+1, step, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses,
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          grad_norm=grad_norm))
    return losses.avg

def valid_fn(valid_loader, model, criterion, device):
    """Evaluate `model` on `valid_loader` without updating it.

    Parameters
    ----------
    valid_loader : iterable
        Yields (X, y) batches; must support len().
    model, criterion
        Network and loss function.
    device : torch.device
        Device the batches are moved to.

    Returns
    -------
    (float, numpy.ndarray)
        The sample-weighted mean loss, and the softmax probabilities of every
        batch concatenated in loader order.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    for step, (X, y) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        X = X.to(device)
        y = y.to(device)
        batch_size = y.size(0)
        # forward pass and loss, with autograd disabled
        with torch.no_grad():
            y_preds = model(X)
            loss = criterion(y_preds, y)
        losses.update(loss.item(), batch_size)
        # keep class probabilities for scoring by the caller
        preds.append(y_preds.softmax(1).to('cpu').numpy())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))

    predictions = np.concatenate(preds)
    return losses.avg, predictions

def inference(model, states, test_loader, device):
    """Predict class probabilities for `test_loader`, averaged over several checkpoints.

    Parameters
    ----------
    model : nn.Module
        Network whose weights are overwritten by each entry of `states` in turn.
    states : sequence of dict
        Checkpoints; each must hold a state dict under the key 'model'.
    test_loader : iterable
        Yields input batches (no labels); must support len().
    device : torch.device
        Device the model and batches are moved to.

    Returns
    -------
    numpy.ndarray
        Softmax probabilities averaged across `states`, with batches
        concatenated in loader order.
    """
    model.to(device)
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    probs = []
    for i, (images) in tk0:
        images = images.to(device)
        # Per-checkpoint probabilities for this batch. The list was previously
        # created under a different name (`avgpreds`), so the append below
        # raised UnboundLocalError.
        avg_preds = []
        for state in states:
            model.load_state_dict(state['model'])
            model.eval()
            with torch.no_grad():
                y_preds = model(images)
            avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
        # average over checkpoints
        avg_preds = np.mean(avg_preds, axis=0)
        probs.append(avg_preds)
    probs = np.concatenate(probs)
    return probs

## Train loop

In [18]:
# ======================================================
# Train loop
# ======================================================

def train_loop(folds, fold):
    """Train the stacking MLP with `fold` held out and return that fold's OOF predictions.

    Parameters
    ----------
    folds : pandas.DataFrame
        Feature columns '0'..'9', the target column CFG.target_col and a 'fold' column.
    fold : int
        Fold id used for validation; all other folds are used for training.

    Returns
    -------
    pandas.DataFrame
        The validation rows (index reset). Columns '0'..str(CFG.target_size - 1)
        are OVERWRITTEN with the class probabilities of the best-scoring epoch,
        and 'preds' holds their argmax.

    Side effects
    ------------
    Writes OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best.pth' whenever the
    validation accuracy improves.
    """

    LOGGER.info(f'========== fold: {fold} training ============')

    # ======================================================
    # loader
    # ======================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    train_dataset = StackingDataset(train_folds)
    valid_dataset = StackingDataset(valid_folds)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers,
                              pin_memory=True,
                              drop_last=False)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers,
                              pin_memory=True,
                              drop_last=False)

    # ===============================================
    # scheduler
    # ===============================================
    def get_scheduler(optimizer):
        """Build the LR scheduler named by CFG.scheduler.

        NOTE(review): the 'ReduceLROnPlateau' and 'CosineAnnealingLR' branches
        read CFG.factor / CFG.patience / CFG.eps / CFG.T_max, which the CFG
        class in this notebook does not define. Add them before selecting
        either scheduler.
        """
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        else:
            # Previously fell through to `return scheduler` with the name unbound.
            raise ValueError(f'Unsupported CFG.scheduler: {CFG.scheduler!r}')
        return scheduler

    # ===============================================
    # model & optimizer
    # ===============================================
    model = StackingModel(num_features=10, num_targets=CFG.target_size, hidden_size_1=16, hidden_size_2=8, dropout_ratio=0.1)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # ===============================================
    # apex
    # ===============================================
    if CFG.apex:
        # amp.initialize returns the (model, optimizer) pair. The result was
        # previously assigned to `model.optimizer`, which never rebound either name.
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # ===============================================
    # loop
    # ===============================================
    criterion = nn.CrossEntropyLoss()

    best_path = OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth'
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint. Otherwise the reload after the loop could hit a missing or
    # stale file.
    best_score = -np.inf

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        valid_labels = valid_folds[CFG.target_col].values

        # ReduceLROnPlateau needs the monitored metric; the cosine schedulers step blindly.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        # scoring
        score = get_score(valid_labels, preds.argmax(1))

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Accuracy: {score}')

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Mpdel')
            # keep the weights together with the validation probabilities of this epoch
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                        best_path)

    # Reload the best epoch's validation probabilities and attach them to the OOF frame.
    check_point = torch.load(best_path)
    # NOTE: this overwrites the first base model's input columns with the stacked probabilities.
    valid_folds[[str(c) for c in range(CFG.target_size)]] = check_point['preds']
    valid_folds['preds'] = check_point['preds'].argmax(1)

    return valid_folds

In [19]:
# ====================================================
# main
# ====================================================
def main():
    """Run cross-validated training and/or test inference, as selected in CFG.

    With CFG.train: trains every fold listed in CFG.trn_fold, logs per-fold and
    overall accuracy, and writes the concatenated OOF frame to
    OUTPUT_DIR + 'oof_df.csv'.

    With CFG.inference: loads the per-fold checkpoints, predicts on `test` and
    writes OUTPUT_DIR + 'submission.csv'.
    """

    def get_result(result_df):
        """Log the accuracy of result_df['preds'] against the target column."""
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')

    if CFG.train:
        # train
        # Collect the per-fold frames and concatenate once, instead of growing a
        # DataFrame from an empty one inside the loop.
        oof_parts = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(folds, fold)
                oof_parts.append(_oof_df)
                LOGGER.info(f'=============== fold: {fold} result ================')
                get_result(_oof_df)
        oof_df = pd.concat(oof_parts) if oof_parts else pd.DataFrame()
        # CV result
        LOGGER.info(f'============ CV ============')
        get_result(oof_df)
        # save result
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

    if CFG.inference:
        # inference
        # NOTE(review): this branch builds an image model, but the checkpoints with
        # this file-name pattern are written by train_loop from StackingModel, so the
        # state-dict keys will not match. Stacking inference would also need the base
        # models' test-time probabilities, which this notebook does not produce.
        # Confirm the intended model before enabling CFG.inference.
        model = CustomResNext(CFG.model_name, pretrained=False)
        states = [torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth') for fold in CFG.trn_fold]
        # The loader options belong to DataLoader, not to the dataset constructor,
        # and `test_loader` was never defined before being used.
        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(test_dataset,
                                 batch_size=CFG.batch_size,
                                 shuffle=False,
                                 num_workers=CFG.num_workers,
                                 pin_memory=True)
        predictions = inference(model, states, test_loader, device)
        # submission
        test['label'] = predictions.argmax(1)
        test[['image_id', 'label']].to_csv(OUTPUT_DIR+'submission.csv', index=False)

In [20]:
# Entry point: runs the stages enabled in CFG (training and/or inference).
if __name__ == '__main__':
    main()



Epoch: [1][0/83]Data 0.205 (0.205)Elapsed 0m 0s (remain 0m 23s)Loss: 1.6910(1.6910)Grad: 0.5705  
Epoch: [1][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 1.5003(1.5662)Grad: 0.5301  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 1.4357(1.4357) 


Epoch 1 - avg_train_loss: 1.5662 avg_val_loss: 1.4282 time: 3s
Epoch 1 - Accuracy: 0.6249050873196659
Epoch 1 - Save Best Score: 0.6249 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 1.4453(1.4282) 
Epoch: [2][0/83]Data 0.186 (0.186)Elapsed 0m 0s (remain 0m 16s)Loss: 1.4508(1.4508)Grad: 0.5440  
Epoch: [2][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.9398(1.1923)Grad: 0.6268  
EVAL: [0/21] Data 0.202 (0.202) Elapsed 0m 0s (remain 0m 4s) Loss: 0.8508(0.8508) 


Epoch 2 - avg_train_loss: 1.1923 avg_val_loss: 0.8662 time: 3s
Epoch 2 - Accuracy: 0.8752847380410023
Epoch 2 - Save Best Score: 0.8753 Mpdel


EVAL: [20/21] Data 0.023 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.9522(0.8662) 
Epoch: [3][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 0.9146(0.9146)Grad: 0.6442  
Epoch: [3][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5689(0.7440)Grad: 0.3354  
EVAL: [0/21] Data 0.197 (0.197) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5334(0.5334) 


Epoch 3 - avg_train_loss: 0.7440 avg_val_loss: 0.5654 time: 3s
Epoch 3 - Accuracy: 0.873006833712984


EVAL: [20/21] Data 0.000 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6856(0.5654) 
Epoch: [4][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.7426(0.7426)Grad: 0.2621  
Epoch: [4][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4775(0.5948)Grad: 0.2176  
EVAL: [0/21] Data 0.172 (0.172) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4295(0.4295) 


Epoch 4 - avg_train_loss: 0.5948 avg_val_loss: 0.4701 time: 3s
Epoch 4 - Accuracy: 0.8971146545178436
Epoch 4 - Save Best Score: 0.8971 Mpdel


EVAL: [20/21] Data 0.002 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5913(0.4701) 
Epoch: [5][0/83]Data 0.192 (0.192)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5751(0.5751)Grad: 0.1527  
Epoch: [5][82/83]Data 0.000 (0.020)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4198(0.5398)Grad: 0.1986  
EVAL: [0/21] Data 0.163 (0.163) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3796(0.3796) 


Epoch 5 - avg_train_loss: 0.5398 avg_val_loss: 0.4269 time: 3s
Epoch 5 - Accuracy: 0.9005315110098709
Epoch 5 - Save Best Score: 0.9005 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5392(0.4269) 
Epoch: [6][0/83]Data 0.161 (0.161)Elapsed 0m 0s (remain 0m 13s)Loss: 0.5022(0.5022)Grad: 0.1252  
Epoch: [6][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3614(0.5144)Grad: 0.3737  
EVAL: [0/21] Data 0.184 (0.184) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3590(0.3590) 


Epoch 6 - avg_train_loss: 0.5144 avg_val_loss: 0.4101 time: 3s
Epoch 6 - Accuracy: 0.90167046317388
Epoch 6 - Save Best Score: 0.9017 Mpdel


EVAL: [20/21] Data 0.027 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5187(0.4101) 
Epoch: [7][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5459(0.5459)Grad: 0.1410  
Epoch: [7][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6246(0.4984)Grad: 0.2425  
EVAL: [0/21] Data 0.184 (0.184) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3498(0.3498) 


Epoch 7 - avg_train_loss: 0.4984 avg_val_loss: 0.4029 time: 3s
Epoch 7 - Accuracy: 0.9028094153378892
Epoch 7 - Save Best Score: 0.9028 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5106(0.4029) 
Epoch: [8][0/83]Data 0.163 (0.163)Elapsed 0m 0s (remain 0m 13s)Loss: 0.3690(0.3690)Grad: 0.1190  
Epoch: [8][82/83]Data 0.002 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4567(0.4993)Grad: 0.2496  
EVAL: [0/21] Data 0.159 (0.159) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3451(0.3451) 


Epoch 8 - avg_train_loss: 0.4993 avg_val_loss: 0.3996 time: 3s
Epoch 8 - Accuracy: 0.9024297646165528


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5077(0.3996) 
Epoch: [9][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3865(0.3865)Grad: 0.1600  
Epoch: [9][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5266(0.4998)Grad: 0.2343  
EVAL: [0/21] Data 0.159 (0.159) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3434(0.3434) 


Epoch 9 - avg_train_loss: 0.4998 avg_val_loss: 0.3985 time: 3s
Epoch 9 - Accuracy: 0.9028094153378892


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5066(0.3985) 
Epoch: [10][0/83]Data 0.192 (0.192)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5165(0.5165)Grad: 0.2065  
Epoch: [10][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4986(0.4977)Grad: 0.3323  
EVAL: [0/21] Data 0.200 (0.200) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3430(0.3430) 


Epoch 10 - avg_train_loss: 0.4977 avg_val_loss: 0.3982 time: 3s
Epoch 10 - Accuracy: 0.9028094153378892


EVAL: [20/21] Data 0.026 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5062(0.3982) 
Epoch: [11][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5165(0.5165)Grad: 0.1111  
Epoch: [11][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5710(0.4916)Grad: 0.3053  
EVAL: [0/21] Data 0.260 (0.260) Elapsed 0m 0s (remain 0m 5s) Loss: 0.3320(0.3320) 


Epoch 11 - avg_train_loss: 0.4916 avg_val_loss: 0.3904 time: 3s
Epoch 11 - Accuracy: 0.90167046317388


EVAL: [20/21] Data 0.000 (0.033) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5037(0.3904) 
Epoch: [12][0/83]Data 0.194 (0.194)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5132(0.5132)Grad: 0.1555  
Epoch: [12][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4377(0.4837)Grad: 0.3248  
EVAL: [0/21] Data 0.187 (0.187) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3278(0.3278) 


Epoch 12 - avg_train_loss: 0.4837 avg_val_loss: 0.3874 time: 3s
Epoch 12 - Accuracy: 0.9026195899772209


EVAL: [20/21] Data 0.003 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5001(0.3874) 
Epoch: [13][0/83]Data 0.189 (0.189)Elapsed 0m 0s (remain 0m 16s)Loss: 0.6003(0.6003)Grad: 0.3099  
Epoch: [13][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2800(0.4827)Grad: 0.2308  
EVAL: [0/21] Data 0.162 (0.162) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3241(0.3241) 


Epoch 13 - avg_train_loss: 0.4827 avg_val_loss: 0.3856 time: 3s
Epoch 13 - Accuracy: 0.9022399392558846


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5035(0.3856) 
Epoch: [14][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5136(0.5136)Grad: 0.2522  
Epoch: [14][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4752(0.4763)Grad: 0.1958  
EVAL: [0/21] Data 0.162 (0.162) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3217(0.3217) 


Epoch 14 - avg_train_loss: 0.4763 avg_val_loss: 0.3848 time: 3s
Epoch 14 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5077(0.3848) 
Epoch: [15][0/83]Data 0.186 (0.186)Elapsed 0m 0s (remain 0m 15s)Loss: 0.6052(0.6052)Grad: 0.1885  
Epoch: [15][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4449(0.4764)Grad: 0.3344  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3206(0.3206) 


Epoch 15 - avg_train_loss: 0.4764 avg_val_loss: 0.3839 time: 3s
Epoch 15 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5109(0.3839) 
Epoch: [16][0/83]Data 0.189 (0.189)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4970(0.4970)Grad: 0.1572  
Epoch: [16][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4480(0.4730)Grad: 0.4046  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3197(0.3197) 


Epoch 16 - avg_train_loss: 0.4730 avg_val_loss: 0.3832 time: 3s
Epoch 16 - Accuracy: 0.9011009870918755


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5121(0.3832) 
Epoch: [17][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.6261(0.6261)Grad: 0.2044  
Epoch: [17][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2718(0.4714)Grad: 0.2009  
EVAL: [0/21] Data 0.185 (0.185) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3188(0.3188) 


Epoch 17 - avg_train_loss: 0.4714 avg_val_loss: 0.3829 time: 3s
Epoch 17 - Accuracy: 0.9011009870918755


EVAL: [20/21] Data 0.000 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5156(0.3829) 
Epoch: [18][0/83]Data 0.219 (0.219)Elapsed 0m 0s (remain 0m 18s)Loss: 0.3701(0.3701)Grad: 0.1691  
Epoch: [18][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.7210(0.4729)Grad: 0.4053  
EVAL: [0/21] Data 0.211 (0.211) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3189(0.3189) 


Epoch 18 - avg_train_loss: 0.4729 avg_val_loss: 0.3829 time: 3s
Epoch 18 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.011 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5158(0.3829) 
Epoch: [19][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4419(0.4419)Grad: 0.1820  
Epoch: [19][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5812(0.4754)Grad: 0.1854  
EVAL: [0/21] Data 0.181 (0.181) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3188(0.3188) 


Epoch 19 - avg_train_loss: 0.4754 avg_val_loss: 0.3827 time: 3s
Epoch 19 - Accuracy: 0.9009111617312073


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5161(0.3827) 
Epoch: [20][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4064(0.4064)Grad: 0.0792  
Epoch: [20][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4482(0.4700)Grad: 0.2562  
EVAL: [0/21] Data 0.217 (0.217) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3187(0.3187) 


Epoch 20 - avg_train_loss: 0.4700 avg_val_loss: 0.3827 time: 3s
Epoch 20 - Accuracy: 0.9009111617312073


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5164(0.3827) 
Epoch: [21][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5140(0.5140)Grad: 0.2085  
Epoch: [21][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3962(0.4729)Grad: 0.2054  
EVAL: [0/21] Data 0.186 (0.186) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3171(0.3171) 


Epoch 21 - avg_train_loss: 0.4729 avg_val_loss: 0.3824 time: 3s
Epoch 21 - Accuracy: 0.9009111617312073


EVAL: [20/21] Data 0.019 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5228(0.3824) 
Epoch: [22][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4157(0.4157)Grad: 0.1469  
Epoch: [22][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4963(0.4732)Grad: 0.2258  
EVAL: [0/21] Data 0.193 (0.193) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3175(0.3175) 


Epoch 22 - avg_train_loss: 0.4732 avg_val_loss: 0.3821 time: 3s
Epoch 22 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5200(0.3821) 
Epoch: [23][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5939(0.5939)Grad: 0.1407  
Epoch: [23][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4393(0.4727)Grad: 0.1999  
EVAL: [0/21] Data 0.190 (0.190) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3178(0.3178) 


Epoch 23 - avg_train_loss: 0.4727 avg_val_loss: 0.3822 time: 3s
Epoch 23 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5249(0.3822) 
Epoch: [24][0/83]Data 0.180 (0.180)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5048(0.5048)Grad: 0.1124  
Epoch: [24][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4664(0.4674)Grad: 0.2039  
EVAL: [0/21] Data 0.197 (0.197) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3154(0.3154) 


Epoch 24 - avg_train_loss: 0.4674 avg_val_loss: 0.3816 time: 3s
Epoch 24 - Accuracy: 0.9011009870918755


EVAL: [20/21] Data 0.011 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5286(0.3816) 
Epoch: [25][0/83]Data 0.168 (0.168)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4401(0.4401)Grad: 0.1313  
Epoch: [25][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5234(0.4642)Grad: 0.2663  
EVAL: [0/21] Data 0.157 (0.157) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3156(0.3156) 


Epoch 25 - avg_train_loss: 0.4642 avg_val_loss: 0.3815 time: 3s
Epoch 25 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5330(0.3815) 
Epoch: [26][0/83]Data 0.166 (0.166)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3397(0.3397)Grad: 0.1367  
Epoch: [26][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3831(0.4687)Grad: 0.3196  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3163(0.3163) 


Epoch 26 - avg_train_loss: 0.4687 avg_val_loss: 0.3817 time: 3s
Epoch 26 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.008 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5313(0.3817) 
Epoch: [27][0/83]Data 0.187 (0.187)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4267(0.4267)Grad: 0.1018  
Epoch: [27][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4155(0.4701)Grad: 0.2234  
EVAL: [0/21] Data 0.185 (0.185) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3159(0.3159) 


Epoch 27 - avg_train_loss: 0.4701 avg_val_loss: 0.3816 time: 3s
Epoch 27 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5318(0.3816) 
Epoch: [28][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4605(0.4605)Grad: 0.0783  
Epoch: [28][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4220(0.4682)Grad: 0.2242  
EVAL: [0/21] Data 0.217 (0.217) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3159(0.3159) 


Epoch 28 - avg_train_loss: 0.4682 avg_val_loss: 0.3816 time: 3s
Epoch 28 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.023 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5330(0.3816) 
Epoch: [29][0/83]Data 0.187 (0.187)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4306(0.4306)Grad: 0.1106  
Epoch: [29][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2727(0.4643)Grad: 0.1946  
EVAL: [0/21] Data 0.185 (0.185) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3159(0.3159) 


Epoch 29 - avg_train_loss: 0.4643 avg_val_loss: 0.3816 time: 3s
Epoch 29 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.002 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5333(0.3816) 
Epoch: [30][0/83]Data 0.176 (0.176)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4437(0.4437)Grad: 0.1501  
Epoch: [30][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3690(0.4641)Grad: 0.1839  
EVAL: [0/21] Data 0.196 (0.196) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3159(0.3159) 


Epoch 30 - avg_train_loss: 0.4641 avg_val_loss: 0.3816 time: 3s
Epoch 30 - Accuracy: 0.9014806378132119
Score: 0.90281


EVAL: [20/21] Data 0.022 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5333(0.3816) 
Epoch: [1][0/83]Data 0.169 (0.169)Elapsed 0m 0s (remain 0m 14s)Loss: 1.6330(1.6330)Grad: 0.5404  
Epoch: [1][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 1.3924(1.5062)Grad: 0.5137  
EVAL: [0/21] Data 0.209 (0.209) Elapsed 0m 0s (remain 0m 4s) Loss: 1.3367(1.3367) 


Epoch 1 - avg_train_loss: 1.5062 avg_val_loss: 1.3490 time: 3s
Epoch 1 - Accuracy: 0.7488610478359908
Epoch 1 - Save Best Score: 0.7489 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 1.3860(1.3490) 
Epoch: [2][0/83]Data 0.198 (0.198)Elapsed 0m 0s (remain 0m 16s)Loss: 1.3285(1.3285)Grad: 0.6872  
Epoch: [2][82/83]Data 0.005 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.9156(1.1324)Grad: 0.4682  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.8480(0.8480) 


Epoch 2 - avg_train_loss: 1.1324 avg_val_loss: 0.8503 time: 3s
Epoch 2 - Accuracy: 0.8392179195140471
Epoch 2 - Save Best Score: 0.8392 Mpdel


EVAL: [20/21] Data 0.004 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.9314(0.8503) 
Epoch: [3][0/83]Data 0.184 (0.184)Elapsed 0m 0s (remain 0m 17s)Loss: 0.8686(0.8686)Grad: 0.5737  
Epoch: [3][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5384(0.7064)Grad: 0.3090  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5954(0.5954) 


Epoch 3 - avg_train_loss: 0.7064 avg_val_loss: 0.5544 time: 3s
Epoch 3 - Accuracy: 0.857630979498861
Epoch 3 - Save Best Score: 0.8576 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6447(0.5544) 
Epoch: [4][0/83]Data 0.195 (0.195)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5650(0.5650)Grad: 0.3062  
Epoch: [4][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5602(0.5480)Grad: 0.2703  
EVAL: [0/21] Data 0.196 (0.196) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5540(0.5540) 


Epoch 4 - avg_train_loss: 0.5480 avg_val_loss: 0.4794 time: 3s
Epoch 4 - Accuracy: 0.8851556567957479
Epoch 4 - Save Best Score: 0.8852 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5635(0.4794) 
Epoch: [5][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5027(0.5027)Grad: 0.1667  
Epoch: [5][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5369(0.5114)Grad: 0.2987  
EVAL: [0/21] Data 0.253 (0.253) Elapsed 0m 0s (remain 0m 5s) Loss: 0.5493(0.5493) 


Epoch 5 - avg_train_loss: 0.5114 avg_val_loss: 0.4566 time: 3s
Epoch 5 - Accuracy: 0.8881928625664389
Epoch 5 - Save Best Score: 0.8882 Mpdel


EVAL: [20/21] Data 0.023 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5409(0.4566) 
Epoch: [6][0/83]Data 0.176 (0.176)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5571(0.5571)Grad: 0.1513  
Epoch: [6][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5405(0.4931)Grad: 0.2378  
EVAL: [0/21] Data 0.185 (0.185) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5485(0.5485) 


Epoch 6 - avg_train_loss: 0.4931 avg_val_loss: 0.4449 time: 3s
Epoch 6 - Accuracy: 0.8885725132877752
Epoch 6 - Save Best Score: 0.8886 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5288(0.4449) 
Epoch: [7][0/83]Data 0.177 (0.177)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5638(0.5638)Grad: 0.2189  
Epoch: [7][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3523(0.4801)Grad: 0.2019  
EVAL: [0/21] Data 0.175 (0.175) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5472(0.5472) 


Epoch 7 - avg_train_loss: 0.4801 avg_val_loss: 0.4383 time: 3s
Epoch 7 - Accuracy: 0.8881928625664389


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5210(0.4383) 
Epoch: [8][0/83]Data 0.166 (0.166)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4842(0.4842)Grad: 0.1169  
Epoch: [8][82/83]Data 0.002 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2841(0.4783)Grad: 0.2482  
EVAL: [0/21] Data 0.181 (0.181) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5452(0.5452) 


Epoch 8 - avg_train_loss: 0.4783 avg_val_loss: 0.4346 time: 3s
Epoch 8 - Accuracy: 0.8878132118451025


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5172(0.4346) 
Epoch: [9][0/83]Data 0.206 (0.206)Elapsed 0m 0s (remain 0m 17s)Loss: 0.5380(0.5380)Grad: 0.1442  
Epoch: [9][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4926(0.4740)Grad: 0.2838  
EVAL: [0/21] Data 0.183 (0.183) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5443(0.5443) 


Epoch 9 - avg_train_loss: 0.4740 avg_val_loss: 0.4330 time: 3s
Epoch 9 - Accuracy: 0.8878132118451025


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5152(0.4330) 
Epoch: [10][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4617(0.4617)Grad: 0.1139  
Epoch: [10][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3891(0.4765)Grad: 0.1932  
EVAL: [0/21] Data 0.175 (0.175) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5441(0.5441) 


Epoch 10 - avg_train_loss: 0.4765 avg_val_loss: 0.4326 time: 3s
Epoch 10 - Accuracy: 0.8878132118451025


EVAL: [20/21] Data 0.000 (0.029) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5147(0.4326) 
Epoch: [11][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4398(0.4398)Grad: 0.1334  
Epoch: [11][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3444(0.4637)Grad: 0.1890  
EVAL: [0/21] Data 0.204 (0.204) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5356(0.5356) 


Epoch 11 - avg_train_loss: 0.4637 avg_val_loss: 0.4202 time: 3s
Epoch 11 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.016 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5018(0.4202) 
Epoch: [12][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4099(0.4099)Grad: 0.1167  
Epoch: [12][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4889(0.4563)Grad: 0.3371  
EVAL: [0/21] Data 0.190 (0.190) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5294(0.5294) 


Epoch 12 - avg_train_loss: 0.4563 avg_val_loss: 0.4134 time: 3s
Epoch 12 - Accuracy: 0.8872437357630979


EVAL: [20/21] Data 0.003 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4937(0.4134) 
Epoch: [13][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3547(0.3547)Grad: 0.1536  
Epoch: [13][82/83]Data 0.000 (0.027)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4378(0.4500)Grad: 0.2470  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5249(0.5249) 


Epoch 13 - avg_train_loss: 0.4500 avg_val_loss: 0.4103 time: 3s
Epoch 13 - Accuracy: 0.8876233864844343


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4915(0.4103) 
Epoch: [14][0/83]Data 0.171 (0.171)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4377(0.4377)Grad: 0.1163  
Epoch: [14][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5059(0.4508)Grad: 0.2242  
EVAL: [0/21] Data 0.211 (0.211) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5233(0.5233) 


Epoch 14 - avg_train_loss: 0.4508 avg_val_loss: 0.4085 time: 3s
Epoch 14 - Accuracy: 0.8876233864844343


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4865(0.4085) 
Epoch: [15][0/83]Data 0.187 (0.187)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4994(0.4994)Grad: 0.1500  
Epoch: [15][82/83]Data 0.002 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.1902(0.4457)Grad: 0.2103  
EVAL: [0/21] Data 0.166 (0.166) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5221(0.5221) 


Epoch 15 - avg_train_loss: 0.4457 avg_val_loss: 0.4081 time: 3s
Epoch 15 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4888(0.4081) 
Epoch: [16][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4239(0.4239)Grad: 0.1019  
Epoch: [16][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2694(0.4455)Grad: 0.2971  
EVAL: [0/21] Data 0.165 (0.165) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5224(0.5224) 


Epoch 16 - avg_train_loss: 0.4455 avg_val_loss: 0.4073 time: 3s
Epoch 16 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4865(0.4073) 
Epoch: [17][0/83]Data 0.210 (0.210)Elapsed 0m 0s (remain 0m 17s)Loss: 0.4831(0.4831)Grad: 0.1571  
Epoch: [17][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3627(0.4448)Grad: 0.2090  
EVAL: [0/21] Data 0.173 (0.173) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5222(0.5222) 


Epoch 17 - avg_train_loss: 0.4448 avg_val_loss: 0.4071 time: 3s
Epoch 17 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4862(0.4071) 
Epoch: [18][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5146(0.5146)Grad: 0.1704  
Epoch: [18][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5165(0.4456)Grad: 0.1678  
EVAL: [0/21] Data 0.207 (0.207) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5214(0.5214) 


Epoch 18 - avg_train_loss: 0.4456 avg_val_loss: 0.4068 time: 3s
Epoch 18 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.019 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4856(0.4068) 
Epoch: [19][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4915(0.4915)Grad: 0.1280  
Epoch: [19][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4362(0.4430)Grad: 0.2038  
EVAL: [0/21] Data 0.195 (0.195) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5212(0.5212) 


Epoch 19 - avg_train_loss: 0.4430 avg_val_loss: 0.4068 time: 3s
Epoch 19 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4856(0.4068) 
Epoch: [20][0/83]Data 0.165 (0.165)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3825(0.3825)Grad: 0.1952  
Epoch: [20][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.7168(0.4468)Grad: 0.3285  
EVAL: [0/21] Data 0.222 (0.222) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5212(0.5212) 


Epoch 20 - avg_train_loss: 0.4468 avg_val_loss: 0.4068 time: 3s
Epoch 20 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4857(0.4068) 
Epoch: [21][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5326(0.5326)Grad: 0.1226  
Epoch: [21][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4833(0.4429)Grad: 0.2203  
EVAL: [0/21] Data 0.177 (0.177) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5207(0.5207) 


Epoch 21 - avg_train_loss: 0.4429 avg_val_loss: 0.4066 time: 3s
Epoch 21 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4870(0.4066) 
Epoch: [22][0/83]Data 0.192 (0.192)Elapsed 0m 0s (remain 0m 16s)Loss: 0.3085(0.3085)Grad: 0.1329  
Epoch: [22][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4011(0.4412)Grad: 0.4133  
EVAL: [0/21] Data 0.187 (0.187) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5178(0.5178) 


Epoch 22 - avg_train_loss: 0.4412 avg_val_loss: 0.4055 time: 3s
Epoch 22 - Accuracy: 0.8872437357630979


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4838(0.4055) 
Epoch: [23][0/83]Data 0.162 (0.162)Elapsed 0m 0s (remain 0m 13s)Loss: 0.4383(0.4383)Grad: 0.1414  
Epoch: [23][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4820(0.4401)Grad: 0.2433  
EVAL: [0/21] Data 0.157 (0.157) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5176(0.5176) 


Epoch 23 - avg_train_loss: 0.4401 avg_val_loss: 0.4052 time: 3s
Epoch 23 - Accuracy: 0.8876233864844343


EVAL: [20/21] Data 0.031 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4854(0.4052) 
Epoch: [24][0/83]Data 0.184 (0.184)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5027(0.5027)Grad: 0.1829  
Epoch: [24][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5802(0.4400)Grad: 0.2560  
EVAL: [0/21] Data 0.193 (0.193) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5185(0.5185) 


Epoch 24 - avg_train_loss: 0.4400 avg_val_loss: 0.4047 time: 3s
Epoch 24 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4841(0.4047) 
Epoch: [25][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4473(0.4473)Grad: 0.1506  
Epoch: [25][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3587(0.4410)Grad: 0.2598  
EVAL: [0/21] Data 0.192 (0.192) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5179(0.5179) 


Epoch 25 - avg_train_loss: 0.4410 avg_val_loss: 0.4045 time: 3s
Epoch 25 - Accuracy: 0.8872437357630979


EVAL: [20/21] Data 0.013 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4833(0.4045) 
Epoch: [26][0/83]Data 0.162 (0.162)Elapsed 0m 0s (remain 0m 13s)Loss: 0.3791(0.3791)Grad: 0.1017  
Epoch: [26][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4020(0.4433)Grad: 0.1787  
EVAL: [0/21] Data 0.186 (0.186) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5154(0.5154) 


Epoch 26 - avg_train_loss: 0.4433 avg_val_loss: 0.4041 time: 3s
Epoch 26 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.018 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4827(0.4041) 
Epoch: [27][0/83]Data 0.203 (0.203)Elapsed 0m 0s (remain 0m 17s)Loss: 0.3114(0.3114)Grad: 0.1237  
Epoch: [27][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2403(0.4335)Grad: 0.2003  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5153(0.5153) 


Epoch 27 - avg_train_loss: 0.4335 avg_val_loss: 0.4038 time: 3s
Epoch 27 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.007 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4824(0.4038) 
Epoch: [28][0/83]Data 0.180 (0.180)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4368(0.4368)Grad: 0.1134  
Epoch: [28][82/83]Data 0.007 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3885(0.4382)Grad: 0.2926  
EVAL: [0/21] Data 0.164 (0.164) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5152(0.5152) 


Epoch 28 - avg_train_loss: 0.4382 avg_val_loss: 0.4036 time: 3s
Epoch 28 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4820(0.4036) 
Epoch: [29][0/83]Data 0.189 (0.189)Elapsed 0m 0s (remain 0m 16s)Loss: 0.3691(0.3691)Grad: 0.1463  
Epoch: [29][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3339(0.4382)Grad: 0.1999  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5150(0.5150) 


Epoch 29 - avg_train_loss: 0.4382 avg_val_loss: 0.4035 time: 3s
Epoch 29 - Accuracy: 0.8874335611237661


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4819(0.4035) 
Epoch: [30][0/83]Data 0.160 (0.160)Elapsed 0m 0s (remain 0m 13s)Loss: 0.5455(0.5455)Grad: 0.1262  
Epoch: [30][82/83]Data 0.002 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5261(0.4358)Grad: 0.2458  
EVAL: [0/21] Data 0.256 (0.256) Elapsed 0m 0s (remain 0m 5s) Loss: 0.5151(0.5151) 


Epoch 30 - avg_train_loss: 0.4358 avg_val_loss: 0.4035 time: 3s
Epoch 30 - Accuracy: 0.8874335611237661
Score: 0.88857


EVAL: [20/21] Data 0.025 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4821(0.4035) 
Epoch: [1][0/83]Data 0.176 (0.176)Elapsed 0m 0s (remain 0m 15s)Loss: 1.5399(1.5399)Grad: 0.6678  
Epoch: [1][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 1.1684(1.3755)Grad: 0.6130  
EVAL: [0/21] Data 0.189 (0.189) Elapsed 0m 0s (remain 0m 3s) Loss: 1.1444(1.1444) 


Epoch 1 - avg_train_loss: 1.3755 avg_val_loss: 1.1528 time: 3s
Epoch 1 - Accuracy: 0.587051452439719
Epoch 1 - Save Best Score: 0.5871 Mpdel


EVAL: [20/21] Data 0.010 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 1.2203(1.1528) 
Epoch: [2][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 1.1424(1.1424)Grad: 0.5860  
Epoch: [2][82/83]Data 0.002 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 1.0978(0.9551)Grad: 0.3773  
EVAL: [0/21] Data 0.195 (0.195) Elapsed 0m 0s (remain 0m 3s) Loss: 0.7808(0.7808) 


Epoch 2 - avg_train_loss: 0.9551 avg_val_loss: 0.7774 time: 3s
Epoch 2 - Accuracy: 0.5880007594456047
Epoch 2 - Save Best Score: 0.5880 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8406(0.7774) 
Epoch: [3][0/83]Data 0.191 (0.191)Elapsed 0m 0s (remain 0m 17s)Loss: 0.7998(0.7998)Grad: 0.2854  
Epoch: [3][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6684(0.7010)Grad: 0.2086  
EVAL: [0/21] Data 0.223 (0.223) Elapsed 0m 0s (remain 0m 4s) Loss: 0.6295(0.6295) 


Epoch 3 - avg_train_loss: 0.7010 avg_val_loss: 0.6069 time: 3s
Epoch 3 - Accuracy: 0.859692424530093
Epoch 3 - Save Best Score: 0.8597 Mpdel


EVAL: [20/21] Data 0.015 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5905(0.6069) 
Epoch: [4][0/83]Data 0.171 (0.171)Elapsed 0m 0s (remain 0m 14s)Loss: 0.6622(0.6622)Grad: 0.2092  
Epoch: [4][82/83]Data 0.000 (0.027)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6495(0.5912)Grad: 0.2230  
EVAL: [0/21] Data 0.208 (0.208) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5554(0.5554) 


Epoch 4 - avg_train_loss: 0.5912 avg_val_loss: 0.5332 time: 3s
Epoch 4 - Accuracy: 0.8625403455477502
Epoch 4 - Save Best Score: 0.8625 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4877(0.5332) 
Epoch: [5][0/83]Data 0.198 (0.198)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5571(0.5571)Grad: 0.1311  
Epoch: [5][82/83]Data 0.005 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4688(0.5434)Grad: 0.2823  
EVAL: [0/21] Data 0.199 (0.199) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5168(0.5168) 


Epoch 5 - avg_train_loss: 0.5434 avg_val_loss: 0.4965 time: 3s
Epoch 5 - Accuracy: 0.8629200683501044
Epoch 5 - Save Best Score: 0.8629 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4392(0.4965) 
Epoch: [6][0/83]Data 0.174 (0.174)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4694(0.4694)Grad: 0.1733  
Epoch: [6][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4945(0.5164)Grad: 0.2335  
EVAL: [0/21] Data 0.164 (0.164) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4950(0.4950) 


Epoch 6 - avg_train_loss: 0.5164 avg_val_loss: 0.4733 time: 3s
Epoch 6 - Accuracy: 0.8672868805771786
Epoch 6 - Save Best Score: 0.8673 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4077(0.4733) 
Epoch: [7][0/83]Data 0.165 (0.165)Elapsed 0m 0s (remain 0m 14s)Loss: 0.5223(0.5223)Grad: 0.1647  
Epoch: [7][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3919(0.5059)Grad: 0.1770  
EVAL: [0/21] Data 0.163 (0.163) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4836(0.4836) 


Epoch 7 - avg_train_loss: 0.5059 avg_val_loss: 0.4606 time: 3s
Epoch 7 - Accuracy: 0.8851338522878299
Epoch 7 - Save Best Score: 0.8851 Mpdel


EVAL: [20/21] Data 0.005 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3898(0.4606) 
Epoch: [8][0/83]Data 0.177 (0.177)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4806(0.4806)Grad: 0.1068  
Epoch: [8][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5484(0.4938)Grad: 0.2435  
EVAL: [0/21] Data 0.165 (0.165) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4772(0.4772) 


Epoch 8 - avg_train_loss: 0.4938 avg_val_loss: 0.4532 time: 3s
Epoch 8 - Accuracy: 0.8887412189101955
Epoch 8 - Save Best Score: 0.8887 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3797(0.4532) 
Epoch: [9][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4965(0.4965)Grad: 0.1262  
Epoch: [9][82/83]Data 0.002 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6138(0.4932)Grad: 0.2467  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4742(0.4742) 


Epoch 9 - avg_train_loss: 0.4932 avg_val_loss: 0.4499 time: 3s
Epoch 9 - Accuracy: 0.8889310803113727
Epoch 9 - Save Best Score: 0.8889 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3753(0.4499) 
Epoch: [10][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4003(0.4003)Grad: 0.1459  
Epoch: [10][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5691(0.4936)Grad: 0.2040  
EVAL: [0/21] Data 0.258 (0.258) Elapsed 0m 0s (remain 0m 5s) Loss: 0.4735(0.4735) 


Epoch 10 - avg_train_loss: 0.4936 avg_val_loss: 0.4491 time: 3s
Epoch 10 - Accuracy: 0.8889310803113727


EVAL: [20/21] Data 0.012 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3743(0.4491) 
Epoch: [11][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4208(0.4208)Grad: 0.1507  
Epoch: [11][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4545(0.4762)Grad: 0.2472  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4535(0.4535) 


Epoch 11 - avg_train_loss: 0.4762 avg_val_loss: 0.4233 time: 3s
Epoch 11 - Accuracy: 0.8927283083349156
Epoch 11 - Save Best Score: 0.8927 Mpdel


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3388(0.4233) 
Epoch: [12][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3920(0.3920)Grad: 0.1646  
Epoch: [12][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3268(0.4616)Grad: 0.2651  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4421(0.4421) 


Epoch 12 - avg_train_loss: 0.4616 avg_val_loss: 0.4099 time: 3s
Epoch 12 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3232(0.4099) 
Epoch: [13][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5108(0.5108)Grad: 0.1997  
Epoch: [13][82/83]Data 0.004 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3941(0.4527)Grad: 0.3154  
EVAL: [0/21] Data 0.199 (0.199) Elapsed 0m 0s (remain 0m 4s) Loss: 0.4394(0.4394) 


Epoch 13 - avg_train_loss: 0.4527 avg_val_loss: 0.4062 time: 3s
Epoch 13 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.029) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3205(0.4062) 
Epoch: [14][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4285(0.4285)Grad: 0.1205  
Epoch: [14][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5403(0.4547)Grad: 0.2411  
EVAL: [0/21] Data 0.176 (0.176) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4384(0.4384) 


Epoch 14 - avg_train_loss: 0.4547 avg_val_loss: 0.4041 time: 3s
Epoch 14 - Accuracy: 0.8931080311372698
Epoch 14 - Save Best Score: 0.8931 Mpdel


EVAL: [20/21] Data 0.000 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3193(0.4041) 
Epoch: [15][0/83]Data 0.203 (0.203)Elapsed 0m 0s (remain 0m 17s)Loss: 0.4235(0.4235)Grad: 0.1588  
Epoch: [15][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2866(0.4490)Grad: 0.2019  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4376(0.4376) 


Epoch 15 - avg_train_loss: 0.4490 avg_val_loss: 0.4037 time: 3s
Epoch 15 - Accuracy: 0.8923485855325612


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3202(0.4037) 
Epoch: [16][0/83]Data 0.201 (0.201)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5415(0.5415)Grad: 0.1103  
Epoch: [16][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.7106(0.4422)Grad: 0.3851  
EVAL: [0/21] Data 0.198 (0.198) Elapsed 0m 0s (remain 0m 4s) Loss: 0.4379(0.4379) 


Epoch 16 - avg_train_loss: 0.4422 avg_val_loss: 0.4040 time: 3s
Epoch 16 - Accuracy: 0.8923485855325612


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3209(0.4040) 
Epoch: [17][0/83]Data 0.186 (0.186)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4588(0.4588)Grad: 0.1497  
Epoch: [17][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4575(0.4422)Grad: 0.1702  
EVAL: [0/21] Data 0.176 (0.176) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4370(0.4370) 


Epoch 17 - avg_train_loss: 0.4422 avg_val_loss: 0.4031 time: 3s
Epoch 17 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3212(0.4031) 
Epoch: [18][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5836(0.5836)Grad: 0.1697  
Epoch: [18][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4260(0.4450)Grad: 0.2734  
EVAL: [0/21] Data 0.190 (0.190) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4366(0.4366) 


Epoch 18 - avg_train_loss: 0.4450 avg_val_loss: 0.4030 time: 3s
Epoch 18 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.022 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3212(0.4030) 
Epoch: [19][0/83]Data 0.175 (0.175)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4123(0.4123)Grad: 0.1490  
Epoch: [19][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5841(0.4384)Grad: 0.2473  
EVAL: [0/21] Data 0.181 (0.181) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4368(0.4368) 


Epoch 19 - avg_train_loss: 0.4384 avg_val_loss: 0.4030 time: 3s
Epoch 19 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.023 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3209(0.4030) 
Epoch: [20][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4661(0.4661)Grad: 0.2060  
Epoch: [20][82/83]Data 0.006 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6884(0.4410)Grad: 0.3845  
EVAL: [0/21] Data 0.166 (0.166) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4368(0.4368) 


Epoch 20 - avg_train_loss: 0.4410 avg_val_loss: 0.4030 time: 3s
Epoch 20 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3208(0.4030) 
Epoch: [21][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4132(0.4132)Grad: 0.1537  
Epoch: [21][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3216(0.4414)Grad: 0.1985  
EVAL: [0/21] Data 0.239 (0.239) Elapsed 0m 0s (remain 0m 4s) Loss: 0.4370(0.4370) 


Epoch 21 - avg_train_loss: 0.4414 avg_val_loss: 0.4036 time: 3s
Epoch 21 - Accuracy: 0.8931080311372698


EVAL: [20/21] Data 0.022 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3206(0.4036) 
Epoch: [22][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5064(0.5064)Grad: 0.1507  
Epoch: [22][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4617(0.4389)Grad: 0.2466  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4331(0.4331) 


Epoch 22 - avg_train_loss: 0.4389 avg_val_loss: 0.4011 time: 3s
Epoch 22 - Accuracy: 0.8931080311372698


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3191(0.4011) 
Epoch: [23][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4390(0.4390)Grad: 0.1176  
Epoch: [23][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3919(0.4366)Grad: 0.2025  
EVAL: [0/21] Data 0.167 (0.167) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4332(0.4332) 


Epoch 23 - avg_train_loss: 0.4366 avg_val_loss: 0.4017 time: 3s
Epoch 23 - Accuracy: 0.8931080311372698


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3199(0.4017) 
Epoch: [24][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4039(0.4039)Grad: 0.1405  
Epoch: [24][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2196(0.4369)Grad: 0.1627  
EVAL: [0/21] Data 0.177 (0.177) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4318(0.4318) 


Epoch 24 - avg_train_loss: 0.4369 avg_val_loss: 0.4007 time: 3s
Epoch 24 - Accuracy: 0.8934877539396241
Epoch 24 - Save Best Score: 0.8935 Mpdel


EVAL: [20/21] Data 0.028 (0.032) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3184(0.4007) 
Epoch: [25][0/83]Data 0.205 (0.205)Elapsed 0m 0s (remain 0m 17s)Loss: 0.4303(0.4303)Grad: 0.1196  
Epoch: [25][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3967(0.4358)Grad: 0.3599  
EVAL: [0/21] Data 0.164 (0.164) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4326(0.4326) 


Epoch 25 - avg_train_loss: 0.4358 avg_val_loss: 0.4017 time: 3s
Epoch 25 - Accuracy: 0.8929181697360926


EVAL: [20/21] Data 0.009 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3188(0.4017) 
Epoch: [26][0/83]Data 0.165 (0.165)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4950(0.4950)Grad: 0.1726  
Epoch: [26][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4008(0.4362)Grad: 0.1750  
EVAL: [0/21] Data 0.179 (0.179) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4299(0.4299) 


Epoch 26 - avg_train_loss: 0.4362 avg_val_loss: 0.4007 time: 3s
Epoch 26 - Accuracy: 0.8936776153408013
Epoch 26 - Save Best Score: 0.8937 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3185(0.4007) 
Epoch: [27][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4609(0.4609)Grad: 0.1137  
Epoch: [27][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3971(0.4328)Grad: 0.2350  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4292(0.4292) 


Epoch 27 - avg_train_loss: 0.4328 avg_val_loss: 0.3998 time: 3s
Epoch 27 - Accuracy: 0.8932978925384469


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3178(0.3998) 
Epoch: [28][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 0.3627(0.3627)Grad: 0.1333  
Epoch: [28][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4584(0.4338)Grad: 0.2664  
EVAL: [0/21] Data 0.162 (0.162) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4292(0.4292) 


Epoch 28 - avg_train_loss: 0.4338 avg_val_loss: 0.4002 time: 3s
Epoch 28 - Accuracy: 0.8932978925384469


EVAL: [20/21] Data 0.000 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3184(0.4002) 
Epoch: [29][0/83]Data 0.189 (0.189)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4091(0.4091)Grad: 0.1074  
Epoch: [29][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3435(0.4358)Grad: 0.2577  
EVAL: [0/21] Data 0.236 (0.236) Elapsed 0m 0s (remain 0m 4s) Loss: 0.4291(0.4291) 


Epoch 29 - avg_train_loss: 0.4358 avg_val_loss: 0.4004 time: 3s
Epoch 29 - Accuracy: 0.8932978925384469


EVAL: [20/21] Data 0.022 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3188(0.4004) 
Epoch: [30][0/83]Data 0.171 (0.171)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3456(0.3456)Grad: 0.1563  
Epoch: [30][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3738(0.4344)Grad: 0.2542  
EVAL: [0/21] Data 0.184 (0.184) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4290(0.4290) 


Epoch 30 - avg_train_loss: 0.4344 avg_val_loss: 0.4004 time: 3s
Epoch 30 - Accuracy: 0.8932978925384469
Score: 0.89368


EVAL: [20/21] Data 0.000 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3188(0.4004) 
Epoch: [1][0/83]Data 0.195 (0.195)Elapsed 0m 0s (remain 0m 16s)Loss: 1.5610(1.5610)Grad: 0.5798  
Epoch: [1][82/83]Data 0.000 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 1.0323(1.3772)Grad: 0.7959  
EVAL: [0/21] Data 0.166 (0.166) Elapsed 0m 0s (remain 0m 3s) Loss: 1.1272(1.1272) 


Epoch 1 - avg_train_loss: 1.3772 avg_val_loss: 1.1213 time: 3s
Epoch 1 - Accuracy: 0.587051452439719
Epoch 1 - Save Best Score: 0.5871 Mpdel


EVAL: [20/21] Data 0.000 (0.029) Elapsed 0m 0s (remain 0m 0s) Loss: 1.2863(1.1213) 
Epoch: [2][0/83]Data 0.206 (0.206)Elapsed 0m 0s (remain 0m 17s)Loss: 1.1225(1.1225)Grad: 0.6429  
Epoch: [2][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.8393(0.9296)Grad: 0.2577  
EVAL: [0/21] Data 0.197 (0.197) Elapsed 0m 0s (remain 0m 3s) Loss: 0.8323(0.8323) 


Epoch 2 - avg_train_loss: 0.9296 avg_val_loss: 0.8035 time: 3s
Epoch 2 - Accuracy: 0.587051452439719


EVAL: [20/21] Data 0.015 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 1.0416(0.8035) 
Epoch: [3][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.8881(0.8881)Grad: 0.1639  
Epoch: [3][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.8843(0.7999)Grad: 0.2765  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.7739(0.7739) 


Epoch 3 - avg_train_loss: 0.7999 avg_val_loss: 0.7423 time: 3s
Epoch 3 - Accuracy: 0.587051452439719


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.9696(0.7423) 
Epoch: [4][0/83]Data 0.175 (0.175)Elapsed 0m 0s (remain 0m 15s)Loss: 0.7575(0.7575)Grad: 0.1527  
Epoch: [4][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6236(0.7333)Grad: 0.3039  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.6970(0.6970) 


Epoch 4 - avg_train_loss: 0.7333 avg_val_loss: 0.6658 time: 3s
Epoch 4 - Accuracy: 0.765141446743877
Epoch 4 - Save Best Score: 0.7651 Mpdel


EVAL: [20/21] Data 0.015 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8724(0.6658) 
Epoch: [5][0/83]Data 0.205 (0.205)Elapsed 0m 0s (remain 0m 17s)Loss: 0.7282(0.7282)Grad: 0.2427  
Epoch: [5][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6952(0.6639)Grad: 0.3112  
EVAL: [0/21] Data 0.200 (0.200) Elapsed 0m 0s (remain 0m 4s) Loss: 0.6328(0.6328) 


Epoch 5 - avg_train_loss: 0.6639 avg_val_loss: 0.6013 time: 3s
Epoch 5 - Accuracy: 0.8608315929371558
Epoch 5 - Save Best Score: 0.8608 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7932(0.6013) 
Epoch: [6][0/83]Data 0.194 (0.194)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5342(0.5342)Grad: 0.2039  
Epoch: [6][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5011(0.6213)Grad: 0.2478  
EVAL: [0/21] Data 0.187 (0.187) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5996(0.5996) 


Epoch 6 - avg_train_loss: 0.6213 avg_val_loss: 0.5652 time: 3s
Epoch 6 - Accuracy: 0.8650085437630529
Epoch 6 - Save Best Score: 0.8650 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7502(0.5652) 
Epoch: [7][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5876(0.5876)Grad: 0.2065  
Epoch: [7][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4921(0.5937)Grad: 0.2282  
EVAL: [0/21] Data 0.158 (0.158) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5836(0.5836) 


Epoch 7 - avg_train_loss: 0.5937 avg_val_loss: 0.5455 time: 3s
Epoch 7 - Accuracy: 0.8655781279665844
Epoch 7 - Save Best Score: 0.8656 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7277(0.5455) 
Epoch: [8][0/83]Data 0.203 (0.203)Elapsed 0m 0s (remain 0m 17s)Loss: 0.5158(0.5158)Grad: 0.1814  
Epoch: [8][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4397(0.5835)Grad: 0.2367  
EVAL: [0/21] Data 0.182 (0.182) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5772(0.5772) 


Epoch 8 - avg_train_loss: 0.5835 avg_val_loss: 0.5362 time: 3s
Epoch 8 - Accuracy: 0.8657679893677616
Epoch 8 - Save Best Score: 0.8658 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7163(0.5362) 
Epoch: [9][0/83]Data 0.183 (0.183)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5172(0.5172)Grad: 0.1435  
Epoch: [9][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5702(0.5768)Grad: 0.2222  
EVAL: [0/21] Data 0.170 (0.170) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5745(0.5745) 


Epoch 9 - avg_train_loss: 0.5768 avg_val_loss: 0.5324 time: 3s
Epoch 9 - Accuracy: 0.8655781279665844


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7115(0.5324) 
Epoch: [10][0/83]Data 0.187 (0.187)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5606(0.5606)Grad: 0.1441  
Epoch: [10][82/83]Data 0.008 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6311(0.5759)Grad: 0.3249  
EVAL: [0/21] Data 0.203 (0.203) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5739(0.5739) 


Epoch 10 - avg_train_loss: 0.5759 avg_val_loss: 0.5315 time: 3s
Epoch 10 - Accuracy: 0.8653882665654072


EVAL: [20/21] Data 0.000 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.7103(0.5315) 
Epoch: [11][0/83]Data 0.196 (0.196)Elapsed 0m 0s (remain 0m 16s)Loss: 0.6774(0.6774)Grad: 0.1998  
Epoch: [11][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.7034(0.5649)Grad: 0.2314  
EVAL: [0/21] Data 0.165 (0.165) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5571(0.5571) 


Epoch 11 - avg_train_loss: 0.5649 avg_val_loss: 0.5023 time: 3s
Epoch 11 - Accuracy: 0.8917790013290298
Epoch 11 - Save Best Score: 0.8918 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6747(0.5023) 
Epoch: [12][0/83]Data 0.212 (0.212)Elapsed 0m 0s (remain 0m 17s)Loss: 0.4953(0.4953)Grad: 0.1333  
Epoch: [12][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4995(0.5375)Grad: 0.2735  
EVAL: [0/21] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 6s) Loss: 0.5459(0.5459) 


Epoch 12 - avg_train_loss: 0.5375 avg_val_loss: 0.4814 time: 3s
Epoch 12 - Accuracy: 0.8912094171254984


EVAL: [20/21] Data 0.022 (0.031) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6409(0.4814) 
Epoch: [13][0/83]Data 0.207 (0.207)Elapsed 0m 0s (remain 0m 17s)Loss: 0.6422(0.6422)Grad: 0.3036  
Epoch: [13][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5085(0.5238)Grad: 0.2022  
EVAL: [0/21] Data 0.169 (0.169) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5402(0.5402) 


Epoch 13 - avg_train_loss: 0.5238 avg_val_loss: 0.4688 time: 3s
Epoch 13 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6217(0.4688) 
Epoch: [14][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4741(0.4741)Grad: 0.1044  
Epoch: [14][82/83]Data 0.007 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6649(0.5181)Grad: 0.3542  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5359(0.5359) 


Epoch 14 - avg_train_loss: 0.5181 avg_val_loss: 0.4609 time: 3s
Epoch 14 - Accuracy: 0.8925384469337384
Epoch 14 - Save Best Score: 0.8925 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6071(0.4609) 
Epoch: [15][0/83]Data 0.184 (0.184)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4887(0.4887)Grad: 0.1683  
Epoch: [15][82/83]Data 0.002 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4082(0.5093)Grad: 0.2580  
EVAL: [0/21] Data 0.170 (0.170) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5342(0.5342) 


Epoch 15 - avg_train_loss: 0.5093 avg_val_loss: 0.4555 time: 3s
Epoch 15 - Accuracy: 0.8923485855325612


EVAL: [20/21] Data 0.000 (0.036) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6000(0.4555) 
Epoch: [16][0/83]Data 0.278 (0.278)Elapsed 0m 0s (remain 0m 23s)Loss: 0.4803(0.4803)Grad: 0.1436  
Epoch: [16][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6415(0.5079)Grad: 0.4305  
EVAL: [0/21] Data 0.187 (0.187) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5309(0.5309) 


Epoch 16 - avg_train_loss: 0.5079 avg_val_loss: 0.4516 time: 3s
Epoch 16 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5946(0.4516) 
Epoch: [17][0/83]Data 0.180 (0.180)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4784(0.4784)Grad: 0.1020  
Epoch: [17][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6565(0.5047)Grad: 0.2202  
EVAL: [0/21] Data 0.163 (0.163) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5291(0.5291) 


Epoch 17 - avg_train_loss: 0.5047 avg_val_loss: 0.4494 time: 3s
Epoch 17 - Accuracy: 0.8927283083349156
Epoch 17 - Save Best Score: 0.8927 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5914(0.4494) 
Epoch: [18][0/83]Data 0.196 (0.196)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4860(0.4860)Grad: 0.1773  
Epoch: [18][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3848(0.5007)Grad: 0.1688  
EVAL: [0/21] Data 0.212 (0.212) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5284(0.5284) 


Epoch 18 - avg_train_loss: 0.5007 avg_val_loss: 0.4480 time: 3s
Epoch 18 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5887(0.4480) 
Epoch: [19][0/83]Data 0.192 (0.192)Elapsed 0m 0s (remain 0m 17s)Loss: 0.5363(0.5363)Grad: 0.1366  
Epoch: [19][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5669(0.5005)Grad: 0.1931  
EVAL: [0/21] Data 0.229 (0.229) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5280(0.5280) 


Epoch 19 - avg_train_loss: 0.5005 avg_val_loss: 0.4475 time: 3s
Epoch 19 - Accuracy: 0.8929181697360926
Epoch 19 - Save Best Score: 0.8929 Mpdel


EVAL: [20/21] Data 0.022 (0.029) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5878(0.4475) 
Epoch: [20][0/83]Data 0.186 (0.186)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4814(0.4814)Grad: 0.1720  
Epoch: [20][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5531(0.5005)Grad: 0.1965  
EVAL: [0/21] Data 0.205 (0.205) Elapsed 0m 0s (remain 0m 4s) Loss: 0.5278(0.5278) 


Epoch 20 - avg_train_loss: 0.5005 avg_val_loss: 0.4473 time: 3s
Epoch 20 - Accuracy: 0.8929181697360926


EVAL: [20/21] Data 0.000 (0.033) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5875(0.4473) 
Epoch: [21][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4177(0.4177)Grad: 0.1500  
Epoch: [21][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3849(0.4927)Grad: 0.2416  
EVAL: [0/21] Data 0.164 (0.164) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5225(0.5225) 


Epoch 21 - avg_train_loss: 0.4927 avg_val_loss: 0.4420 time: 3s
Epoch 21 - Accuracy: 0.8934877539396241
Epoch 21 - Save Best Score: 0.8935 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5803(0.4420) 
Epoch: [22][0/83]Data 0.184 (0.184)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5923(0.5923)Grad: 0.1562  
Epoch: [22][82/83]Data 0.000 (0.020)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4141(0.4936)Grad: 0.2696  
EVAL: [0/21] Data 0.193 (0.193) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5178(0.5178) 


Epoch 22 - avg_train_loss: 0.4936 avg_val_loss: 0.4361 time: 3s
Epoch 22 - Accuracy: 0.8942471995443326
Epoch 22 - Save Best Score: 0.8942 Mpdel


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5711(0.4361) 
Epoch: [23][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5007(0.5007)Grad: 0.1678  
Epoch: [23][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5138(0.4852)Grad: 0.3476  
EVAL: [0/21] Data 0.179 (0.179) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5120(0.5120) 


Epoch 23 - avg_train_loss: 0.4852 avg_val_loss: 0.4311 time: 3s
Epoch 23 - Accuracy: 0.8940573381431555


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5708(0.4311) 
Epoch: [24][0/83]Data 0.202 (0.202)Elapsed 0m 0s (remain 0m 17s)Loss: 0.4891(0.4891)Grad: 0.1501  
Epoch: [24][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5018(0.4813)Grad: 0.2870  
EVAL: [0/21] Data 0.168 (0.168) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5043(0.5043) 


Epoch 24 - avg_train_loss: 0.4813 avg_val_loss: 0.4261 time: 3s
Epoch 24 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5660(0.4261) 
Epoch: [25][0/83]Data 0.191 (0.191)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4075(0.4075)Grad: 0.1524  
Epoch: [25][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6190(0.4767)Grad: 0.3317  
EVAL: [0/21] Data 0.155 (0.155) Elapsed 0m 0s (remain 0m 3s) Loss: 0.5004(0.5004) 


Epoch 25 - avg_train_loss: 0.4767 avg_val_loss: 0.4221 time: 3s
Epoch 25 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5611(0.4221) 
Epoch: [26][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4514(0.4514)Grad: 0.1024  
Epoch: [26][82/83]Data 0.010 (0.025)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3312(0.4752)Grad: 0.2125  
EVAL: [0/21] Data 0.191 (0.191) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4965(0.4965) 


Epoch 26 - avg_train_loss: 0.4752 avg_val_loss: 0.4185 time: 3s
Epoch 26 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.026 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5605(0.4185) 
Epoch: [27][0/83]Data 0.192 (0.192)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4028(0.4028)Grad: 0.0946  
Epoch: [27][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3702(0.4719)Grad: 0.1803  
EVAL: [0/21] Data 0.171 (0.171) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4922(0.4922) 


Epoch 27 - avg_train_loss: 0.4719 avg_val_loss: 0.4155 time: 3s
Epoch 27 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5589(0.4155) 
Epoch: [28][0/83]Data 0.190 (0.190)Elapsed 0m 0s (remain 0m 16s)Loss: 0.4869(0.4869)Grad: 0.1341  
Epoch: [28][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.6728(0.4722)Grad: 0.3977  
EVAL: [0/21] Data 0.193 (0.193) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4895(0.4895) 


Epoch 28 - avg_train_loss: 0.4722 avg_val_loss: 0.4135 time: 3s
Epoch 28 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.026 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5570(0.4135) 
Epoch: [29][0/83]Data 0.178 (0.178)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4399(0.4399)Grad: 0.1231  
Epoch: [29][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4953(0.4672)Grad: 0.2970  
EVAL: [0/21] Data 0.168 (0.168) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4885(0.4885) 


Epoch 29 - avg_train_loss: 0.4672 avg_val_loss: 0.4126 time: 3s
Epoch 29 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5564(0.4126) 
Epoch: [30][0/83]Data 0.165 (0.165)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4525(0.4525)Grad: 0.1402  
Epoch: [30][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4815(0.4690)Grad: 0.2086  
EVAL: [0/21] Data 0.261 (0.261) Elapsed 0m 0s (remain 0m 5s) Loss: 0.4882(0.4882) 


Epoch 30 - avg_train_loss: 0.4690 avg_val_loss: 0.4124 time: 3s
Epoch 30 - Accuracy: 0.8925384469337384
Score: 0.89425


EVAL: [20/21] Data 0.023 (0.033) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5562(0.4124) 
Epoch: [1][0/83]Data 0.154 (0.154)Elapsed 0m 0s (remain 0m 13s)Loss: 1.7645(1.7645)Grad: 0.5984  
Epoch: [1][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 1.5003(1.6385)Grad: 0.6563  
EVAL: [0/21] Data 0.239 (0.239) Elapsed 0m 0s (remain 0m 4s) Loss: 1.4653(1.4653) 


Epoch 1 - avg_train_loss: 1.6385 avg_val_loss: 1.4760 time: 3s
Epoch 1 - Accuracy: 0.7797607746345168
Epoch 1 - Save Best Score: 0.7798 Mpdel


EVAL: [20/21] Data 0.025 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 1.4772(1.4760) 
Epoch: [2][0/83]Data 0.174 (0.174)Elapsed 0m 0s (remain 0m 14s)Loss: 1.4878(1.4878)Grad: 0.5954  
Epoch: [2][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.8502(1.2111)Grad: 0.8535  
EVAL: [0/21] Data 0.196 (0.196) Elapsed 0m 0s (remain 0m 3s) Loss: 0.8117(0.8117) 


Epoch 2 - avg_train_loss: 1.2111 avg_val_loss: 0.8394 time: 3s
Epoch 2 - Accuracy: 0.886652743497247
Epoch 2 - Save Best Score: 0.8867 Mpdel


EVAL: [20/21] Data 0.006 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8694(0.8394) 
Epoch: [3][0/83]Data 0.195 (0.195)Elapsed 0m 0s (remain 0m 16s)Loss: 0.8859(0.8859)Grad: 0.7201  
Epoch: [3][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4957(0.6511)Grad: 0.2869  
EVAL: [0/21] Data 0.236 (0.236) Elapsed 0m 0s (remain 0m 4s) Loss: 0.4210(0.4210) 


Epoch 3 - avg_train_loss: 0.6511 avg_val_loss: 0.4688 time: 3s
Epoch 3 - Accuracy: 0.8876020505031327
Epoch 3 - Save Best Score: 0.8876 Mpdel


EVAL: [20/21] Data 0.002 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5108(0.4688) 
Epoch: [4][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5260(0.5260)Grad: 0.2380  
Epoch: [4][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3056(0.4984)Grad: 0.2236  
EVAL: [0/21] Data 0.205 (0.205) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3697(0.3697) 


Epoch 4 - avg_train_loss: 0.4984 avg_val_loss: 0.4197 time: 3s
Epoch 4 - Accuracy: 0.8895006645149042
Epoch 4 - Save Best Score: 0.8895 Mpdel


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4504(0.4197) 
Epoch: [5][0/83]Data 0.180 (0.180)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5226(0.5226)Grad: 0.1452  
Epoch: [5][82/83]Data 0.004 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3523(0.4757)Grad: 0.1873  
EVAL: [0/21] Data 0.202 (0.202) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3557(0.3557) 


Epoch 5 - avg_train_loss: 0.4757 avg_val_loss: 0.4089 time: 3s
Epoch 5 - Accuracy: 0.889310803113727


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4353(0.4089) 
Epoch: [6][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3526(0.3526)Grad: 0.1327  
Epoch: [6][82/83]Data 0.000 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4362(0.4716)Grad: 0.2522  
EVAL: [0/21] Data 0.201 (0.201) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3520(0.3520) 


Epoch 6 - avg_train_loss: 0.4716 avg_val_loss: 0.4055 time: 3s
Epoch 6 - Accuracy: 0.8896905259160812
Epoch 6 - Save Best Score: 0.8897 Mpdel


EVAL: [20/21] Data 0.002 (0.033) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4295(0.4055) 
Epoch: [7][0/83]Data 0.186 (0.186)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4987(0.4987)Grad: 0.2006  
Epoch: [7][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4741(0.4649)Grad: 0.3091  
EVAL: [0/21] Data 0.165 (0.165) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3502(0.3502) 


Epoch 7 - avg_train_loss: 0.4649 avg_val_loss: 0.4040 time: 3s
Epoch 7 - Accuracy: 0.8895006645149042


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4272(0.4040) 
Epoch: [8][0/83]Data 0.175 (0.175)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4857(0.4857)Grad: 0.1429  
Epoch: [8][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4800(0.4643)Grad: 0.2866  
EVAL: [0/21] Data 0.191 (0.191) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3493(0.3493) 


Epoch 8 - avg_train_loss: 0.4643 avg_val_loss: 0.4030 time: 3s
Epoch 8 - Accuracy: 0.8900702487184355
Epoch 8 - Save Best Score: 0.8901 Mpdel


EVAL: [20/21] Data 0.005 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4259(0.4030) 
Epoch: [9][0/83]Data 0.191 (0.191)Elapsed 0m 0s (remain 0m 16s)Loss: 0.3826(0.3826)Grad: 0.2273  
Epoch: [9][82/83]Data 0.002 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2348(0.4604)Grad: 0.1854  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3490(0.3490) 


Epoch 9 - avg_train_loss: 0.4604 avg_val_loss: 0.4027 time: 3s
Epoch 9 - Accuracy: 0.8898803873172584


EVAL: [20/21] Data 0.002 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4251(0.4027) 
Epoch: [10][0/83]Data 0.179 (0.179)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4939(0.4939)Grad: 0.1210  
Epoch: [10][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5048(0.4609)Grad: 0.3154  
EVAL: [0/21] Data 0.210 (0.210) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3490(0.3490) 


Epoch 10 - avg_train_loss: 0.4609 avg_val_loss: 0.4026 time: 3s
Epoch 10 - Accuracy: 0.8898803873172584


EVAL: [20/21] Data 0.000 (0.036) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4248(0.4026) 
Epoch: [11][0/83]Data 0.183 (0.183)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3759(0.3759)Grad: 0.1108  
Epoch: [11][82/83]Data 0.010 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3917(0.4593)Grad: 0.2516  
EVAL: [0/21] Data 0.189 (0.189) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3451(0.3451) 


Epoch 11 - avg_train_loss: 0.4593 avg_val_loss: 0.3998 time: 3s
Epoch 11 - Accuracy: 0.8902601101196127
Epoch 11 - Save Best Score: 0.8903 Mpdel


EVAL: [20/21] Data 0.000 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4178(0.3998) 
Epoch: [12][0/83]Data 0.181 (0.181)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4058(0.4058)Grad: 0.1298  
Epoch: [12][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3692(0.4555)Grad: 0.1683  
EVAL: [0/21] Data 0.242 (0.242) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3449(0.3449) 


Epoch 12 - avg_train_loss: 0.4555 avg_val_loss: 0.3981 time: 3s
Epoch 12 - Accuracy: 0.8908296943231441
Epoch 12 - Save Best Score: 0.8908 Mpdel


EVAL: [20/21] Data 0.022 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4120(0.3981) 
Epoch: [13][0/83]Data 0.166 (0.166)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4567(0.4567)Grad: 0.1098  
Epoch: [13][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5211(0.4543)Grad: 0.2921  
EVAL: [0/21] Data 0.210 (0.210) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3413(0.3413) 


Epoch 13 - avg_train_loss: 0.4543 avg_val_loss: 0.3961 time: 3s
Epoch 13 - Accuracy: 0.8902601101196127


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4112(0.3961) 
Epoch: [14][0/83]Data 0.172 (0.172)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3891(0.3891)Grad: 0.1133  
Epoch: [14][82/83]Data 0.003 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5175(0.4555)Grad: 0.3371  
EVAL: [0/21] Data 0.176 (0.176) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3419(0.3419) 


Epoch 14 - avg_train_loss: 0.4555 avg_val_loss: 0.3961 time: 3s
Epoch 14 - Accuracy: 0.8904499715207899


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4118(0.3961) 
Epoch: [15][0/83]Data 0.183 (0.183)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4808(0.4808)Grad: 0.1660  
Epoch: [15][82/83]Data 0.010 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3900(0.4523)Grad: 0.2228  
EVAL: [0/21] Data 0.161 (0.161) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3395(0.3395) 


Epoch 15 - avg_train_loss: 0.4523 avg_val_loss: 0.3954 time: 3s
Epoch 15 - Accuracy: 0.8912094171254984
Epoch 15 - Save Best Score: 0.8912 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4110(0.3954) 
Epoch: [16][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5154(0.5154)Grad: 0.1425  
Epoch: [16][82/83]Data 0.002 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4491(0.4522)Grad: 0.2465  
EVAL: [0/21] Data 0.188 (0.188) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3415(0.3415) 


Epoch 16 - avg_train_loss: 0.4522 avg_val_loss: 0.3951 time: 3s
Epoch 16 - Accuracy: 0.8912094171254984


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4088(0.3951) 
Epoch: [17][0/83]Data 0.183 (0.183)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4209(0.4209)Grad: 0.1979  
Epoch: [17][82/83]Data 0.000 (0.026)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4267(0.4535)Grad: 0.2059  
EVAL: [0/21] Data 0.181 (0.181) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3417(0.3417) 


Epoch 17 - avg_train_loss: 0.4535 avg_val_loss: 0.3951 time: 3s
Epoch 17 - Accuracy: 0.8913992785266756
Epoch 17 - Save Best Score: 0.8914 Mpdel


EVAL: [20/21] Data 0.020 (0.030) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4081(0.3951) 
Epoch: [18][0/83]Data 0.189 (0.189)Elapsed 0m 0s (remain 0m 16s)Loss: 0.3867(0.3867)Grad: 0.1486  
Epoch: [18][82/83]Data 0.006 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3260(0.4509)Grad: 0.2006  
EVAL: [0/21] Data 0.162 (0.162) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3414(0.3414) 


Epoch 18 - avg_train_loss: 0.4509 avg_val_loss: 0.3948 time: 3s
Epoch 18 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4078(0.3948) 
Epoch: [19][0/83]Data 0.177 (0.177)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4988(0.4988)Grad: 0.1286  
Epoch: [19][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4284(0.4523)Grad: 0.1827  
EVAL: [0/21] Data 0.160 (0.160) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3414(0.3414) 


Epoch 19 - avg_train_loss: 0.4523 avg_val_loss: 0.3948 time: 3s
Epoch 19 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.026) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4077(0.3948) 
Epoch: [20][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 16s)Loss: 0.5284(0.5284)Grad: 0.1328  
Epoch: [20][82/83]Data 0.000 (0.020)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4857(0.4461)Grad: 0.3147  
EVAL: [0/21] Data 0.182 (0.182) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3413(0.3413) 


Epoch 20 - avg_train_loss: 0.4461 avg_val_loss: 0.3948 time: 3s
Epoch 20 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.035) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4077(0.3948) 
Epoch: [21][0/83]Data 0.182 (0.182)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5273(0.5273)Grad: 0.1613  
Epoch: [21][82/83]Data 0.002 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4190(0.4514)Grad: 0.3173  
EVAL: [0/21] Data 0.202 (0.202) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3429(0.3429) 


Epoch 21 - avg_train_loss: 0.4514 avg_val_loss: 0.3945 time: 3s
Epoch 21 - Accuracy: 0.8915891399278527
Epoch 21 - Save Best Score: 0.8916 Mpdel


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4063(0.3945) 
Epoch: [22][0/83]Data 0.159 (0.159)Elapsed 0m 0s (remain 0m 13s)Loss: 0.4779(0.4779)Grad: 0.1851  
Epoch: [22][82/83]Data 0.005 (0.024)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5335(0.4479)Grad: 0.2138  
EVAL: [0/21] Data 0.169 (0.169) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3395(0.3395) 


Epoch 22 - avg_train_loss: 0.4479 avg_val_loss: 0.3934 time: 3s
Epoch 22 - Accuracy: 0.8919688627302069
Epoch 22 - Save Best Score: 0.8920 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4060(0.3934) 
Epoch: [23][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.5933(0.5933)Grad: 0.1263  
Epoch: [23][82/83]Data 0.002 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2305(0.4490)Grad: 0.1732  
EVAL: [0/21] Data 0.231 (0.231) Elapsed 0m 0s (remain 0m 4s) Loss: 0.3402(0.3402) 


Epoch 23 - avg_train_loss: 0.4490 avg_val_loss: 0.3933 time: 3s
Epoch 23 - Accuracy: 0.8923485855325612
Epoch 23 - Save Best Score: 0.8923 Mpdel


EVAL: [20/21] Data 0.024 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4052(0.3933) 
Epoch: [24][0/83]Data 0.174 (0.174)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3509(0.3509)Grad: 0.1465  
Epoch: [24][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3157(0.4460)Grad: 0.1903  
EVAL: [0/21] Data 0.191 (0.191) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3395(0.3395) 


Epoch 24 - avg_train_loss: 0.4460 avg_val_loss: 0.3931 time: 3s
Epoch 24 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.024 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4042(0.3931) 
Epoch: [25][0/83]Data 0.170 (0.170)Elapsed 0m 0s (remain 0m 14s)Loss: 0.4647(0.4647)Grad: 0.2336  
Epoch: [25][82/83]Data 0.000 (0.021)Elapsed 0m 2s (remain 0m 0s)Loss: 0.3573(0.4487)Grad: 0.1768  
EVAL: [0/21] Data 0.170 (0.170) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3385(0.3385) 


Epoch 25 - avg_train_loss: 0.4487 avg_val_loss: 0.3929 time: 3s
Epoch 25 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4051(0.3929) 
Epoch: [26][0/83]Data 0.185 (0.185)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4679(0.4679)Grad: 0.1714  
Epoch: [26][82/83]Data 0.000 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.2062(0.4456)Grad: 0.2334  
EVAL: [0/21] Data 0.164 (0.164) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3395(0.3395) 


Epoch 26 - avg_train_loss: 0.4456 avg_val_loss: 0.3928 time: 3s
Epoch 26 - Accuracy: 0.8927283083349156
Epoch 26 - Save Best Score: 0.8927 Mpdel


EVAL: [20/21] Data 0.000 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4052(0.3928) 
Epoch: [27][0/83]Data 0.180 (0.180)Elapsed 0m 0s (remain 0m 15s)Loss: 0.3877(0.3877)Grad: 0.0912  
Epoch: [27][82/83]Data 0.000 (0.027)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4229(0.4471)Grad: 0.2307  
EVAL: [0/21] Data 0.195 (0.195) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3394(0.3394) 


Epoch 27 - avg_train_loss: 0.4471 avg_val_loss: 0.3925 time: 3s
Epoch 27 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.025 (0.028) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4044(0.3925) 
Epoch: [28][0/83]Data 0.166 (0.166)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3425(0.3425)Grad: 0.1354  
Epoch: [28][82/83]Data 0.000 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4957(0.4439)Grad: 0.2465  
EVAL: [0/21] Data 0.157 (0.157) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3391(0.3391) 


Epoch 28 - avg_train_loss: 0.4439 avg_val_loss: 0.3923 time: 3s
Epoch 28 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.027) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4047(0.3923) 
Epoch: [29][0/83]Data 0.183 (0.183)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4438(0.4438)Grad: 0.1647  
Epoch: [29][82/83]Data 0.002 (0.022)Elapsed 0m 2s (remain 0m 0s)Loss: 0.4533(0.4476)Grad: 0.3079  
EVAL: [0/21] Data 0.170 (0.170) Elapsed 0m 0s (remain 0m 3s) Loss: 0.3392(0.3392) 


Epoch 29 - avg_train_loss: 0.4476 avg_val_loss: 0.3922 time: 3s
Epoch 29 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.029) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4044(0.3922) 
Epoch: [30][0/83]Data 0.188 (0.188)Elapsed 0m 0s (remain 0m 15s)Loss: 0.4752(0.4752)Grad: 0.1296  
Epoch: [30][82/83]Data 0.002 (0.023)Elapsed 0m 2s (remain 0m 0s)Loss: 0.5885(0.4501)Grad: 0.3264  
EVAL: [0/21] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 5s) Loss: 0.3392(0.3392) 


Epoch 30 - avg_train_loss: 0.4501 avg_val_loss: 0.3922 time: 3s
Epoch 30 - Accuracy: 0.8925384469337384
Score: 0.89273
Score: 0.89441


EVAL: [20/21] Data 0.025 (0.034) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4045(0.3922) 
