In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Show the contents of the competition input directory (displayed as the cell output).
os.listdir('../input/cassava-leaf-disease-classification/')

['train_tfrecords',
 'sample_submission.csv',
 'test_tfrecords',
 'label_num_to_disease_map.json',
 'train_images',
 'train.csv',
 'test_images']

In [3]:
# NOTE: `train` is reassigned in the "CV split" section (built from the OOF csv files),
# so the frame loaded here is not what the stacking model is fitted on.
train = pd.read_csv('../input/cassava-leaf-disease-merged/merged.csv')
# The sample submission is the test table used by the inference branch of main().
test = pd.read_csv('../input/cassava-leaf-disease-classification//sample_submission.csv')
# Label-number -> disease-name table (per the file name); one row per label because of orient='index'.
label_map = pd.read_json('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json', orient='index')

## Directory settings

In [4]:
# Destination for the log file, fold checkpoints and the OOF csv.
OUTPUT_DIR = './'
# exist_ok=True makes this idempotent on re-run and avoids the check-then-create race.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Image folders read by TrainDataset / TestDataset.
TRAIN_PATH = '../input/cassava-leaf-disease-merged/train'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

## CFG

In [5]:
class CFG:
    """Experiment configuration, read as class attributes by the rest of the notebook."""
    debug = False                     # True -> 1 epoch on a 1000-row subsample (see below)
    apex = False                      # True -> use NVIDIA apex mixed precision
    print_freq = 100                  # print a progress line every N batches
    num_workers = 4                   # DataLoader worker processes
    model_name = '2D_CNN'             # used in checkpoint file names
    size = 256                        # image side length used by get_transforms
    # One of 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts'.
    scheduler = 'CosineAnnealingWarmRestarts'
    epochs = 30
    # Scheduler hyper-parameters. Only T_0 is used by the default scheduler; the others
    # are defined so the alternative schedulers can be selected without an
    # AttributeError. They are conventional starting values, tune as needed.
    T_0 = 10                          # CosineAnnealingWarmRestarts
    T_max = 10                        # CosineAnnealingLR
    factor = 0.2                      # ReduceLROnPlateau
    patience = 4                      # ReduceLROnPlateau
    eps = 1e-6                        # ReduceLROnPlateau
    lr = 1e-3
    min_lr = 1e-6                     # eta_min of the cosine schedulers
    batch_size = 256
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000              # threshold passed to clip_grad_norm_
    seed = 42
    target_size = 5                   # number of classes
    target_col = 'label'
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]        # folds actually trained by main()
    train = True                      # run the training branch of main()
    inference = False                 # run the inference branch of main()

if CFG.debug:
    CFG.epochs = 1
    # NOTE: `train` is rebuilt from the OOF csv files in the "CV split" section,
    # which discards this subsample.
    train = train.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)

## Library

In [6]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm

import warnings 
warnings.filterwarnings('ignore')

if CFG.apex:
    from apex import amp

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Utils

In [7]:
def get_score(y_true, y_pred):
    """Return the accuracy of `y_pred` measured against `y_true`."""
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    return accuracy

@contextmanager
def timer(name):
    """Log the start of a named section and, on normal exit, how long it took.

    Messages go through the module-level LOGGER. The "done" line is only
    written when the wrapped body finishes without raising.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f}')
    
def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Create the notebook logger, writing bare messages to the console and a file.

    Parameters
    ----------
    log_file : str
        Path of the log file. The default is computed once, when this function
        is defined, from OUTPUT_DIR.

    Returns
    -------
    logging.Logger
        Logger named after this module, at INFO level, with exactly one stream
        handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object for the same name, so re-running this
    # cell would otherwise stack handlers and print every message several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Module-level logger used by timer(), train_loop() and main().
LOGGER = init_logger()

def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and switch cuDNN to deterministic mode.

    Also exports the seed as PYTHONHASHSEED.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The seeding calls are independent of each other, so the order is not significant.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed the global RNGs before any model is constructed or any loader is shuffled.
seed_torch(seed=CFG.seed)

## CV split

In [8]:
# OOF prediction tables (per the file names) produced by two upstream notebooks.
oof_B3ns = pd.read_csv('../input/041-create-oof-efficientnet-b3ns-data/oof_df_0.csv')
oof_se = pd.read_csv('../input/nb023-data/oof_df.csv')

# Per-class columns shared by both tables.
prob_cols = ['0', '1', '2', '3', '4']
oof_B3ns_ = oof_B3ns[prob_cols]
# Relabel the second table's columns to '5'..'9' so they do not collide with the first.
oof_se_ = oof_se[prob_cols].rename(columns={col: str(int(col) + 5) for col in prob_cols})

# Ten feature columns plus the label taken from the second table.
train = pd.concat([oof_B3ns_, oof_se_, oof_se[['label']]], axis=1)
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,label
0,3.826102e-15,4.340454e-16,5.234761e-11,1.000000e+00,4.762953e-14,1.522047e-14,9.014521e-16,1.139739e-09,1.000000e+00,3.116005e-13,3
1,9.538939e-01,1.037140e-02,1.156314e-06,8.508536e-07,3.573275e-02,9.599438e-01,7.439279e-03,1.127475e-06,8.668555e-07,3.261490e-02,4
2,4.627016e-18,1.426970e-14,1.898384e-14,1.000000e+00,4.604767e-15,3.705072e-17,5.054389e-14,2.114423e-13,1.000000e+00,2.999360e-14,3
3,4.606794e-07,1.124587e-05,1.638323e-02,9.827052e-01,8.998843e-04,3.833728e-07,9.185316e-06,1.194510e-02,9.875053e-01,5.400496e-04,3
4,6.382671e-06,1.690052e-05,4.985041e-04,1.611800e-05,9.994621e-01,4.209951e-06,7.615774e-06,2.362547e-04,8.609924e-06,9.997434e-01,2
...,...,...,...,...,...,...,...,...,...,...,...
26332,8.475993e-07,9.982687e-01,2.881717e-07,1.730106e-03,8.013854e-08,9.047382e-07,9.980873e-01,3.099606e-07,1.911314e-03,6.628071e-08,1
26333,4.107651e-02,9.103698e-01,1.831035e-02,2.917714e-03,2.732570e-02,2.457832e-02,9.492393e-01,9.367346e-03,1.033589e-03,1.578141e-02,1
26334,3.930135e-09,1.206950e-05,3.702277e-11,9.999880e-01,3.137060e-13,6.578338e-09,1.752745e-05,4.747458e-11,9.999825e-01,7.934944e-13,3
26335,3.089139e-07,2.266481e-06,4.496734e-04,9.995383e-01,9.492393e-06,1.146029e-07,8.421153e-07,5.965448e-04,9.994010e-01,1.521211e-06,3


In [9]:
# Work on a copy so the 'fold' column is not added to `train`.
folds = train.copy()
# Stratify on the target so every fold keeps the class proportions.
Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds[CFG.target_col])):
    # Tag each row with the fold in which it is held out for validation.
    folds.loc[val_index, 'fold'] = int(n)
# The column is filled piecewise inside the loop; cast it to int once it is complete.
folds['fold'] = folds['fold'].astype(int)
# Per-fold class counts as a sanity check on the stratification.
print(folds.groupby(['fold', CFG.target_col]).size())

fold  label
0     0         299
      1         695
      2         603
      3        3093
      4         578
1     0         299
      1         695
      2         603
      3        3093
      4         578
2     0         298
      1         695
      2         604
      3        3092
      4         578
3     0         298
      1         695
      2         604
      3        3092
      4         578
4     0         298
      1         696
      2         603
      3        3092
      4         578
dtype: int64


In [10]:
folds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,label,fold
0,3.826102e-15,4.340454e-16,5.234761e-11,1.000000e+00,4.762953e-14,1.522047e-14,9.014521e-16,1.139739e-09,1.000000e+00,3.116005e-13,3,0
1,9.538939e-01,1.037140e-02,1.156314e-06,8.508536e-07,3.573275e-02,9.599438e-01,7.439279e-03,1.127475e-06,8.668555e-07,3.261490e-02,4,2
2,4.627016e-18,1.426970e-14,1.898384e-14,1.000000e+00,4.604767e-15,3.705072e-17,5.054389e-14,2.114423e-13,1.000000e+00,2.999360e-14,3,2
3,4.606794e-07,1.124587e-05,1.638323e-02,9.827052e-01,8.998843e-04,3.833728e-07,9.185316e-06,1.194510e-02,9.875053e-01,5.400496e-04,3,2
4,6.382671e-06,1.690052e-05,4.985041e-04,1.611800e-05,9.994621e-01,4.209951e-06,7.615774e-06,2.362547e-04,8.609924e-06,9.997434e-01,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...
26332,8.475993e-07,9.982687e-01,2.881717e-07,1.730106e-03,8.013854e-08,9.047382e-07,9.980873e-01,3.099606e-07,1.911314e-03,6.628071e-08,1,4
26333,4.107651e-02,9.103698e-01,1.831035e-02,2.917714e-03,2.732570e-02,2.457832e-02,9.492393e-01,9.367346e-03,1.033589e-03,1.578141e-02,1,3
26334,3.930135e-09,1.206950e-05,3.702277e-11,9.999880e-01,3.137060e-13,6.578338e-09,1.752745e-05,4.747458e-11,9.999825e-01,7.934944e-13,3,0
26335,3.089139e-07,2.266481e-06,4.496734e-04,9.995383e-01,9.492393e-06,1.146029e-07,8.421153e-07,5.965448e-04,9.994010e-01,1.521211e-06,3,2


In [11]:
# Stack the two probability tables along a new last axis: (n_samples, 5, 2).
# NOTE: this rebinds `train` from a DataFrame to a NumPy array.
train = np.stack([df.values for df in [oof_B3ns_, oof_se_]], axis=2)
# Insert a singleton axis at position 1 (the Conv2d channel axis): (n_samples, 1, 5, 2).
train = train[:, None, ...]
print(train.shape)

(26337, 1, 5, 2)


## Dataset

In [12]:
class TrainDataset(Dataset):
    """Image dataset yielding (image, label) pairs read from TRAIN_PATH.

    Parameters
    ----------
    df : DataFrame
        Must provide 'image_id' (file name) and 'label' columns.
    transform : callable, optional
        Called as ``transform(image=...)``; its ``['image']`` entry is returned.
    """
    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.labels = df['label'].values
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        file_path = f'{TRAIN_PATH}/{file_name}'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the path rather than with an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label
    
class StackingDataset(Dataset):
    """Dataset over pre-computed feature arrays for the stacking model.

    Parameters
    ----------
    X : array-like
        Features, indexed by position along the first axis.
    y : array-like
        Integer labels aligned with `X` by position (list, ndarray or Series).
    """
    def __init__(self, X, y):
        self.X = X
        # X is indexed by position, so labels must be too. A pandas Series would
        # be looked up by index *label*, which fails or mismatches as soon as the
        # index is not 0..n-1.
        self.y = np.asarray(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        X = torch.tensor(self.X[idx]).float()
        y = torch.tensor(self.y[idx]).long()
        return X, y
    
class TestDataset(Dataset):
    """Image dataset yielding unlabelled images read from TEST_PATH.

    Parameters
    ----------
    df : DataFrame
        Must provide an 'image_id' column holding file names.
    transform : callable, optional
        Called as ``transform(image=...)``; its ``['image']`` entry is returned.
    """
    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the path rather than with an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [13]:
# Smoke test: wrap the stacked feature array and print the shape of the first sample.
train_dataset = StackingDataset(train, folds['label'])

for i in range(1):
    X, y = train_dataset[i]
    print(X.size())

torch.Size([1, 5, 2])


## Transforms

In [14]:
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    'train' gets random crop/flip/rotate augmentation, 'valid' a plain resize;
    both end with the same normalisation and tensor conversion. Any other
    value of `data` yields None.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    if data == 'train':
        steps = [
            RandomResizedCrop(CFG.size, CFG.size),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
        ]
    elif data == 'valid':
        steps = [Resize(CFG.size, CFG.size)]
    else:
        return None

    # Shared tail of both pipelines.
    steps.append(Normalize(mean=channel_mean, std=channel_std))
    steps.append(ToTensorV2())
    return Compose(steps)

In [15]:
# train_dataset = StackingDataset(train, transform=get_transforms(data='train'))

# for i in range(1):
#     image, label = train_dataset[i]
#     plt.imshow(image[0])
#     plt.title(f'label: {label}')
#     plt.show()

## MODEL

In [16]:
class CustomResNext(nn.Module):
    """timm model whose `fc` layer is replaced by a CFG.target_size-way linear head."""

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Swap the classifier for one sized to this task, keeping its input width.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, CFG.target_size)
        self.model = backbone

    def forward(self, x):
        return self.model(x)

In [17]:
class StackingModel(nn.Module):
    """Small conv + MLP head that blends two models' class probabilities.

    Expects input of shape (batch, 1, num_features, 2): one channel, one row
    per class, one column per base model. The (1, 2) convolution mixes the two
    columns of each row, which assumes exactly two stacked models.

    Parameters
    ----------
    num_features : int
        Number of rows (per-model class probabilities) in each sample.
    num_targets : int
        Number of output logits.
    dropout_ratio : float
        Dropout probability applied after the hidden layer.
    """
    def __init__(self, num_features, num_targets, dropout_ratio):
        super(StackingModel, self).__init__()

        conv_channels = 16
        self.conv2d_1 = nn.Conv2d(in_channels=1, 
                                  out_channels=conv_channels, 
                                  kernel_size=(1, 2), 
                                  stride=1, 
                                  padding=0)
        self.relu_1 = nn.ReLU()

        # The conv output is (batch, conv_channels, num_features, 1), so the flattened
        # width follows from num_features (16 * 5 = 80 for the five-class setup).
        self.linear_2 = nn.Linear(conv_channels * num_features, 32)
        self.relu_2 = nn.ReLU()
        self.dropout_2 = nn.Dropout(dropout_ratio)

        self.linear_3 = nn.Linear(32, num_targets)

    def forward(self, x, batch_size=None):
        """Return logits of shape (batch, num_targets).

        `batch_size` is kept for existing callers; when omitted it is taken
        from the first dimension of `x`.
        """
        if batch_size is None:
            batch_size = x.size(0)

        x = self.conv2d_1(x)
        x = self.relu_1(x)
        x = torch.reshape(x, (batch_size, -1))

        x = self.linear_2(x)
        x = self.relu_2(x)
        x = self.dropout_2(x)

        x = self.linear_3(x)

        return x

In [18]:
# Smoke test: push one batch through an untrained StackingModel and print the shapes.
model = StackingModel(num_features=CFG.target_size, num_targets=CFG.target_size, dropout_ratio=0.2)
train_dataset = StackingDataset(train, folds['label'])
train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True, 
                          num_workers=CFG.num_workers, pin_memory=True, drop_last=True)

for X, y in train_loader:
    print(X.size())
    # Pass the batch's real first dimension; CFG.batch_size is only correct
    # while drop_last=True guarantees full batches.
    output = model(X, X.size(0))
    print(output.size())
    break

torch.Size([256, 1, 5, 2])
torch.Size([256, 5])


In [19]:
class AverageMeter(object):
    """Tracks the most recent value and the weighted running mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` observed `n` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
        
def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Report the elapsed time and an estimate of the time remaining.

    Parameters
    ----------
    since : float
        Timestamp (as returned by time.time()) at which the work started.
    percent : float
        Fraction of the work completed so far; must be non-zero.

    Returns
    -------
    str
        '<elapsed> (remain <remaining>)', both formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the share completed so far.
    projected_total = elapsed / percent
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

def train_fn(train_loader, model, criterion, optimizer, epoch, shechduler, device):
    """Run one training epoch and return the sample-weighted mean loss.

    Parameters
    ----------
    train_loader : iterable yielding (X, y) batches.
    model : module called as ``model(X, batch_size)``.
    criterion : loss callable, ``criterion(y_preds, y)``.
    optimizer : stepped once every CFG.gradient_accumulation_steps batches.
    epoch : zero-based epoch index, used only in the progress line.
    shechduler : accepted for call-site compatibility but not used here;
        train_loop steps the scheduler once per epoch.
    device : device the batch tensors are moved to.

    Returns
    -------
    float
        Mean of the unscaled per-batch losses, weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    for step, (X, y) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        X = X.to(device)
        y = y.to(device)
        batch_size = y.size(0)
        y_preds = model(X, batch_size)
        loss = criterion(y_preds, y)
        # record the loss before it is scaled for accumulation
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else: 
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            # Fields are space-separated, matching the layout printed by valid_fn.
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} ' 
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) ' 
                  'Grad: {grad_norm:.4f}  '
                  .format(epoch+1, step, len(train_loader), batch_time=batch_time, 
                          data_time=data_time, loss=losses, 
                          remain=timeSince(start, float(step+1)/len(train_loader)), 
                          grad_norm=grad_norm))
    return losses.avg

def valid_fn(valid_loader, model, criterion, device):
    """Evaluate the model on a validation loader.

    Parameters
    ----------
    valid_loader : iterable yielding (X, y) batches.
    model : module called as ``model(X, batch_size)``.
    criterion : loss callable, ``criterion(y_preds, y)``.
    device : device the batch tensors are moved to.

    Returns
    -------
    (float, numpy.ndarray)
        The sample-weighted mean loss, and the softmax probabilities of all
        batches concatenated in loader order.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    for step, (X, y) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        X = X.to(device)
        y = y.to(device)
        batch_size = y.size(0)
        # Nothing is back-propagated here, so both the forward pass and the loss
        # are computed without building an autograd graph.
        with torch.no_grad():
            y_preds = model(X, batch_size)
            loss = criterion(y_preds, y)
        losses.update(loss.item(), batch_size)
        # keep class probabilities for scoring by the caller
        preds.append(y_preds.softmax(1).to('cpu').numpy())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
            
    predictions = np.concatenate(preds)
    return losses.avg, predictions

def inference(model, states, test_loader, device):
    """Predict class probabilities, averaging over several checkpoints.

    Parameters
    ----------
    model : module called as ``model(images)``; its weights are overwritten
        with each checkpoint in turn.
    states : sequence of dicts, each holding a state dict under 'model'.
    test_loader : sized iterable yielding batches of images.
    device : device the model and batches are moved to.

    Returns
    -------
    numpy.ndarray
        Softmax probabilities averaged over `states`, with all batches
        concatenated in loader order.
    """
    model.to(device)
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    probs = []
    for i, (images) in tk0:
        images = images.to(device)
        # One probability array per checkpoint for this batch.
        avg_preds = []
        for state in states:
            model.load_state_dict(state['model'])
            model.eval()
            with torch.no_grad():
                y_preds = model(images)
            avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
        avg_preds = np.mean(avg_preds, axis=0)
        probs.append(avg_preds)
    probs = np.concatenate(probs)
    return probs

## Train loop

In [20]:
# ======================================================
# Train loop
# ======================================================

def train_loop(folds, fold):
    """Train and validate the stacking model with `fold` held out.

    Parameters
    ----------
    folds : DataFrame
        Needs 'fold' and 'label' columns. Its index labels are used as row
        positions into the notebook-level `train` feature array.
    fold : int
        Id of the fold used for validation; all other folds are trained on.

    Returns
    -------
    DataFrame
        The validation rows, with the best-accuracy epoch's class probabilities
        written to columns '0'..str(CFG.target_size - 1) and their argmax in 'preds'.

    Side effects: writes ``{CFG.model_name}_fold{fold}_best.pth`` to OUTPUT_DIR
    every time the validation accuracy improves.
    """
    
    LOGGER.info(f'========== fold: {fold} training ============')
    
    # ======================================================
    # loader
    # ======================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    
    # `train` is the notebook-level feature array, selected by row position.
    train_ = train[trn_idx]
    valid_ = train[val_idx]
    
    train_dataset = StackingDataset(train_, train_folds['label'])
    valid_dataset = StackingDataset(valid_, valid_folds['label'])
    
    train_loader = DataLoader(train_dataset, 
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, 
                              pin_memory=True, 
                              drop_last=False)
    valid_loader = DataLoader(valid_dataset, 
                              batch_size=CFG.batch_size, 
                              shuffle=False, 
                              num_workers=CFG.num_workers, 
                              pin_memory=True, 
                              drop_last=False)
    
    # ===============================================
    # scheduler
    # ===============================================
    def get_scheduler(optimizer):
        """Build the LR scheduler named by CFG.scheduler."""
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        else:
            # Fail with a clear message instead of an UnboundLocalError on return.
            raise ValueError(f'Unknown CFG.scheduler: {CFG.scheduler!r}')
        return scheduler
    
    # ===============================================
    # model & optimizer
    # ===============================================
    model = StackingModel(num_features=CFG.target_size, num_targets=CFG.target_size, dropout_ratio=0.3)
    model.to(device)
    
    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)
    
    # ===============================================
    # apex 
    # ===============================================
    if CFG.apex:
        # amp.initialize returns the patched (model, optimizer) pair; both must be rebound.
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
        
    # ===============================================
    # loop
    # ===============================================
    criterion = nn.CrossEntropyLoss()
    
    best_score = 0.
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
        
        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        valid_labels = valid_folds[CFG.target_col].values
        
        # Schedulers are stepped once per epoch; only ReduceLROnPlateau needs the metric.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()
        
        # scoring
        score = get_score(valid_labels, preds.argmax(1))
        
        elapsed = time.time() - start_time
        
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Accuracy: {score}')
        
        # Checkpoint on validation accuracy, keeping the matching predictions.
        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Mpdel')
            torch.save({'model': model.state_dict(), 
                        'preds': preds}, 
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
    check_point = torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
    # One column per class; these names coincide with the first model's input
    # feature columns in `folds`, which are overwritten in the returned frame.
    valid_folds[[str(c) for c in range(CFG.target_size)]] = check_point['preds']
    valid_folds['preds'] = check_point['preds'].argmax(1)
    
    return valid_folds

In [21]:
# ====================================================
# main
# ====================================================
def main():
    """Run cross-validated training and/or test inference, as selected by CFG.

    Uses the notebook-level objects `folds`, `test`, `device`, LOGGER and
    OUTPUT_DIR. With CFG.train, every fold in CFG.trn_fold is trained, the
    per-fold and overall accuracy are logged and the out-of-fold predictions
    are written to 'oof_df.csv'. With CFG.inference, the fold checkpoints are
    averaged over the test set and 'submission.csv' is written.
    """
    
    def get_result(result_df):
        """Log the accuracy of the 'preds' column against the target column."""
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')
        
    if CFG.train:
        # train
        # Collect the per-fold frames and concatenate once instead of growing a frame.
        oof_parts = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(folds, fold)
                oof_parts.append(_oof_df)
                LOGGER.info(f'=============== fold: {fold} result ================')
                get_result(_oof_df)
        oof_df = pd.concat(oof_parts) if oof_parts else pd.DataFrame()
        # CV result
        LOGGER.info(f'============ CV ============')
        get_result(oof_df)
        # save result
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)
        
    if CFG.inference:
        # inference
        # NOTE(review): train_loop saves StackingModel weights under these same file
        # names, so loading them into CustomResNext will not match; this branch
        # needs a matching model and test-time features before it is enabled.
        model = CustomResNext(CFG.model_name, pretrained=False)
        states = [torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth') for fold in CFG.trn_fold]
        # The dataset takes only the frame and a transform; batching options belong
        # to the DataLoader that inference() iterates over.
        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(test_dataset, 
                                 batch_size=CFG.batch_size, 
                                 shuffle=False, 
                                 num_workers=CFG.num_workers, 
                                 pin_memory=True)
        predictions = inference(model, states, test_loader, device)
        # submission
        test['label'] = predictions.argmax(1)
        test[['image_id', 'label']].to_csv(OUTPUT_DIR+'submission.csv', index=False)

In [22]:
# Run the branches of main() enabled by CFG.train / CFG.inference.
if __name__ == '__main__':
    main()



Epoch: [1][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 13s)Loss: 1.5663(1.5663)Grad: 0.7891  
Epoch: [1][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.8897(1.1463)Grad: 0.6209  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.7037(0.7037) 


Epoch 1 - avg_train_loss: 1.1463 avg_val_loss: 0.7279 time: 1s
Epoch 1 - Accuracy: 0.8494684889901291
Epoch 1 - Save Best Score: 0.8495 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8872(0.7279) 
Epoch: [2][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 9s)Loss: 0.7989(0.7989)Grad: 0.5018  
Epoch: [2][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4514(0.5943)Grad: 0.2986  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3783(0.3783) 


Epoch 2 - avg_train_loss: 0.5943 avg_val_loss: 0.4300 time: 1s
Epoch 2 - Accuracy: 0.8722475322703113
Epoch 2 - Save Best Score: 0.8722 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5319(0.4300) 
Epoch: [3][0/83]Data 0.108 (0.108)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5415(0.5415)Grad: 0.1794  
Epoch: [3][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4530(0.4884)Grad: 0.4396  
EVAL: [0/21] Data 0.101 (0.101) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3301(0.3301) 


Epoch 3 - avg_train_loss: 0.4884 avg_val_loss: 0.3918 time: 1s
Epoch 3 - Accuracy: 0.9005315110098709
Epoch 3 - Save Best Score: 0.9005 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4736(0.3918) 
Epoch: [4][0/83]Data 0.116 (0.116)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4290(0.4290)Grad: 0.1583  
Epoch: [4][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4394(0.4621)Grad: 0.3094  
EVAL: [0/21] Data 0.093 (0.093) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3166(0.3166) 


Epoch 4 - avg_train_loss: 0.4621 avg_val_loss: 0.3793 time: 1s
Epoch 4 - Accuracy: 0.9029992406985573
Epoch 4 - Save Best Score: 0.9030 Mpdel


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4542(0.3793) 
Epoch: [5][0/83]Data 0.123 (0.123)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4440(0.4440)Grad: 0.1844  
Epoch: [5][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3865(0.4579)Grad: 0.3762  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3113(0.3113) 


Epoch 5 - avg_train_loss: 0.4579 avg_val_loss: 0.3747 time: 1s
Epoch 5 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4569(0.3747) 
Epoch: [6][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4256(0.4256)Grad: 0.1510  
Epoch: [6][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.7760(0.4527)Grad: 0.6544  
EVAL: [0/21] Data 0.091 (0.091) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3088(0.3088) 


Epoch 6 - avg_train_loss: 0.4527 avg_val_loss: 0.3714 time: 1s
Epoch 6 - Accuracy: 0.9026195899772209


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4546(0.3714) 
Epoch: [7][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4839(0.4839)Grad: 0.1608  
Epoch: [7][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3681(0.4492)Grad: 0.3451  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3081(0.3081) 


Epoch 7 - avg_train_loss: 0.4492 avg_val_loss: 0.3709 time: 1s
Epoch 7 - Accuracy: 0.9026195899772209


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4571(0.3709) 
Epoch: [8][0/83]Data 0.114 (0.114)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4099(0.4099)Grad: 0.1854  
Epoch: [8][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4996(0.4456)Grad: 0.3239  
EVAL: [0/21] Data 0.107 (0.107) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3079(0.3079) 


Epoch 8 - avg_train_loss: 0.4456 avg_val_loss: 0.3703 time: 1s
Epoch 8 - Accuracy: 0.9024297646165528


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4558(0.3703) 
Epoch: [9][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4813(0.4813)Grad: 0.2589  
Epoch: [9][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3672(0.4498)Grad: 0.3044  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3075(0.3075) 


Epoch 9 - avg_train_loss: 0.4498 avg_val_loss: 0.3698 time: 1s
Epoch 9 - Accuracy: 0.9026195899772209


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4560(0.3698) 
Epoch: [10][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4348(0.4348)Grad: 0.1727  
Epoch: [10][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3646(0.4463)Grad: 0.3911  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3073(0.3073) 


Epoch 10 - avg_train_loss: 0.4463 avg_val_loss: 0.3697 time: 1s
Epoch 10 - Accuracy: 0.9024297646165528


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4565(0.3697) 
Epoch: [11][0/83]Data 0.095 (0.095)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4702(0.4702)Grad: 0.2644  
Epoch: [11][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4562(0.4459)Grad: 0.2833  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3044(0.3044) 


Epoch 11 - avg_train_loss: 0.4459 avg_val_loss: 0.3673 time: 1s
Epoch 11 - Accuracy: 0.9029992406985573


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4633(0.3673) 
Epoch: [12][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4072(0.4072)Grad: 0.2541  
Epoch: [12][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2733(0.4406)Grad: 0.3098  
EVAL: [0/21] Data 0.102 (0.102) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3012(0.3012) 


Epoch 12 - avg_train_loss: 0.4406 avg_val_loss: 0.3653 time: 1s
Epoch 12 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4614(0.3653) 
Epoch: [13][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3993(0.3993)Grad: 0.2132  
Epoch: [13][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5371(0.4382)Grad: 0.4778  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3039(0.3039) 


Epoch 13 - avg_train_loss: 0.4382 avg_val_loss: 0.3656 time: 1s
Epoch 13 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4621(0.3656) 
Epoch: [14][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4893(0.4893)Grad: 0.2331  
Epoch: [14][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3512(0.4399)Grad: 0.3739  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3017(0.3017) 


Epoch 14 - avg_train_loss: 0.4399 avg_val_loss: 0.3634 time: 1s
Epoch 14 - Accuracy: 0.9022399392558846


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4620(0.3634) 
Epoch: [15][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4843(0.4843)Grad: 0.2485  
Epoch: [15][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4324(0.4330)Grad: 0.3366  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3031(0.3031) 


Epoch 15 - avg_train_loss: 0.4330 avg_val_loss: 0.3633 time: 1s
Epoch 15 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4588(0.3633) 
Epoch: [16][0/83]Data 0.103 (0.103)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5060(0.5060)Grad: 0.1615  
Epoch: [16][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5078(0.4335)Grad: 0.3756  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3033(0.3033) 


Epoch 16 - avg_train_loss: 0.4335 avg_val_loss: 0.3632 time: 1s
Epoch 16 - Accuracy: 0.90167046317388


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4614(0.3632) 
Epoch: [17][0/83]Data 0.106 (0.106)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4314(0.4314)Grad: 0.1814  
Epoch: [17][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5325(0.4363)Grad: 0.4116  
EVAL: [0/21] Data 0.093 (0.093) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3016(0.3016) 


Epoch 17 - avg_train_loss: 0.4363 avg_val_loss: 0.3619 time: 1s
Epoch 17 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4610(0.3619) 
Epoch: [18][0/83]Data 0.104 (0.104)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4488(0.4488)Grad: 0.2168  
Epoch: [18][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6715(0.4316)Grad: 0.5304  
EVAL: [0/21] Data 0.101 (0.101) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3016(0.3016) 


Epoch 18 - avg_train_loss: 0.4316 avg_val_loss: 0.3617 time: 1s
Epoch 18 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4594(0.3617) 
Epoch: [19][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4472(0.4472)Grad: 0.1659  
Epoch: [19][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3627(0.4269)Grad: 0.2652  
EVAL: [0/21] Data 0.110 (0.110) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3011(0.3011) 


Epoch 19 - avg_train_loss: 0.4269 avg_val_loss: 0.3614 time: 1s
Epoch 19 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4593(0.3614) 
Epoch: [20][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4228(0.4228)Grad: 0.1381  
Epoch: [20][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3087(0.4298)Grad: 0.3238  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3010(0.3010) 


Epoch 20 - avg_train_loss: 0.4298 avg_val_loss: 0.3614 time: 1s
Epoch 20 - Accuracy: 0.9014806378132119


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4595(0.3614) 
Epoch: [21][0/83]Data 0.113 (0.113)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3366(0.3366)Grad: 0.1585  
Epoch: [21][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2707(0.4328)Grad: 0.3168  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3056(0.3056) 


Epoch 21 - avg_train_loss: 0.4328 avg_val_loss: 0.3629 time: 1s
Epoch 21 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4563(0.3629) 
Epoch: [22][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3881(0.3881)Grad: 0.2095  
Epoch: [22][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3368(0.4292)Grad: 0.2696  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3036(0.3036) 


Epoch 22 - avg_train_loss: 0.4292 avg_val_loss: 0.3616 time: 1s
Epoch 22 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.005 (0.010) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4570(0.3616) 
Epoch: [23][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4488(0.4488)Grad: 0.1690  
Epoch: [23][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5032(0.4321)Grad: 0.2811  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3006(0.3006) 


Epoch 23 - avg_train_loss: 0.4321 avg_val_loss: 0.3600 time: 1s
Epoch 23 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4570(0.3600) 
Epoch: [24][0/83]Data 0.121 (0.121)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4101(0.4101)Grad: 0.1436  
Epoch: [24][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2992(0.4287)Grad: 0.2978  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3025(0.3025) 


Epoch 24 - avg_train_loss: 0.4287 avg_val_loss: 0.3605 time: 1s
Epoch 24 - Accuracy: 0.9011009870918755


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4549(0.3605) 
Epoch: [25][0/83]Data 0.097 (0.097)Elapsed 0m 0s (remain 0m 8s)Loss: 0.3959(0.3959)Grad: 0.3387  
Epoch: [25][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3694(0.4273)Grad: 0.2732  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2990(0.2990) 


Epoch 25 - avg_train_loss: 0.4273 avg_val_loss: 0.3585 time: 1s
Epoch 25 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4630(0.3585) 
Epoch: [26][0/83]Data 0.106 (0.106)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5006(0.5006)Grad: 0.2187  
Epoch: [26][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4773(0.4263)Grad: 0.3423  
EVAL: [0/21] Data 0.095 (0.095) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2985(0.2985) 


Epoch 26 - avg_train_loss: 0.4263 avg_val_loss: 0.3577 time: 1s
Epoch 26 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4594(0.3577) 
Epoch: [27][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3155(0.3155)Grad: 0.2106  
Epoch: [27][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3652(0.4242)Grad: 0.3454  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2987(0.2987) 


Epoch 27 - avg_train_loss: 0.4242 avg_val_loss: 0.3576 time: 1s
Epoch 27 - Accuracy: 0.9018602885345482


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4589(0.3576) 
Epoch: [28][0/83]Data 0.103 (0.103)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4914(0.4914)Grad: 0.1863  
Epoch: [28][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3241(0.4260)Grad: 0.2912  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2996(0.2996) 


Epoch 28 - avg_train_loss: 0.4260 avg_val_loss: 0.3575 time: 1s
Epoch 28 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4556(0.3575) 
Epoch: [29][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4921(0.4921)Grad: 0.2047  
Epoch: [29][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4222(0.4240)Grad: 0.4463  
EVAL: [0/21] Data 0.092 (0.092) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2991(0.2991) 


Epoch 29 - avg_train_loss: 0.4240 avg_val_loss: 0.3573 time: 1s
Epoch 29 - Accuracy: 0.9012908124525436


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4571(0.3573) 
Epoch: [30][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3951(0.3951)Grad: 0.1770  
Epoch: [30][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5539(0.4234)Grad: 0.3274  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.2990(0.2990) 


Epoch 30 - avg_train_loss: 0.4234 avg_val_loss: 0.3572 time: 1s
Epoch 30 - Accuracy: 0.9014806378132119
Score: 0.90300


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4573(0.3572) 
Epoch: [1][0/83]Data 0.104 (0.104)Elapsed 0m 0s (remain 0m 9s)Loss: 1.6153(1.6153)Grad: 0.6921  
Epoch: [1][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.8281(1.2528)Grad: 0.4551  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.8331(0.8331) 


Epoch 1 - avg_train_loss: 1.2528 avg_val_loss: 0.8611 time: 1s
Epoch 1 - Accuracy: 0.587129840546697
Epoch 1 - Save Best Score: 0.5871 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.9288(0.8611) 
Epoch: [2][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.9208(0.9208)Grad: 0.4428  
Epoch: [2][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5340(0.6718)Grad: 0.2640  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.5500(0.5500) 


Epoch 2 - avg_train_loss: 0.6718 avg_val_loss: 0.4922 time: 1s
Epoch 2 - Accuracy: 0.8589597570235383
Epoch 2 - Save Best Score: 0.8590 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5588(0.4922) 
Epoch: [3][0/83]Data 0.116 (0.116)Elapsed 0m 0s (remain 0m 10s)Loss: 0.5050(0.5050)Grad: 0.2440  
Epoch: [3][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3834(0.4932)Grad: 0.3023  
EVAL: [0/21] Data 0.091 (0.091) Elapsed 0m 0s (remain 0m 1s) Loss: 0.5141(0.5141) 


Epoch 3 - avg_train_loss: 0.4932 avg_val_loss: 0.4238 time: 1s
Epoch 3 - Accuracy: 0.8864844343204252
Epoch 3 - Save Best Score: 0.8865 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4948(0.4238) 
Epoch: [4][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 8s)Loss: 0.6165(0.6165)Grad: 0.3094  
Epoch: [4][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4383(0.4580)Grad: 0.3463  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.5050(0.5050) 


Epoch 4 - avg_train_loss: 0.4580 avg_val_loss: 0.4033 time: 1s
Epoch 4 - Accuracy: 0.8874335611237661
Epoch 4 - Save Best Score: 0.8874 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4742(0.4033) 
Epoch: [5][0/83]Data 0.106 (0.106)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4805(0.4805)Grad: 0.1921  
Epoch: [5][82/83]Data 0.000 (0.003)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3492(0.4486)Grad: 0.2892  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.5026(0.5026) 


Epoch 5 - avg_train_loss: 0.4486 avg_val_loss: 0.3963 time: 1s
Epoch 5 - Accuracy: 0.8885725132877752
Epoch 5 - Save Best Score: 0.8886 Mpdel


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4680(0.3963) 
Epoch: [6][0/83]Data 0.103 (0.103)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4114(0.4114)Grad: 0.1703  
Epoch: [6][82/83]Data 0.002 (0.003)Elapsed 0m 0s (remain 0m 0s)Loss: 0.7498(0.4422)Grad: 0.5802  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.5007(0.5007) 


Epoch 6 - avg_train_loss: 0.4422 avg_val_loss: 0.3933 time: 1s
Epoch 6 - Accuracy: 0.8887623386484435
Epoch 6 - Save Best Score: 0.8888 Mpdel


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4698(0.3933) 
Epoch: [7][0/83]Data 0.098 (0.098)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4294(0.4294)Grad: 0.1729  
Epoch: [7][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3132(0.4351)Grad: 0.2697  
EVAL: [0/21] Data 0.101 (0.101) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4978(0.4978) 


Epoch 7 - avg_train_loss: 0.4351 avg_val_loss: 0.3913 time: 1s
Epoch 7 - Accuracy: 0.8885725132877752


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4675(0.3913) 
Epoch: [8][0/83]Data 0.130 (0.130)Elapsed 0m 0s (remain 0m 11s)Loss: 0.3607(0.3607)Grad: 0.1437  
Epoch: [8][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5250(0.4333)Grad: 0.3424  
EVAL: [0/21] Data 0.108 (0.108) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4971(0.4971) 


Epoch 8 - avg_train_loss: 0.4333 avg_val_loss: 0.3905 time: 1s
Epoch 8 - Accuracy: 0.8881928625664389


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4673(0.3905) 
Epoch: [9][0/83]Data 0.128 (0.128)Elapsed 0m 0s (remain 0m 11s)Loss: 0.3901(0.3901)Grad: 0.1651  
Epoch: [9][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4609(0.4322)Grad: 0.4064  
EVAL: [0/21] Data 0.123 (0.123) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4967(0.4967) 


Epoch 9 - avg_train_loss: 0.4322 avg_val_loss: 0.3900 time: 1s
Epoch 9 - Accuracy: 0.8881928625664389


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4672(0.3900) 
Epoch: [10][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4548(0.4548)Grad: 0.1621  
Epoch: [10][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4829(0.4334)Grad: 0.2868  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4966(0.4966) 


Epoch 10 - avg_train_loss: 0.4334 avg_val_loss: 0.3899 time: 1s
Epoch 10 - Accuracy: 0.8881928625664389


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4675(0.3899) 
Epoch: [11][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4715(0.4715)Grad: 0.1405  
Epoch: [11][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3969(0.4339)Grad: 0.2839  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4937(0.4937) 


Epoch 11 - avg_train_loss: 0.4339 avg_val_loss: 0.3875 time: 1s
Epoch 11 - Accuracy: 0.8889521640091116
Epoch 11 - Save Best Score: 0.8890 Mpdel


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4714(0.3875) 
Epoch: [12][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4926(0.4926)Grad: 0.1534  
Epoch: [12][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3670(0.4269)Grad: 0.3262  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4912(0.4912) 


Epoch 12 - avg_train_loss: 0.4269 avg_val_loss: 0.3849 time: 1s
Epoch 12 - Accuracy: 0.8889521640091116


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4689(0.3849) 
Epoch: [13][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4354(0.4354)Grad: 0.1159  
Epoch: [13][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3602(0.4190)Grad: 0.2561  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4932(0.4932) 


Epoch 13 - avg_train_loss: 0.4190 avg_val_loss: 0.3832 time: 1s
Epoch 13 - Accuracy: 0.8891419893697798
Epoch 13 - Save Best Score: 0.8891 Mpdel


EVAL: [20/21] Data 0.003 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4706(0.3832) 
Epoch: [14][0/83]Data 0.104 (0.104)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3889(0.3889)Grad: 0.1670  
Epoch: [14][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3460(0.4184)Grad: 0.3030  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4890(0.4890) 


Epoch 14 - avg_train_loss: 0.4184 avg_val_loss: 0.3817 time: 1s
Epoch 14 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4725(0.3817) 
Epoch: [15][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4334(0.4334)Grad: 0.1712  
Epoch: [15][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2194(0.4181)Grad: 0.2978  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4836(0.4836) 


Epoch 15 - avg_train_loss: 0.4181 avg_val_loss: 0.3795 time: 1s
Epoch 15 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4700(0.3795) 
Epoch: [16][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4004(0.4004)Grad: 0.1548  
Epoch: [16][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2218(0.4137)Grad: 0.3067  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4846(0.4846) 


Epoch 16 - avg_train_loss: 0.4137 avg_val_loss: 0.3791 time: 1s
Epoch 16 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4709(0.3791) 
Epoch: [17][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5065(0.5065)Grad: 0.1752  
Epoch: [17][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3908(0.4106)Grad: 0.2676  
EVAL: [0/21] Data 0.111 (0.111) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4828(0.4828) 


Epoch 17 - avg_train_loss: 0.4106 avg_val_loss: 0.3783 time: 1s
Epoch 17 - Accuracy: 0.889331814730448
Epoch 17 - Save Best Score: 0.8893 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4704(0.3783) 
Epoch: [18][0/83]Data 0.102 (0.102)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3627(0.3627)Grad: 0.1625  
Epoch: [18][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6609(0.4091)Grad: 0.4836  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4817(0.4817) 


Epoch 18 - avg_train_loss: 0.4091 avg_val_loss: 0.3777 time: 1s
Epoch 18 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4700(0.3777) 
Epoch: [19][0/83]Data 0.115 (0.115)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3823(0.3823)Grad: 0.1712  
Epoch: [19][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3224(0.4122)Grad: 0.2642  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4810(0.4810) 


Epoch 19 - avg_train_loss: 0.4122 avg_val_loss: 0.3774 time: 1s
Epoch 19 - Accuracy: 0.889331814730448


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4696(0.3774) 
Epoch: [20][0/83]Data 0.117 (0.117)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4275(0.4275)Grad: 0.1635  
Epoch: [20][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3039(0.4103)Grad: 0.4639  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4810(0.4810) 


Epoch 20 - avg_train_loss: 0.4103 avg_val_loss: 0.3774 time: 1s
Epoch 20 - Accuracy: 0.889331814730448


EVAL: [20/21] Data 0.000 (0.010) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4696(0.3774) 
Epoch: [21][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.2654(0.2654)Grad: 0.1875  
Epoch: [21][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3819(0.4140)Grad: 0.2505  
EVAL: [0/21] Data 0.095 (0.095) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4747(0.4747) 


Epoch 21 - avg_train_loss: 0.4140 avg_val_loss: 0.3758 time: 1s
Epoch 21 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4664(0.3758) 
Epoch: [22][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5326(0.5326)Grad: 0.1712  
Epoch: [22][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3916(0.4116)Grad: 0.2724  
EVAL: [0/21] Data 0.151 (0.151) Elapsed 0m 0s (remain 0m 3s) Loss: 0.4765(0.4765) 


Epoch 22 - avg_train_loss: 0.4116 avg_val_loss: 0.3744 time: 1s
Epoch 22 - Accuracy: 0.8887623386484435


EVAL: [20/21] Data 0.002 (0.011) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4655(0.3744) 
Epoch: [23][0/83]Data 0.120 (0.120)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4382(0.4382)Grad: 0.1865  
Epoch: [23][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4171(0.4081)Grad: 0.2655  
EVAL: [0/21] Data 0.102 (0.102) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4721(0.4721) 


Epoch 23 - avg_train_loss: 0.4081 avg_val_loss: 0.3732 time: 1s
Epoch 23 - Accuracy: 0.8885725132877752


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4691(0.3732) 
Epoch: [24][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4237(0.4237)Grad: 0.2316  
Epoch: [24][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4678(0.4064)Grad: 0.4630  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4697(0.4697) 


Epoch 24 - avg_train_loss: 0.4064 avg_val_loss: 0.3721 time: 1s
Epoch 24 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4687(0.3721) 
Epoch: [25][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3613(0.3613)Grad: 0.1472  
Epoch: [25][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3896(0.4056)Grad: 0.2578  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4670(0.4670) 


Epoch 25 - avg_train_loss: 0.4056 avg_val_loss: 0.3709 time: 1s
Epoch 25 - Accuracy: 0.8891419893697798


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4654(0.3709) 
Epoch: [26][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4543(0.4543)Grad: 0.1459  
Epoch: [26][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3376(0.4079)Grad: 0.2365  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4682(0.4682) 


Epoch 26 - avg_train_loss: 0.4079 avg_val_loss: 0.3702 time: 1s
Epoch 26 - Accuracy: 0.8887623386484435


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4659(0.3702) 
Epoch: [27][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4051(0.4051)Grad: 0.1447  
Epoch: [27][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3015(0.4064)Grad: 0.3249  
EVAL: [0/21] Data 0.098 (0.098) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4650(0.4650) 


Epoch 27 - avg_train_loss: 0.4064 avg_val_loss: 0.3693 time: 1s
Epoch 27 - Accuracy: 0.8889521640091116


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4647(0.3693) 
Epoch: [28][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4070(0.4070)Grad: 0.1442  
Epoch: [28][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4383(0.4017)Grad: 0.2894  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4653(0.4653) 


Epoch 28 - avg_train_loss: 0.4017 avg_val_loss: 0.3692 time: 1s
Epoch 28 - Accuracy: 0.8895216400911162
Epoch 28 - Save Best Score: 0.8895 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4657(0.3692) 
Epoch: [29][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4588(0.4588)Grad: 0.2114  
Epoch: [29][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6992(0.4030)Grad: 0.4573  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4658(0.4658) 


Epoch 29 - avg_train_loss: 0.4030 avg_val_loss: 0.3691 time: 1s
Epoch 29 - Accuracy: 0.8887623386484435


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4658(0.3691) 
Epoch: [30][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3053(0.3053)Grad: 0.1266  
Epoch: [30][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2603(0.3993)Grad: 0.2469  
EVAL: [0/21] Data 0.118 (0.118) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4656(0.4656) 


Epoch 30 - avg_train_loss: 0.3993 avg_val_loss: 0.3691 time: 1s
Epoch 30 - Accuracy: 0.889331814730448
Score: 0.88952


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4657(0.3691) 
Epoch: [1][0/83]Data 0.116 (0.116)Elapsed 0m 0s (remain 0m 10s)Loss: 1.6299(1.6299)Grad: 0.7456  
Epoch: [1][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.8661(1.1886)Grad: 0.4857  
EVAL: [0/21] Data 0.112 (0.112) Elapsed 0m 0s (remain 0m 2s) Loss: 0.8004(0.8004) 


Epoch 1 - avg_train_loss: 1.1886 avg_val_loss: 0.8159 time: 1s
Epoch 1 - Accuracy: 0.587051452439719
Epoch 1 - Save Best Score: 0.5871 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8802(0.8159) 
Epoch: [2][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.8965(0.8965)Grad: 0.4185  
Epoch: [2][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5618(0.6316)Grad: 0.2964  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4889(0.4889) 


Epoch 2 - avg_train_loss: 0.6316 avg_val_loss: 0.4753 time: 1s
Epoch 2 - Accuracy: 0.887981773305487
Epoch 2 - Save Best Score: 0.8880 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4392(0.4753) 
Epoch: [3][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4915(0.4915)Grad: 0.2126  
Epoch: [3][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4745(0.4583)Grad: 0.3715  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4330(0.4330) 


Epoch 3 - avg_train_loss: 0.4583 avg_val_loss: 0.4120 time: 1s
Epoch 3 - Accuracy: 0.8908296943231441
Epoch 3 - Save Best Score: 0.8908 Mpdel


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3430(0.4120) 
Epoch: [4][0/83]Data 0.104 (0.104)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3945(0.3945)Grad: 0.2276  
Epoch: [4][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4346(0.4417)Grad: 0.4057  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4230(0.4230) 


Epoch 4 - avg_train_loss: 0.4417 avg_val_loss: 0.4018 time: 1s
Epoch 4 - Accuracy: 0.8921587241313841
Epoch 4 - Save Best Score: 0.8922 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3280(0.4018) 
Epoch: [5][0/83]Data 0.117 (0.117)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4411(0.4411)Grad: 0.2079  
Epoch: [5][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5701(0.4304)Grad: 0.4090  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4211(0.4211) 


Epoch 5 - avg_train_loss: 0.4304 avg_val_loss: 0.3994 time: 1s
Epoch 5 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3250(0.3994) 
Epoch: [6][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5434(0.5434)Grad: 0.2638  
Epoch: [6][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2147(0.4308)Grad: 0.2874  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4184(0.4184) 


Epoch 6 - avg_train_loss: 0.4308 avg_val_loss: 0.3967 time: 1s
Epoch 6 - Accuracy: 0.8927283083349156
Epoch 6 - Save Best Score: 0.8927 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3232(0.3967) 
Epoch: [7][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4851(0.4851)Grad: 0.2012  
Epoch: [7][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3267(0.4270)Grad: 0.3819  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4164(0.4164) 


Epoch 7 - avg_train_loss: 0.4270 avg_val_loss: 0.3954 time: 1s
Epoch 7 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3228(0.3954) 
Epoch: [8][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4274(0.4274)Grad: 0.2263  
Epoch: [8][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3258(0.4282)Grad: 0.2516  
EVAL: [0/21] Data 0.113 (0.113) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4154(0.4154) 


Epoch 8 - avg_train_loss: 0.4282 avg_val_loss: 0.3947 time: 1s
Epoch 8 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3222(0.3947) 
Epoch: [9][0/83]Data 0.106 (0.106)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4744(0.4744)Grad: 0.1466  
Epoch: [9][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5548(0.4260)Grad: 0.5129  
EVAL: [0/21] Data 0.102 (0.102) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4154(0.4154) 


Epoch 9 - avg_train_loss: 0.4260 avg_val_loss: 0.3945 time: 1s
Epoch 9 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3221(0.3945) 
Epoch: [10][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3980(0.3980)Grad: 0.1786  
Epoch: [10][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5819(0.4226)Grad: 0.4762  
EVAL: [0/21] Data 0.107 (0.107) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4153(0.4153) 


Epoch 10 - avg_train_loss: 0.4226 avg_val_loss: 0.3945 time: 1s
Epoch 10 - Accuracy: 0.8925384469337384


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3223(0.3945) 
Epoch: [11][0/83]Data 0.103 (0.103)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3437(0.3437)Grad: 0.1699  
Epoch: [11][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4675(0.4253)Grad: 0.3112  
EVAL: [0/21] Data 0.095 (0.095) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4164(0.4164) 


Epoch 11 - avg_train_loss: 0.4253 avg_val_loss: 0.3939 time: 1s
Epoch 11 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3203(0.3939) 
Epoch: [12][0/83]Data 0.113 (0.113)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3342(0.3342)Grad: 0.1613  
Epoch: [12][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6604(0.4210)Grad: 0.7159  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4147(0.4147) 


Epoch 12 - avg_train_loss: 0.4210 avg_val_loss: 0.3923 time: 1s
Epoch 12 - Accuracy: 0.8929181697360926
Epoch 12 - Save Best Score: 0.8929 Mpdel


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3205(0.3923) 
Epoch: [13][0/83]Data 0.108 (0.108)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4155(0.4155)Grad: 0.1906  
Epoch: [13][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3421(0.4174)Grad: 0.3047  
EVAL: [0/21] Data 0.088 (0.088) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4127(0.4127) 


Epoch 13 - avg_train_loss: 0.4174 avg_val_loss: 0.3900 time: 1s
Epoch 13 - Accuracy: 0.8923485855325612


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3224(0.3900) 
Epoch: [14][0/83]Data 0.098 (0.098)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4152(0.4152)Grad: 0.1565  
Epoch: [14][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4841(0.4183)Grad: 0.3883  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4122(0.4122) 


Epoch 14 - avg_train_loss: 0.4183 avg_val_loss: 0.3892 time: 1s
Epoch 14 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3205(0.3892) 
Epoch: [15][0/83]Data 0.114 (0.114)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3721(0.3721)Grad: 0.2248  
Epoch: [15][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4404(0.4194)Grad: 0.2828  
EVAL: [0/21] Data 0.102 (0.102) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4093(0.4093) 


Epoch 15 - avg_train_loss: 0.4194 avg_val_loss: 0.3890 time: 1s
Epoch 15 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3237(0.3890) 
Epoch: [16][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4428(0.4428)Grad: 0.1973  
Epoch: [16][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3845(0.4136)Grad: 0.3017  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4090(0.4090) 


Epoch 16 - avg_train_loss: 0.4136 avg_val_loss: 0.3870 time: 1s
Epoch 16 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3216(0.3870) 
Epoch: [17][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3125(0.3125)Grad: 0.1924  
Epoch: [17][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4141(0.4092)Grad: 0.3208  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4103(0.4103) 


Epoch 17 - avg_train_loss: 0.4092 avg_val_loss: 0.3868 time: 1s
Epoch 17 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3210(0.3868) 
Epoch: [18][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3652(0.3652)Grad: 0.1426  
Epoch: [18][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4817(0.4109)Grad: 0.2932  
EVAL: [0/21] Data 0.107 (0.107) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4100(0.4100) 


Epoch 18 - avg_train_loss: 0.4109 avg_val_loss: 0.3869 time: 1s
Epoch 18 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3221(0.3869) 
Epoch: [19][0/83]Data 0.108 (0.108)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4237(0.4237)Grad: 0.2131  
Epoch: [19][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4834(0.4113)Grad: 0.3949  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4097(0.4097) 


Epoch 19 - avg_train_loss: 0.4113 avg_val_loss: 0.3867 time: 1s
Epoch 19 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3219(0.3867) 
Epoch: [20][0/83]Data 0.122 (0.122)Elapsed 0m 0s (remain 0m 10s)Loss: 0.2751(0.2751)Grad: 0.2151  
Epoch: [20][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4899(0.4139)Grad: 0.3596  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4096(0.4096) 


Epoch 20 - avg_train_loss: 0.4139 avg_val_loss: 0.3867 time: 1s
Epoch 20 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3220(0.3867) 
Epoch: [21][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4431(0.4431)Grad: 0.1756  
Epoch: [21][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4606(0.4112)Grad: 0.4902  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4091(0.4091) 


Epoch 21 - avg_train_loss: 0.4112 avg_val_loss: 0.3863 time: 1s
Epoch 21 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3238(0.3863) 
Epoch: [22][0/83]Data 0.155 (0.155)Elapsed 0m 0s (remain 0m 14s)Loss: 0.3564(0.3564)Grad: 0.1538  
Epoch: [22][82/83]Data 0.002 (0.005)Elapsed 0m 1s (remain 0m 0s)Loss: 0.2905(0.4103)Grad: 0.2808  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4085(0.4085) 


Epoch 22 - avg_train_loss: 0.4103 avg_val_loss: 0.3857 time: 1s
Epoch 22 - Accuracy: 0.8923485855325612


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3251(0.3857) 
Epoch: [23][0/83]Data 0.101 (0.101)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3721(0.3721)Grad: 0.1784  
Epoch: [23][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3887(0.4083)Grad: 0.3277  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4097(0.4097) 


Epoch 23 - avg_train_loss: 0.4083 avg_val_loss: 0.3841 time: 1s
Epoch 23 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3250(0.3841) 
Epoch: [24][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3414(0.3414)Grad: 0.1840  
Epoch: [24][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3585(0.4085)Grad: 0.2617  
EVAL: [0/21] Data 0.130 (0.130) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4099(0.4099) 


Epoch 24 - avg_train_loss: 0.4085 avg_val_loss: 0.3845 time: 1s
Epoch 24 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.012) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3246(0.3845) 
Epoch: [25][0/83]Data 0.114 (0.114)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3385(0.3385)Grad: 0.1692  
Epoch: [25][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6937(0.4063)Grad: 0.4482  
EVAL: [0/21] Data 0.110 (0.110) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4082(0.4082) 


Epoch 25 - avg_train_loss: 0.4063 avg_val_loss: 0.3831 time: 1s
Epoch 25 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3231(0.3831) 
Epoch: [26][0/83]Data 0.095 (0.095)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3647(0.3647)Grad: 0.1690  
Epoch: [26][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3132(0.4055)Grad: 0.2880  
EVAL: [0/21] Data 0.107 (0.107) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4075(0.4075) 


Epoch 26 - avg_train_loss: 0.4055 avg_val_loss: 0.3834 time: 1s
Epoch 26 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3250(0.3834) 
Epoch: [27][0/83]Data 0.094 (0.094)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4215(0.4215)Grad: 0.1758  
Epoch: [27][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5679(0.4036)Grad: 0.3642  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4072(0.4072) 


Epoch 27 - avg_train_loss: 0.4036 avg_val_loss: 0.3822 time: 1s
Epoch 27 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3242(0.3822) 
Epoch: [28][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3750(0.3750)Grad: 0.1736  
Epoch: [28][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3130(0.4034)Grad: 0.3604  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4064(0.4064) 


Epoch 28 - avg_train_loss: 0.4034 avg_val_loss: 0.3824 time: 1s
Epoch 28 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3247(0.3824) 
Epoch: [29][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3756(0.3756)Grad: 0.1875  
Epoch: [29][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3799(0.4043)Grad: 0.2978  
EVAL: [0/21] Data 0.111 (0.111) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4068(0.4068) 


Epoch 29 - avg_train_loss: 0.4043 avg_val_loss: 0.3818 time: 1s
Epoch 29 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3235(0.3818) 
Epoch: [30][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3977(0.3977)Grad: 0.2390  
Epoch: [30][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4920(0.4015)Grad: 0.5322  
EVAL: [0/21] Data 0.108 (0.108) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4067(0.4067) 


Epoch 30 - avg_train_loss: 0.4015 avg_val_loss: 0.3818 time: 1s
Epoch 30 - Accuracy: 0.8919688627302069
Score: 0.89292


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3236(0.3818) 
Epoch: [1][0/83]Data 0.102 (0.102)Elapsed 0m 0s (remain 0m 9s)Loss: 1.6428(1.6428)Grad: 0.7508  
Epoch: [1][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 1.1050(1.2563)Grad: 0.5197  
EVAL: [0/21] Data 0.090 (0.090) Elapsed 0m 0s (remain 0m 1s) Loss: 0.9160(0.9160) 


Epoch 1 - avg_train_loss: 1.2563 avg_val_loss: 0.9001 time: 1s
Epoch 1 - Accuracy: 0.587051452439719
Epoch 1 - Save Best Score: 0.5871 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 1.0817(0.9001) 
Epoch: [2][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 9s)Loss: 0.8787(0.8787)Grad: 0.5109  
Epoch: [2][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4474(0.7042)Grad: 0.2990  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.5309(0.5309) 


Epoch 2 - avg_train_loss: 0.7042 avg_val_loss: 0.4906 time: 1s
Epoch 2 - Accuracy: 0.886652743497247
Epoch 2 - Save Best Score: 0.8867 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.6075(0.4906) 
Epoch: [3][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4904(0.4904)Grad: 0.3306  
Epoch: [3][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2780(0.4802)Grad: 0.2701  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4596(0.4596) 


Epoch 3 - avg_train_loss: 0.4802 avg_val_loss: 0.3973 time: 1s
Epoch 3 - Accuracy: 0.8906398329219669
Epoch 3 - Save Best Score: 0.8906 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5122(0.3973) 
Epoch: [4][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4925(0.4925)Grad: 0.1629  
Epoch: [4][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3957(0.4404)Grad: 0.3012  
EVAL: [0/21] Data 0.101 (0.101) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4556(0.4556) 


Epoch 4 - avg_train_loss: 0.4404 avg_val_loss: 0.3853 time: 1s
Epoch 4 - Accuracy: 0.8919688627302069
Epoch 4 - Save Best Score: 0.8920 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5100(0.3853) 
Epoch: [5][0/83]Data 0.120 (0.120)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3956(0.3956)Grad: 0.2147  
Epoch: [5][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5892(0.4300)Grad: 0.4320  
EVAL: [0/21] Data 0.123 (0.123) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4539(0.4539) 


Epoch 5 - avg_train_loss: 0.4300 avg_val_loss: 0.3813 time: 1s
Epoch 5 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5104(0.3813) 
Epoch: [6][0/83]Data 0.094 (0.094)Elapsed 0m 0s (remain 0m 8s)Loss: 0.5213(0.5213)Grad: 0.1269  
Epoch: [6][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3529(0.4280)Grad: 0.2509  
EVAL: [0/21] Data 0.108 (0.108) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4535(0.4535) 


Epoch 6 - avg_train_loss: 0.4280 avg_val_loss: 0.3803 time: 1s
Epoch 6 - Accuracy: 0.8912094171254984


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5170(0.3803) 
Epoch: [7][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4301(0.4301)Grad: 0.1778  
Epoch: [7][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4034(0.4273)Grad: 0.3346  
EVAL: [0/21] Data 0.117 (0.117) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4534(0.4534) 


Epoch 7 - avg_train_loss: 0.4273 avg_val_loss: 0.3793 time: 1s
Epoch 7 - Accuracy: 0.8915891399278527


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5167(0.3793) 
Epoch: [8][0/83]Data 0.113 (0.113)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4443(0.4443)Grad: 0.1844  
Epoch: [8][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5282(0.4260)Grad: 0.3051  
EVAL: [0/21] Data 0.117 (0.117) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4538(0.4538) 


Epoch 8 - avg_train_loss: 0.4260 avg_val_loss: 0.3787 time: 1s
Epoch 8 - Accuracy: 0.8921587241313841
Epoch 8 - Save Best Score: 0.8922 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5166(0.3787) 
Epoch: [9][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3375(0.3375)Grad: 0.1565  
Epoch: [9][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3693(0.4267)Grad: 0.2432  
EVAL: [0/21] Data 0.098 (0.098) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4537(0.4537) 


Epoch 9 - avg_train_loss: 0.4267 avg_val_loss: 0.3787 time: 1s
Epoch 9 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5181(0.3787) 
Epoch: [10][0/83]Data 0.113 (0.113)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4654(0.4654)Grad: 0.2119  
Epoch: [10][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2988(0.4259)Grad: 0.2405  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4536(0.4536) 


Epoch 10 - avg_train_loss: 0.4259 avg_val_loss: 0.3786 time: 1s
Epoch 10 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5180(0.3786) 
Epoch: [11][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.5007(0.5007)Grad: 0.1985  
Epoch: [11][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4606(0.4250)Grad: 0.3272  
EVAL: [0/21] Data 0.112 (0.112) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4539(0.4539) 


Epoch 11 - avg_train_loss: 0.4250 avg_val_loss: 0.3782 time: 1s
Epoch 11 - Accuracy: 0.8921587241313841


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5230(0.3782) 
Epoch: [12][0/83]Data 0.113 (0.113)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3495(0.3495)Grad: 0.2414  
Epoch: [12][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.8538(0.4257)Grad: 0.5391  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4554(0.4554) 


Epoch 12 - avg_train_loss: 0.4257 avg_val_loss: 0.3767 time: 1s
Epoch 12 - Accuracy: 0.8925384469337384
Epoch 12 - Save Best Score: 0.8925 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5247(0.3767) 
Epoch: [13][0/83]Data 0.101 (0.101)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4332(0.4332)Grad: 0.1687  
Epoch: [13][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3744(0.4234)Grad: 0.2807  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4549(0.4549) 


Epoch 13 - avg_train_loss: 0.4234 avg_val_loss: 0.3761 time: 1s
Epoch 13 - Accuracy: 0.8929181697360926
Epoch 13 - Save Best Score: 0.8929 Mpdel


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5255(0.3761) 
Epoch: [14][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4102(0.4102)Grad: 0.1552  
Epoch: [14][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5465(0.4219)Grad: 0.4494  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4544(0.4544) 


Epoch 14 - avg_train_loss: 0.4219 avg_val_loss: 0.3761 time: 1s
Epoch 14 - Accuracy: 0.8927283083349156


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5308(0.3761) 
Epoch: [15][0/83]Data 0.097 (0.097)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3702(0.3702)Grad: 0.1568  
Epoch: [15][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4452(0.4174)Grad: 0.2529  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4541(0.4541) 


Epoch 15 - avg_train_loss: 0.4174 avg_val_loss: 0.3758 time: 1s
Epoch 15 - Accuracy: 0.8931080311372698
Epoch 15 - Save Best Score: 0.8931 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5292(0.3758) 
Epoch: [16][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3543(0.3543)Grad: 0.1453  
Epoch: [16][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4785(0.4173)Grad: 0.3480  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4528(0.4528) 


Epoch 16 - avg_train_loss: 0.4173 avg_val_loss: 0.3745 time: 1s
Epoch 16 - Accuracy: 0.8936776153408013
Epoch 16 - Save Best Score: 0.8937 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5250(0.3745) 
Epoch: [17][0/83]Data 0.101 (0.101)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4303(0.4303)Grad: 0.1773  
Epoch: [17][82/83]Data 0.003 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4338(0.4154)Grad: 0.2949  
EVAL: [0/21] Data 0.103 (0.103) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4538(0.4538) 


Epoch 17 - avg_train_loss: 0.4154 avg_val_loss: 0.3743 time: 1s
Epoch 17 - Accuracy: 0.8936776153408013


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5268(0.3743) 
Epoch: [18][0/83]Data 0.106 (0.106)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5238(0.5238)Grad: 0.1676  
Epoch: [18][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5451(0.4136)Grad: 0.3170  
EVAL: [0/21] Data 0.098 (0.098) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4539(0.4539) 


Epoch 18 - avg_train_loss: 0.4136 avg_val_loss: 0.3745 time: 1s
Epoch 18 - Accuracy: 0.8929181697360926


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5289(0.3745) 
Epoch: [19][0/83]Data 0.108 (0.108)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3983(0.3983)Grad: 0.1157  
Epoch: [19][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3751(0.4141)Grad: 0.2819  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4536(0.4536) 


Epoch 19 - avg_train_loss: 0.4141 avg_val_loss: 0.3740 time: 1s
Epoch 19 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5272(0.3740) 
Epoch: [20][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4471(0.4471)Grad: 0.1346  
Epoch: [20][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3812(0.4144)Grad: 0.3146  
EVAL: [0/21] Data 0.126 (0.126) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4536(0.4536) 


Epoch 20 - avg_train_loss: 0.4144 avg_val_loss: 0.3740 time: 1s
Epoch 20 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.002 (0.010) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5269(0.3740) 
Epoch: [21][0/83]Data 0.160 (0.160)Elapsed 0m 0s (remain 0m 13s)Loss: 0.5897(0.5897)Grad: 0.2533  
Epoch: [21][82/83]Data 0.000 (0.005)Elapsed 0m 1s (remain 0m 0s)Loss: 0.4919(0.4128)Grad: 0.2794  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4541(0.4541) 


Epoch 21 - avg_train_loss: 0.4128 avg_val_loss: 0.3741 time: 1s
Epoch 21 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5263(0.3741) 
Epoch: [22][0/83]Data 0.100 (0.100)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3808(0.3808)Grad: 0.1483  
Epoch: [22][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6316(0.4178)Grad: 0.3311  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4523(0.4523) 
EVAL: [20/21] Data 0.000 (0.012) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5231(0.3724) 


Epoch 22 - avg_train_loss: 0.4178 avg_val_loss: 0.3724 time: 1s
Epoch 22 - Accuracy: 0.8934877539396241


Epoch: [23][0/83]Data 0.117 (0.117)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4396(0.4396)Grad: 0.1294  
Epoch: [23][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2461(0.4131)Grad: 0.2827  
EVAL: [0/21] Data 0.096 (0.096) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4514(0.4514) 


Epoch 23 - avg_train_loss: 0.4131 avg_val_loss: 0.3717 time: 1s
Epoch 23 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5180(0.3717) 
Epoch: [24][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3731(0.3731)Grad: 0.1195  
Epoch: [24][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4534(0.4102)Grad: 0.3120  
EVAL: [0/21] Data 0.113 (0.113) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4518(0.4518) 


Epoch 24 - avg_train_loss: 0.4102 avg_val_loss: 0.3710 time: 1s
Epoch 24 - Accuracy: 0.8940573381431555
Epoch 24 - Save Best Score: 0.8941 Mpdel


EVAL: [20/21] Data 0.001 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5206(0.3710) 
Epoch: [25][0/83]Data 0.105 (0.105)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4465(0.4465)Grad: 0.1363  
Epoch: [25][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6024(0.4091)Grad: 0.3889  
EVAL: [0/21] Data 0.111 (0.111) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4509(0.4509) 


Epoch 25 - avg_train_loss: 0.4091 avg_val_loss: 0.3712 time: 1s
Epoch 25 - Accuracy: 0.8931080311372698


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5266(0.3712) 
Epoch: [26][0/83]Data 0.094 (0.094)Elapsed 0m 0s (remain 0m 9s)Loss: 0.2673(0.2673)Grad: 0.1494  
Epoch: [26][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4432(0.4074)Grad: 0.2673  
EVAL: [0/21] Data 0.101 (0.101) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4502(0.4502) 


Epoch 26 - avg_train_loss: 0.4074 avg_val_loss: 0.3708 time: 1s
Epoch 26 - Accuracy: 0.8931080311372698


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5245(0.3708) 
Epoch: [27][0/83]Data 0.121 (0.121)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4308(0.4308)Grad: 0.1388  
Epoch: [27][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5261(0.4099)Grad: 0.3072  
EVAL: [0/21] Data 0.077 (0.077) Elapsed 0m 0s (remain 0m 1s) Loss: 0.4494(0.4494) 


Epoch 27 - avg_train_loss: 0.4099 avg_val_loss: 0.3697 time: 1s
Epoch 27 - Accuracy: 0.8936776153408013


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5198(0.3697) 
Epoch: [28][0/83]Data 0.115 (0.115)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4129(0.4129)Grad: 0.1509  
Epoch: [28][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4233(0.4101)Grad: 0.2608  
EVAL: [0/21] Data 0.105 (0.105) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4486(0.4486) 


Epoch 28 - avg_train_loss: 0.4101 avg_val_loss: 0.3695 time: 1s
Epoch 28 - Accuracy: 0.8936776153408013


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5203(0.3695) 
Epoch: [29][0/83]Data 0.136 (0.136)Elapsed 0m 0s (remain 0m 13s)Loss: 0.3935(0.3935)Grad: 0.1640  
Epoch: [29][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4464(0.4095)Grad: 0.2901  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4485(0.4485) 


Epoch 29 - avg_train_loss: 0.4095 avg_val_loss: 0.3694 time: 1s
Epoch 29 - Accuracy: 0.8934877539396241


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5197(0.3694) 
Epoch: [30][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.5464(0.5464)Grad: 0.1719  
Epoch: [30][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3224(0.4085)Grad: 0.2144  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.4485(0.4485) 


Epoch 30 - avg_train_loss: 0.4085 avg_val_loss: 0.3694 time: 1s
Epoch 30 - Accuracy: 0.8932978925384469
Score: 0.89406


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.5201(0.3694) 
Epoch: [1][0/83]Data 0.114 (0.114)Elapsed 0m 0s (remain 0m 10s)Loss: 1.6212(1.6212)Grad: 1.0079  
Epoch: [1][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.7268(1.1448)Grad: 0.6513  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.6423(0.6423) 


Epoch 1 - avg_train_loss: 1.1448 avg_val_loss: 0.7044 time: 1s
Epoch 1 - Accuracy: 0.8486804632618189
Epoch 1 - Save Best Score: 0.8487 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.8329(0.7044) 
Epoch: [2][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 9s)Loss: 0.6960(0.6960)Grad: 0.5347  
Epoch: [2][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4119(0.5511)Grad: 0.3766  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3535(0.3535) 


Epoch 2 - avg_train_loss: 0.5511 avg_val_loss: 0.4134 time: 1s
Epoch 2 - Accuracy: 0.8895006645149042
Epoch 2 - Save Best Score: 0.8895 Mpdel


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4669(0.4134) 
Epoch: [3][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4065(0.4065)Grad: 0.2390  
Epoch: [3][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3430(0.4468)Grad: 0.2865  
EVAL: [0/21] Data 0.093 (0.093) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3412(0.3412) 


Epoch 3 - avg_train_loss: 0.4468 avg_val_loss: 0.3946 time: 1s
Epoch 3 - Accuracy: 0.8900702487184355
Epoch 3 - Save Best Score: 0.8901 Mpdel


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4261(0.3946) 
Epoch: [4][0/83]Data 0.101 (0.101)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3918(0.3918)Grad: 0.2449  
Epoch: [4][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4112(0.4383)Grad: 0.4163  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3368(0.3368) 


Epoch 4 - avg_train_loss: 0.4383 avg_val_loss: 0.3913 time: 1s
Epoch 4 - Accuracy: 0.889310803113727


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4164(0.3913) 
Epoch: [5][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 8s)Loss: 0.5028(0.5028)Grad: 0.2849  
Epoch: [5][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5388(0.4346)Grad: 0.3635  
EVAL: [0/21] Data 0.087 (0.087) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3358(0.3358) 


Epoch 5 - avg_train_loss: 0.4346 avg_val_loss: 0.3903 time: 1s
Epoch 5 - Accuracy: 0.8898803873172584


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4122(0.3903) 
Epoch: [6][0/83]Data 0.115 (0.115)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4453(0.4453)Grad: 0.1355  
Epoch: [6][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4745(0.4333)Grad: 0.4416  
EVAL: [0/21] Data 0.100 (0.100) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3355(0.3355) 


Epoch 6 - avg_train_loss: 0.4333 avg_val_loss: 0.3896 time: 1s
Epoch 6 - Accuracy: 0.8898803873172584


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4117(0.3896) 
Epoch: [7][0/83]Data 0.107 (0.107)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5475(0.5475)Grad: 0.1951  
Epoch: [7][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.1667(0.4295)Grad: 0.2667  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3358(0.3358) 


Epoch 7 - avg_train_loss: 0.4295 avg_val_loss: 0.3889 time: 1s
Epoch 7 - Accuracy: 0.8900702487184355


EVAL: [20/21] Data 0.005 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4087(0.3889) 
Epoch: [8][0/83]Data 0.112 (0.112)Elapsed 0m 0s (remain 0m 9s)Loss: 0.5001(0.5001)Grad: 0.2121  
Epoch: [8][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4112(0.4300)Grad: 0.2656  
EVAL: [0/21] Data 0.097 (0.097) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3359(0.3359) 


Epoch 8 - avg_train_loss: 0.4300 avg_val_loss: 0.3886 time: 1s
Epoch 8 - Accuracy: 0.8902601101196127
Epoch 8 - Save Best Score: 0.8903 Mpdel


EVAL: [20/21] Data 0.004 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4077(0.3886) 
Epoch: [9][0/83]Data 0.128 (0.128)Elapsed 0m 0s (remain 0m 11s)Loss: 0.4476(0.4476)Grad: 0.1985  
Epoch: [9][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5332(0.4308)Grad: 0.4245  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3359(0.3359) 


Epoch 9 - avg_train_loss: 0.4308 avg_val_loss: 0.3885 time: 1s
Epoch 9 - Accuracy: 0.8902601101196127


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4076(0.3885) 
Epoch: [10][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4424(0.4424)Grad: 0.1478  
Epoch: [10][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2598(0.4304)Grad: 0.2893  
EVAL: [0/21] Data 0.099 (0.099) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3359(0.3359) 


Epoch 10 - avg_train_loss: 0.4304 avg_val_loss: 0.3885 time: 1s
Epoch 10 - Accuracy: 0.8902601101196127


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4075(0.3885) 
Epoch: [11][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3496(0.3496)Grad: 0.1768  
Epoch: [11][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3862(0.4304)Grad: 0.5111  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3352(0.3352) 


Epoch 11 - avg_train_loss: 0.4304 avg_val_loss: 0.3877 time: 1s
Epoch 11 - Accuracy: 0.8906398329219669
Epoch 11 - Save Best Score: 0.8906 Mpdel


EVAL: [20/21] Data 0.003 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4074(0.3877) 
Epoch: [12][0/83]Data 0.120 (0.120)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4871(0.4871)Grad: 0.2736  
Epoch: [12][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2797(0.4305)Grad: 0.3353  
EVAL: [0/21] Data 0.106 (0.106) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3324(0.3324) 


Epoch 12 - avg_train_loss: 0.4305 avg_val_loss: 0.3877 time: 1s
Epoch 12 - Accuracy: 0.8908296943231441
Epoch 12 - Save Best Score: 0.8908 Mpdel


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4027(0.3877) 
Epoch: [13][0/83]Data 0.119 (0.119)Elapsed 0m 0s (remain 0m 10s)Loss: 0.2962(0.2962)Grad: 0.1894  
Epoch: [13][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5210(0.4261)Grad: 0.5117  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3355(0.3355) 


Epoch 13 - avg_train_loss: 0.4261 avg_val_loss: 0.3870 time: 1s
Epoch 13 - Accuracy: 0.8908296943231441


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4021(0.3870) 
Epoch: [14][0/83]Data 0.095 (0.095)Elapsed 0m 0s (remain 0m 8s)Loss: 0.3992(0.3992)Grad: 0.1909  
Epoch: [14][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5614(0.4287)Grad: 0.3464  
EVAL: [0/21] Data 0.095 (0.095) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3315(0.3315) 


Epoch 14 - avg_train_loss: 0.4287 avg_val_loss: 0.3857 time: 1s
Epoch 14 - Accuracy: 0.8906398329219669


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.4007(0.3857) 
Epoch: [15][0/83]Data 0.110 (0.110)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4280(0.4280)Grad: 0.1519  
Epoch: [15][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.2201(0.4246)Grad: 0.3096  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3343(0.3343) 


Epoch 15 - avg_train_loss: 0.4246 avg_val_loss: 0.3853 time: 1s
Epoch 15 - Accuracy: 0.8908296943231441


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3994(0.3853) 
Epoch: [16][0/83]Data 0.118 (0.118)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3896(0.3896)Grad: 0.1680  
Epoch: [16][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3744(0.4255)Grad: 0.3325  
EVAL: [0/21] Data 0.108 (0.108) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3332(0.3332) 


Epoch 16 - avg_train_loss: 0.4255 avg_val_loss: 0.3847 time: 1s
Epoch 16 - Accuracy: 0.8906398329219669


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3982(0.3847) 
Epoch: [17][0/83]Data 0.114 (0.114)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4438(0.4438)Grad: 0.1423  
Epoch: [17][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3499(0.4216)Grad: 0.2930  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3327(0.3327) 


Epoch 17 - avg_train_loss: 0.4216 avg_val_loss: 0.3843 time: 1s
Epoch 17 - Accuracy: 0.8913992785266756
Epoch 17 - Save Best Score: 0.8914 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3984(0.3843) 
Epoch: [18][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4012(0.4012)Grad: 0.1501  
Epoch: [18][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3284(0.4227)Grad: 0.2995  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3327(0.3327) 


Epoch 18 - avg_train_loss: 0.4227 avg_val_loss: 0.3838 time: 1s
Epoch 18 - Accuracy: 0.8908296943231441


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3970(0.3838) 
Epoch: [19][0/83]Data 0.097 (0.097)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4935(0.4935)Grad: 0.1783  
Epoch: [19][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5313(0.4246)Grad: 0.3395  
EVAL: [0/21] Data 0.145 (0.145) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3324(0.3324) 


Epoch 19 - avg_train_loss: 0.4246 avg_val_loss: 0.3836 time: 1s
Epoch 19 - Accuracy: 0.8910195557243212


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3971(0.3836) 
Epoch: [20][0/83]Data 0.108 (0.108)Elapsed 0m 0s (remain 0m 10s)Loss: 0.3894(0.3894)Grad: 0.1615  
Epoch: [20][82/83]Data 0.002 (0.004)Elapsed 0m 1s (remain 0m 0s)Loss: 0.6677(0.4243)Grad: 0.4082  
EVAL: [0/21] Data 0.107 (0.107) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3325(0.3325) 


Epoch 20 - avg_train_loss: 0.4243 avg_val_loss: 0.3836 time: 1s


EVAL: [20/21] Data 0.000 (0.012) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3969(0.3836) 


Epoch 20 - Accuracy: 0.8906398329219669


Epoch: [21][0/83]Data 0.092 (0.092)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4025(0.4025)Grad: 0.2286  
Epoch: [21][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4088(0.4219)Grad: 0.3504  
EVAL: [0/21] Data 0.112 (0.112) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3327(0.3327) 


Epoch 21 - avg_train_loss: 0.4219 avg_val_loss: 0.3842 time: 1s
Epoch 21 - Accuracy: 0.8906398329219669


EVAL: [20/21] Data 0.002 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3994(0.3842) 
Epoch: [22][0/83]Data 0.099 (0.099)Elapsed 0m 0s (remain 0m 8s)Loss: 0.4482(0.4482)Grad: 0.2178  
Epoch: [22][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4630(0.4168)Grad: 0.3845  
EVAL: [0/21] Data 0.104 (0.104) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3308(0.3308) 


Epoch 22 - avg_train_loss: 0.4168 avg_val_loss: 0.3834 time: 1s
Epoch 22 - Accuracy: 0.8910195557243212


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3955(0.3834) 
Epoch: [23][0/83]Data 0.096 (0.096)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3660(0.3660)Grad: 0.1700  
Epoch: [23][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5193(0.4205)Grad: 0.2865  
EVAL: [0/21] Data 0.094 (0.094) Elapsed 0m 0s (remain 0m 1s) Loss: 0.3334(0.3334) 


Epoch 23 - avg_train_loss: 0.4205 avg_val_loss: 0.3826 time: 1s
Epoch 23 - Accuracy: 0.8919688627302069
Epoch 23 - Save Best Score: 0.8920 Mpdel


EVAL: [20/21] Data 0.000 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3932(0.3826) 
Epoch: [24][0/83]Data 0.111 (0.111)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4506(0.4506)Grad: 0.2738  
Epoch: [24][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4649(0.4203)Grad: 0.4160  
EVAL: [0/21] Data 0.113 (0.113) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3294(0.3294) 


Epoch 24 - avg_train_loss: 0.4203 avg_val_loss: 0.3809 time: 1s
Epoch 24 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.002 (0.007) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3938(0.3809) 
Epoch: [25][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3458(0.3458)Grad: 0.1692  
Epoch: [25][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.6039(0.4185)Grad: 0.6770  
EVAL: [0/21] Data 0.114 (0.114) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3298(0.3298) 


Epoch 25 - avg_train_loss: 0.4185 avg_val_loss: 0.3805 time: 1s
Epoch 25 - Accuracy: 0.8913992785266756


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3952(0.3805) 
Epoch: [26][0/83]Data 0.120 (0.120)Elapsed 0m 0s (remain 0m 10s)Loss: 0.4492(0.4492)Grad: 0.1924  
Epoch: [26][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.5049(0.4182)Grad: 0.3271  
EVAL: [0/21] Data 0.118 (0.118) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3303(0.3303) 


Epoch 26 - avg_train_loss: 0.4182 avg_val_loss: 0.3802 time: 1s
Epoch 26 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3920(0.3802) 
Epoch: [27][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.3862(0.3862)Grad: 0.2243  
Epoch: [27][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3797(0.4174)Grad: 0.4200  
EVAL: [0/21] Data 0.109 (0.109) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3299(0.3299) 


Epoch 27 - avg_train_loss: 0.4174 avg_val_loss: 0.3793 time: 1s
Epoch 27 - Accuracy: 0.8921587241313841
Epoch 27 - Save Best Score: 0.8922 Mpdel


EVAL: [20/21] Data 0.000 (0.009) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3917(0.3793) 
Epoch: [28][0/83]Data 0.104 (0.104)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4844(0.4844)Grad: 0.1877  
Epoch: [28][82/83]Data 0.000 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4969(0.4167)Grad: 0.2618  
EVAL: [0/21] Data 0.120 (0.120) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3295(0.3295) 


Epoch 28 - avg_train_loss: 0.4167 avg_val_loss: 0.3793 time: 1s
Epoch 28 - Accuracy: 0.8919688627302069


EVAL: [20/21] Data 0.000 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3906(0.3793) 
Epoch: [29][0/83]Data 0.094 (0.094)Elapsed 0m 0s (remain 0m 8s)Loss: 0.3682(0.3682)Grad: 0.1628  
Epoch: [29][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.4362(0.4167)Grad: 0.4279  
EVAL: [0/21] Data 0.116 (0.116) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3291(0.3291) 


Epoch 29 - avg_train_loss: 0.4167 avg_val_loss: 0.3792 time: 1s
Epoch 29 - Accuracy: 0.8917790013290298


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3909(0.3792) 
Epoch: [30][0/83]Data 0.109 (0.109)Elapsed 0m 0s (remain 0m 9s)Loss: 0.4524(0.4524)Grad: 0.1864  
Epoch: [30][82/83]Data 0.002 (0.004)Elapsed 0m 0s (remain 0m 0s)Loss: 0.3822(0.4142)Grad: 0.3405  
EVAL: [0/21] Data 0.115 (0.115) Elapsed 0m 0s (remain 0m 2s) Loss: 0.3293(0.3293) 


Epoch 30 - avg_train_loss: 0.4142 avg_val_loss: 0.3792 time: 1s
Epoch 30 - Accuracy: 0.8917790013290298
Score: 0.89216
Score: 0.89433


EVAL: [20/21] Data 0.002 (0.008) Elapsed 0m 0s (remain 0m 0s) Loss: 0.3908(0.3792) 
