<a href="https://www.kaggle.com/code/dwijmistry/sign-language-challenge-google?scriptVersionId=121288244" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Google - Isolated Sign Language Recognition

The goal of this competition is to classify isolated American Sign Language (ASL) signs.

The landmarks were extracted from raw videos with the MediaPipe holistic model and are asked to predict the sign from this data.

## Version 1
- Based on https://www.kaggle.com/code/robikscube/sign-language-recognition-eda-twitch-stream

## Version 2
- Added remaining code

## Version 3
- Based on https://www.kaggle.com/code/medali1992/gislr-nn-arcface-baseline/notebook (Version 26)

# pip Install

In [1]:
!pip install onnx_tf
!pip install tflite-runtime
!pip install -q --upgrade wandb

# install nb_black for sutoformatting
!pip install nb_black --quiet
%load_ext lab_black

Collecting onnx_tf
  Downloading onnx_tf-1.10.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx_tf
Successfully installed onnx_tf-1.10.0
[0mCollecting tflite-runtime
  Downloading tflite_runtime-2.11.0-cp37-cp37m-manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tflite-runtime
Successfully installed tflite-runtime-2.11.0
[0m

# Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use("seaborn-colorblind")


import math
import random
import time
from collections import OrderedDict
import tensorflow as tf
from tqdm import tqdm
import json
import os
import gc
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GroupKFold, StratifiedGroupKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.optim.optimizer import Optimizer
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import (
    CosineAnnealingWarmRestarts,
    CosineAnnealingLR,
    ReduceLROnPlateau,
)
from torchinfo import summary

import onnx
import onnx_tf
from onnx_tf.backend import prepare

import warnings

warnings.filterwarnings("ignore")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
VERSION = 14
DATA_DIR = "/kaggle/input/asl-signs"

# Utils

In [3]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = accuracy_score(y_true, y_pred)
    return score


def seed_torch(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def load_relevant_data_subset_with_imputation(pq_path):
    data_columns = ["x", "y", "z"]
    data = pd.read_parquet(pq_path, columns=data_columns)
    data.replace(np.nan, 0, inplace=True)
    n_frames = int(len(data) / CFG.rows_per_frame)
    data = data.values.reshape(n_frames, CFG.rows_per_frame, len(data_columns))
    return data.astype(np.float32)


def load_relevant_data_subset(pq_path):
    data_columns = ["x", "y"]
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / CFG.rows_per_frame)
    data = data.values.reshape(n_frames, CFG.rows_per_frame, len(data_columns))
    return data.astype(np.float32)


def read_dict(file_path):
    path = os.path.expanduser(file_path)
    with open(path, "r") as f:
        dic = json.load(f)
    return dic

# Configuration

In [4]:
class CFG:
    num_workers = 2
    apex = False
    scheduler = "CosineAnnealingLR"  # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts','OneCycleLR']
    epochs = 500
    print_freq = 200
    # CosineAnnealingLR params
    cosanneal_params = {"T_max": 5, "eta_min": 3 * 1e-5, "last_epoch": -1}
    # ReduceLROnPlateau params
    reduce_params = {
        "mode": "max",
        "factor": 0.8,
        "patience": 5,
        "eps": 1e-6,
        "verbose": True,
    }
    # CosineAnnealingWarmRestarts params
    cosanneal_res_params = {"T_0": 3, "eta_min": 1e-6, "T_mult": 1, "last_epoch": -1}
    # OneCycleLR params
    onecycle_params = {
        "pct_start": 0.1,
        "div_factor": 1e1,
        "max_lr": 1e-3,
        "steps_per_epoch": 3,
        "epochs": 3,
    }
    momentum = 0.9
    model_name = "NN_ArcFace"
    lr = 0.000333
    weight_decay = 1e-4
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    data_path = "../input/asl-signs/"
    debug = False
    arcface = False
    use_aggregation_dataset = True
    target_size = 250
    rows_per_frame = 543
    batch_size = 512
    train = True
    early_stop = True
    target_col = "label"
    scale = 30.0
    margin = 0.50
    easy_margin = False
    ls_eps = 0.0
    fc_dim = 512
    early_stopping_steps = 5
    grad_cam = False
    seed = 42

# Directory Settings


In [5]:
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = f"./{CFG.model_name}_version{VERSION}/"
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)


def init_logger(log_file=OUTPUT_DIR + "train.log"):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler

    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger


LOGGER = init_logger()

# Load Data


In [6]:
train = pd.read_csv(f"{CFG.data_path}train.csv")
label_index = read_dict(f"{CFG.data_path}sign_to_prediction_index_map.json")
index_label = dict([(label_index[key], key) for key in label_index])
train["label"] = train["sign"].map(lambda sign: label_index[sign])
train["path"] = DATA_DIR + "/" + train["path"]

if CFG.debug:
    CFG.epochs = 1
    train = train.sample(n=4000, random_state=CFG.seed).reset_index(drop=True)

train.head()

Unnamed: 0,path,participant_id,sequence_id,sign,label
0,/kaggle/input/asl-signs/train_landmark_files/2...,26734,1000035562,blow,25
1,/kaggle/input/asl-signs/train_landmark_files/2...,28656,1000106739,wait,232
2,/kaggle/input/asl-signs/train_landmark_files/1...,16069,100015657,cloud,48
3,/kaggle/input/asl-signs/train_landmark_files/2...,25571,1000210073,bird,23
4,/kaggle/input/asl-signs/train_landmark_files/6...,62590,1000240708,owie,164


# Data Preparation¶

In [7]:
DROP_Z = True

NUM_FRAMES = 15
SEGMENTS = 3

LEFT_HAND_OFFSET = 468
POSE_OFFSET = LEFT_HAND_OFFSET + 21
RIGHT_HAND_OFFSET = POSE_OFFSET + 33

## average over the entire face, and the entire 'pose'
averaging_sets = [[0, 468], [POSE_OFFSET, 33]]

lip_landmarks = [
    61,
    185,
    40,
    39,
    37,
    0,
    267,
    269,
    270,
    409,
    291,
    146,
    91,
    181,
    84,
    17,
    314,
    405,
    321,
    375,
    78,
    191,
    80,
    81,
    82,
    13,
    312,
    311,
    310,
    415,
    95,
    88,
    178,
    87,
    14,
    317,
    402,
    318,
    324,
    308,
]
left_hand_landmarks = list(range(LEFT_HAND_OFFSET, LEFT_HAND_OFFSET + 21))
right_hand_landmarks = list(range(RIGHT_HAND_OFFSET, RIGHT_HAND_OFFSET + 21))

point_landmarks = [
    item
    for sublist in [lip_landmarks, left_hand_landmarks, right_hand_landmarks]
    for item in sublist
]

LANDMARKS = len(point_landmarks) + len(averaging_sets)
print(LANDMARKS)
if DROP_Z:
    INPUT_SHAPE = (NUM_FRAMES, LANDMARKS * 2)
else:
    INPUT_SHAPE = (NUM_FRAMES, LANDMARKS * 3)

FLAT_INPUT_SHAPE = (INPUT_SHAPE[0] + 2 * (SEGMENTS + 1)) * INPUT_SHAPE[1]

84


In [8]:
def tf_nan_mean(x, axis=0):
    return tf.reduce_sum(
        tf.where(tf.math.is_nan(x), tf.zeros_like(x), x), axis=axis
    ) / tf.reduce_sum(
        tf.where(tf.math.is_nan(x), tf.zeros_like(x), tf.ones_like(x)), axis=axis
    )


def tf_nan_std(x, axis=0):
    d = x - tf_nan_mean(x, axis=axis)
    return tf.math.sqrt(tf_nan_mean(d * d, axis=axis))


def flatten_means_and_stds(x, axis=0):
    # Get means and stds
    x_mean = tf_nan_mean(x, axis=0)
    x_std = tf_nan_std(x, axis=0)

    x_out = tf.concat([x_mean, x_std], axis=0)
    x_out = tf.reshape(x_out, (1, INPUT_SHAPE[1] * 2))
    x_out = tf.where(tf.math.is_finite(x_out), x_out, tf.zeros_like(x_out))
    return x_out

In [9]:
class FeatureGen(tf.keras.layers.Layer):
    def __init__(self):
        super(FeatureGen, self).__init__()

    def call(self, x_in):
        #         print(right_hand_percentage(x))
        x_list = [
            tf.expand_dims(
                tf_nan_mean(x_in[:, av_set[0] : av_set[0] + av_set[1], :], axis=1),
                axis=1,
            )
            for av_set in averaging_sets
        ]
        x_list.append(tf.gather(x_in, point_landmarks, axis=1))
        x = tf.concat(x_list, 1)

        x_padded = x
        for i in range(SEGMENTS):
            p0 = tf.where(
                ((tf.shape(x_padded)[0] % SEGMENTS) > 0) & ((i % 2) != 0), 1, 0
            )
            p1 = tf.where(
                ((tf.shape(x_padded)[0] % SEGMENTS) > 0) & ((i % 2) == 0), 1, 0
            )
            paddings = [[p0, p1], [0, 0], [0, 0]]
            x_padded = tf.pad(x_padded, paddings, mode="SYMMETRIC")
        x_list = tf.split(x_padded, SEGMENTS)
        x_list = [flatten_means_and_stds(_x, axis=0) for _x in x_list]

        x_list.append(flatten_means_and_stds(x, axis=0))

        ## Resize only dimension 0. Resize can't handle nan, so replace nan with that dimension's avg value to reduce impact.
        x = tf.image.resize(
            tf.where(tf.math.is_finite(x), x, tf_nan_mean(x, axis=0)),
            [NUM_FRAMES, LANDMARKS],
        )
        x = tf.reshape(x, (1, INPUT_SHAPE[0] * INPUT_SHAPE[1]))
        x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
        x_list.append(x)
        x = tf.concat(x_list, axis=1)
        return x


feature_converter = FeatureGen()

X = np.load(
    "/kaggle/input/gislr-feature-data-on-the-shoulders/feature_data.npy"
).astype(np.float32)
y = np.load(
    "/kaggle/input/gislr-feature-data-on-the-shoulders/feature_labels.npy"
).astype(np.uint8)
print(X.shape, y.shape)

if DROP_Z:
    X = np.reshape(X, [X.shape[0], -1, 3])
    X = X[:, :, 0:2]
    X = np.reshape(X, [X.shape[0], -1])
    print(X.shape, y.shape)

(94477, 5796) (94477,)
(94477, 3864) (94477,)


# Model Tracking

In [10]:
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_key")

import wandb

wandb.login(key=wandb_api)


def class2dict(f):
    return dict(
        (name, getattr(f, name)) for name in dir(f) if not name.startswith("__")
    )


run = wandb.init(
    project="GISLR Competition",
    name=f"{CFG.model_name}_Version{VERSION}",
    config=class2dict(CFG),
    group=CFG.model_name,
    job_type="train",
)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdwijmistry[0m. Use [1m`wandb login --relogin`[0m to force relogin


# DataSet

In [11]:
class Dataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, i):
        return self.X[i].astype(np.float32), self.y[i]

# Model

In [12]:
class ArcMarginProduct(nn.Module):
    def __init__(
        self,
        in_features,
        out_features,
        scale=30.0,
        margin=0.50,
        easy_margin=False,
        ls_eps=0.0,
    ):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale

        return output


# https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/blob/main/landmark-recognition-2020-third-place-submission.ipynb
class ArcMarginProduct_subcenter(nn.Module):
    def __init__(self, in_features, out_features, k=3):
        super().__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features * k, in_features))
        self.reset_parameters()
        self.k = k
        self.out_features = out_features

    def reset_parameters(self):
        stdv = 1.0 / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

    def forward(self, features):
        cosine_all = F.linear(F.normalize(features), F.normalize(self.weight))
        cosine_all = cosine_all.view(-1, self.out_features, self.k)
        cosine, _ = torch.max(cosine_all, dim=2)
        return cosine

In [13]:
class ASLLinearModel(torch.nn.Module):
    def __init__(
        self,
        in_features: int,
        first_out_features: int,
        num_classes: int,
        num_blocks: int,
        drop_rate: float,
    ):
        super(ASLLinearModel, self).__init__()

        blocks = []
        out_features = first_out_features
        for idx in range(num_blocks):
            blocks.append(self._make_block(in_features, out_features, drop_rate))

            in_features = out_features
            out_features = out_features // 2

        self.model = nn.Sequential(*blocks)
        self.final = ArcMarginProduct(
            256,
            num_classes,
            scale=CFG.scale,
            margin=CFG.margin,
            easy_margin=False,
            ls_eps=0.0,
        )
        self.fc_probs = nn.Linear(256, num_classes)
        print(self.model)

    def _make_block(self, in_features, out_features, drop_rate):
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
            nn.Dropout(drop_rate),
        )

    def forward(self, x, label):
        feature = self.model(x)
        if CFG.arcface:
            arcface = self.final(feature, label)
            probs = self.fc_probs(feature)
            return probs, arcface
        else:
            probs = self.fc_probs(feature)
            return probs

# Helper Function

In [14]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return "%dm %ds" % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return "%s (remain %s)" % (asMinutes(s), asMinutes(rs))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    if CFG.apex:
        scaler = GradScaler()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    global_step = 0
    for step, (features, labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        features = features.to(device).float()
        labels = labels.to(device).long()
        batch_size = labels.size(0)
        if CFG.apex:
            with autocast():
                if CFG.arcface:
                    probs, arcface = model(features, labels)
                    arcface_loss = nn.CrossEntropyLoss()(arcface, labels)
                    loss = criterion(probs, labels)
                else:
                    y_preds = model(features, labels)
                    loss = criterion(y_preds, labels)
        else:
            if CFG.arcface:
                probs, arcface = model(features, labels)
                arcface_loss = nn.CrossEntropyLoss()(arcface, labels)
                loss = criterion(probs, labels)
            else:
                y_preds = model(features, labels)
                loss = criterion(y_preds, labels)
        # record loss
        if CFG.arcface:
            loss = 0.5 * loss + 0.5 * arcface_loss
            losses.update(loss.item(), batch_size)
        else:
            losses.update(loss.item(), batch_size)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(
            model.parameters(), CFG.max_grad_norm
        )
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader) - 1):
            print(
                "Epoch: [{0}][{1}/{2}] "
                "Elapsed {remain:s} "
                "Loss: {loss.val:.4f}({loss.avg:.4f}) "
                "Grad: {grad_norm:.4f} "
                "LR: {lr:.6f}  ".format(
                    epoch + 1,
                    step,
                    len(train_loader),
                    remain=timeSince(start, float(step + 1) / len(train_loader)),
                    loss=losses,
                    grad_norm=grad_norm,
                    lr=scheduler.get_lr()[0],
                )
            )
        wandb.log(
            {
                f"loss": losses.val,
                f"lr": scheduler.get_lr()[0],
            }
        )
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    for step, (features, labels) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        features = features.to(device).float()
        labels = labels.to(device).long()
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            if CFG.arcface:
                y_preds, _ = model(features, labels)
            else:
                y_preds = model(features, labels)

        preds.append(y_preds.softmax(1).to("cpu").numpy())
        loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader) - 1):
            print(
                "EVAL: [{0}/{1}] "
                "Elapsed {remain:s} "
                "Loss: {loss.val:.4f}({loss.avg:.4f}) ".format(
                    step,
                    len(valid_loader),
                    loss=losses,
                    remain=timeSince(start, float(step + 1) / len(valid_loader)),
                )
            )
    predictions = np.concatenate(preds)
    return losses.avg, predictions

# Train Loop¶


In [15]:
# Seed for producing results
seed_torch(seed=45)

# ====================================================
# loader
# ====================================================
groups = train["path"].map(lambda x: x.split("/")[5])
sgkf = StratifiedGroupKFold(n_splits=5, random_state=42, shuffle=True)
for i, (train_index, valid_index) in enumerate(sgkf.split(X, y, groups)):
    train_index = train_index
    valid_index = valid_index
    print(f"Fold {i}:")
    print(f"  Train index shape: {train_index.shape}")
    print(f"         group={groups[train_index]}")
    print(f"  Valid index shape:  {valid_index.shape}")
    print(f"         group={groups[valid_index]}")
    break
X_train, X_val, y_train, y_val = (
    X[train_index],
    X[valid_index],
    y[train_index],
    y[valid_index],
)
train_dataset = Dataset(X_train, y_train)
valid_dataset = Dataset(X_val, y_val)


train_loader = DataLoader(
    train_dataset,
    batch_size=CFG.batch_size,
    shuffle=True,
    num_workers=CFG.num_workers,
    pin_memory=True,
    drop_last=True,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
    drop_last=False,
)


# ====================================================
# scheduler
# ====================================================
def get_scheduler(optimizer):
    if CFG.scheduler == "ReduceLROnPlateau":
        scheduler = ReduceLROnPlateau(optimizer, **CFG.reduce_params)
    elif CFG.scheduler == "CosineAnnealingLR":
        scheduler = CosineAnnealingLR(optimizer, **CFG.cosanneal_params)
    elif CFG.scheduler == "CosineAnnealingWarmRestarts":
        scheduler = CosineAnnealingWarmRestarts(optimizer, **CFG.reduce_params)
    return scheduler


# ====================================================
# model & optimizer
# ====================================================
model = ASLLinearModel(
    in_features=3864,
    first_out_features=1024,
    num_classes=250,
    num_blocks=3,
    drop_rate=0.4,
)
model.to(device)

optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
scheduler = get_scheduler(optimizer)

# ====================================================
# loop
# ====================================================
criterion = nn.CrossEntropyLoss()
best_score = 0
for epoch in range(CFG.epochs):
    start_time = time.time()

    # train
    avg_loss = train_fn(
        train_loader, model, criterion, optimizer, epoch, scheduler, device
    )

    # eval
    avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

    if isinstance(scheduler, ReduceLROnPlateau):
        scheduler.step(avg_val_loss)
    elif isinstance(scheduler, CosineAnnealingLR):
        scheduler.step()
    elif isinstance(scheduler, CosineAnnealingWarmRestarts):
        scheduler.step()

    score = get_score(y_val, preds.argmax(1))

    elapsed = time.time() - start_time

    LOGGER.info(
        f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
    )
    LOGGER.info(f"Epoch {epoch+1} - Score: {score:.4f}")
    wandb.log(
        {
            f"epoch": epoch + 1,
            f"avg_train_loss": avg_loss,
            f"avg_val_loss": avg_val_loss,
            f"score": score,
        }
    )

    if best_score < score:
        best_score = score
        LOGGER.info(f"Epoch {epoch+1} - Save Best score: {best_score:.4f} Model")
        torch.save(
            model.state_dict(),
            OUTPUT_DIR + f"{CFG.model_name}_best_score_version{VERSION}.pth",
        )
LOGGER.info(f"Our CV score is {best_score}")

Fold 0:
  Train index shape: (70951,)
         group=0        26734
3        25571
5        26734
6        26734
7        32319
         ...  
94470    61333
94471    25571
94473    26734
94474    25571
94475    29302
Name: path, Length: 70951, dtype: object
  Valid index shape:  (23526,)
         group=1        28656
2        16069
4        62590
11       36257
15       28656
         ...  
94461    62590
94462    62590
94469    53618
94472    53618
94476    36257
Name: path, Length: 23526, dtype: object
Sequential(
  (0): Sequential(
    (0): Linear(in_features=3864, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
  (1): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
  (2): Sequen

Epoch 1 - avg_train_loss: 5.4442  avg_val_loss: 5.1672  time: 6s
Epoch 1 - Score: 0.0174
Epoch 1 - Save Best score: 0.0174 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 5.1634(5.1672) 
Epoch: [2][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 5.2075(5.2075) Grad: 1.1028 LR: 0.000278  
Epoch: [2][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.8111(4.9983) Grad: 1.0895 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 4.7577(4.7577) 


Epoch 2 - avg_train_loss: 4.9983  avg_val_loss: 4.7822  time: 4s
Epoch 2 - Score: 0.0463
Epoch 2 - Save Best score: 0.0463 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.7853(4.7822) 
Epoch: [3][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 4.8251(4.8251) Grad: 1.1387 LR: 0.000174  
Epoch: [3][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.5219(4.6059) Grad: 1.1337 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 4.5905(4.5905) 


Epoch 3 - avg_train_loss: 4.6059  avg_val_loss: 4.6287  time: 3s
Epoch 3 - Score: 0.0546
Epoch 3 - Save Best score: 0.0546 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.5859(4.6287) 
Epoch: [4][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 4.4157(4.4157) Grad: 1.1155 LR: 0.000085  
Epoch: [4][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.3067(4.3842) Grad: 1.3481 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 4.3369(4.3369) 


Epoch 4 - avg_train_loss: 4.3842  avg_val_loss: 4.3771  time: 3s
Epoch 4 - Score: 0.0965
Epoch 4 - Save Best score: 0.0965 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.3600(4.3771) 
Epoch: [5][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 4.2384(4.2384) Grad: 1.2797 LR: 0.000038  
Epoch: [5][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.1755(4.2407) Grad: 1.4289 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 4.1842(4.1842) 


Epoch 5 - avg_train_loss: 4.2407  avg_val_loss: 4.2174  time: 4s
Epoch 5 - Score: 0.1172
Epoch 5 - Save Best score: 0.1172 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.2036(4.2174) 
Epoch: [6][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 4.1813(4.1813) Grad: 1.4689 LR: 0.000030  
Epoch: [6][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.1652(4.1615) Grad: 1.3960 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 4.1292(4.1292) 


Epoch 6 - avg_train_loss: 4.1615  avg_val_loss: 4.1619  time: 3s
Epoch 6 - Score: 0.1228
Epoch 6 - Save Best score: 0.1228 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.1506(4.1619) 
Epoch: [7][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 4.2737(4.2737) Grad: 2.1430 LR: 0.000088  
Epoch: [7][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 4.0773(4.0964) Grad: 1.5586 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 4.0599(4.0599) 


Epoch 7 - avg_train_loss: 4.0964  avg_val_loss: 4.0964  time: 3s
Epoch 7 - Score: 0.1410
Epoch 7 - Save Best score: 0.1410 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.0688(4.0964) 
Epoch: [8][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 4.0616(4.0616) Grad: 1.8167 LR: 0.000409  
Epoch: [8][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.8943(4.0021) Grad: 1.7823 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 4.0517(4.0517) 


Epoch 8 - avg_train_loss: 4.0021  avg_val_loss: 4.1097  time: 4s
Epoch 8 - Score: 0.1298


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.0882(4.1097) 
Epoch: [9][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 3.9293(3.9293) Grad: 2.4752 LR: 0.000406  
Epoch: [9][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.7753(3.8528) Grad: 1.8839 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.8822(3.8822) 


Epoch 9 - avg_train_loss: 3.8528  avg_val_loss: 3.9480  time: 3s
Epoch 9 - Score: 0.1520
Epoch 9 - Save Best score: 0.1520 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.9696(3.9480) 
Epoch: [10][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 3.6903(3.6903) Grad: 1.6479 LR: 0.000409  
Epoch: [10][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 3.4715(3.7059) Grad: 2.3615 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 4.0672(4.0672) 


Epoch 10 - avg_train_loss: 3.7059  avg_val_loss: 4.1211  time: 4s
Epoch 10 - Score: 0.1132


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 4.2179(4.1211) 
Epoch: [11][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 3.5689(3.5689) Grad: 2.1104 LR: 0.000365  
Epoch: [11][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.5284(3.5473) Grad: 1.9476 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.7980(3.7980) 


Epoch 11 - avg_train_loss: 3.5473  avg_val_loss: 3.9113  time: 4s
Epoch 11 - Score: 0.1345


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.9679(3.9113) 
Epoch: [12][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 3.4694(3.4694) Grad: 2.1092 LR: 0.000278  
Epoch: [12][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.4600(3.4027) Grad: 2.3484 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.8505(3.8505) 


Epoch 12 - avg_train_loss: 3.4027  avg_val_loss: 3.9296  time: 3s
Epoch 12 - Score: 0.1358


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.9399(3.9296) 
Epoch: [13][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 3.2390(3.2390) Grad: 2.4577 LR: 0.000174  
Epoch: [13][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.1926(3.2619) Grad: 2.2159 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2724(3.2724) 


Epoch 13 - avg_train_loss: 3.2619  avg_val_loss: 3.3817  time: 3s
Epoch 13 - Score: 0.2472
Epoch 13 - Save Best score: 0.2472 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.3877(3.3817) 
Epoch: [14][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 3.2237(3.2237) Grad: 2.0064 LR: 0.000085  
Epoch: [14][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.1084(3.1156) Grad: 2.6303 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.0903(3.0903) 


Epoch 14 - avg_train_loss: 3.1156  avg_val_loss: 3.1978  time: 4s
Epoch 14 - Score: 0.2797
Epoch 14 - Save Best score: 0.2797 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.2069(3.1978) 
Epoch: [15][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 3.0993(3.0993) Grad: 2.2963 LR: 0.000038  
Epoch: [15][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.8621(3.0148) Grad: 1.9754 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.9735(2.9735) 


Epoch 15 - avg_train_loss: 3.0148  avg_val_loss: 3.0840  time: 4s
Epoch 15 - Score: 0.3013
Epoch 15 - Save Best score: 0.3013 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0853(3.0840) 
Epoch: [16][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.9423(2.9423) Grad: 2.0361 LR: 0.000030  
Epoch: [16][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.9302(2.9658) Grad: 1.9838 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.9403(2.9403) 


Epoch 16 - avg_train_loss: 2.9658  avg_val_loss: 3.0271  time: 3s
Epoch 16 - Score: 0.3099
Epoch 16 - Save Best score: 0.3099 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0453(3.0271) 
Epoch: [17][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.8959(2.8959) Grad: 1.8890 LR: 0.000088  
Epoch: [17][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.0244(2.9513) Grad: 2.0922 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.9869(2.9869) 


Epoch 17 - avg_train_loss: 2.9513  avg_val_loss: 3.0815  time: 3s
Epoch 17 - Score: 0.2907


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.1254(3.0815) 
Epoch: [18][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.8784(2.8784) Grad: 2.2229 LR: 0.000409  
Epoch: [18][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 3.0140(2.9469) Grad: 2.5956 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.9318(2.9318) 


Epoch 18 - avg_train_loss: 2.9469  avg_val_loss: 3.0386  time: 4s
Epoch 18 - Score: 0.3025


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0598(3.0386) 
Epoch: [19][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.9307(2.9307) Grad: 2.0245 LR: 0.000406  
Epoch: [19][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 2.9887(2.9615) Grad: 2.4925 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.0898(3.0898) 


Epoch 19 - avg_train_loss: 2.9615  avg_val_loss: 3.2306  time: 4s
Epoch 19 - Score: 0.2638


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.2445(3.2306) 
Epoch: [20][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.9073(2.9073) Grad: 2.3723 LR: 0.000409  
Epoch: [20][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.8539(2.9530) Grad: 2.0663 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.3003(3.3003) 


Epoch 20 - avg_train_loss: 2.9530  avg_val_loss: 3.4080  time: 3s
Epoch 20 - Score: 0.2193


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.4871(3.4080) 
Epoch: [21][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 3.0443(3.0443) Grad: 2.9368 LR: 0.000365  
Epoch: [21][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.8995(2.9142) Grad: 2.6423 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.6050(3.6050) 


Epoch 21 - avg_train_loss: 2.9142  avg_val_loss: 3.7027  time: 4s
Epoch 21 - Score: 0.1648


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.7268(3.7027) 
Epoch: [22][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.8623(2.8623) Grad: 3.0661 LR: 0.000278  
Epoch: [22][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.6791(2.8318) Grad: 2.2203 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.7266(3.7266) 


Epoch 22 - avg_train_loss: 2.8318  avg_val_loss: 3.8269  time: 3s
Epoch 22 - Score: 0.1487


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.9669(3.8269) 
Epoch: [23][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.7121(2.7121) Grad: 2.2465 LR: 0.000174  
Epoch: [23][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.6931(2.7242) Grad: 2.5467 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.8240(2.8240) 


Epoch 23 - avg_train_loss: 2.7242  avg_val_loss: 2.9646  time: 3s
Epoch 23 - Score: 0.3142
Epoch 23 - Save Best score: 0.3142 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9804(2.9646) 
Epoch: [24][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.8437(2.8437) Grad: 2.6394 LR: 0.000085  
Epoch: [24][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.5097(2.6147) Grad: 2.5767 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5919(2.5919) 


Epoch 24 - avg_train_loss: 2.6147  avg_val_loss: 2.7058  time: 4s
Epoch 24 - Score: 0.3693
Epoch 24 - Save Best score: 0.3693 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7555(2.7058) 
Epoch: [25][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.5809(2.5809) Grad: 2.5224 LR: 0.000038  
Epoch: [25][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.5286(2.5286) Grad: 2.3119 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4657(2.4657) 


Epoch 25 - avg_train_loss: 2.5286  avg_val_loss: 2.5781  time: 3s
Epoch 25 - Score: 0.4024
Epoch 25 - Save Best score: 0.4024 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6189(2.5781) 
Epoch: [26][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.5043(2.5043) Grad: 2.7044 LR: 0.000030  
Epoch: [26][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.5190(2.4872) Grad: 2.2076 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4861(2.4861) 


Epoch 26 - avg_train_loss: 2.4872  avg_val_loss: 2.5909  time: 3s
Epoch 26 - Score: 0.3942


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6283(2.5909) 
Epoch: [27][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.5137(2.5137) Grad: 2.3614 LR: 0.000088  
Epoch: [27][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.6438(2.4879) Grad: 2.6048 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5034(2.5034) 


Epoch 27 - avg_train_loss: 2.4879  avg_val_loss: 2.6183  time: 4s
Epoch 27 - Score: 0.3953


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6409(2.6183) 
Epoch: [28][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.4488(2.4488) Grad: 2.3917 LR: 0.000409  
Epoch: [28][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 2.4522(2.5152) Grad: 2.6237 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6986(2.6986) 


Epoch 28 - avg_train_loss: 2.5152  avg_val_loss: 2.8314  time: 4s
Epoch 28 - Score: 0.3408


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8350(2.8314) 
Epoch: [29][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.4457(2.4457) Grad: 2.7636 LR: 0.000406  
Epoch: [29][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.6183(2.5639) Grad: 3.0538 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7861(2.7861) 


Epoch 29 - avg_train_loss: 2.5639  avg_val_loss: 2.8732  time: 3s
Epoch 29 - Score: 0.3321


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9528(2.8732) 
Epoch: [30][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.5743(2.5743) Grad: 2.4381 LR: 0.000409  
Epoch: [30][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.7026(2.6036) Grad: 2.7228 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.8445(2.8445) 


Epoch 30 - avg_train_loss: 2.6036  avg_val_loss: 2.9751  time: 4s
Epoch 30 - Score: 0.3227


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0565(2.9751) 
Epoch: [31][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.3000(2.3000) Grad: 2.7800 LR: 0.000365  
Epoch: [31][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.5801(2.5965) Grad: 2.5825 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2794(3.2794) 


Epoch 31 - avg_train_loss: 2.5965  avg_val_loss: 3.3934  time: 3s
Epoch 31 - Score: 0.2177


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.4741(3.3934) 
Epoch: [32][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.4734(2.4734) Grad: 2.6469 LR: 0.000278  
Epoch: [32][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.5815(2.5396) Grad: 2.4934 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.9065(2.9065) 


Epoch 32 - avg_train_loss: 2.5396  avg_val_loss: 3.0184  time: 3s
Epoch 32 - Score: 0.2899


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.1003(3.0184) 
Epoch: [33][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.5513(2.5513) Grad: 2.6207 LR: 0.000174  
Epoch: [33][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.4642(2.4447) Grad: 2.5861 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7505(2.7505) 


Epoch 33 - avg_train_loss: 2.4447  avg_val_loss: 2.8552  time: 4s
Epoch 33 - Score: 0.3413


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9044(2.8552) 
Epoch: [34][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.4105(2.4105) Grad: 2.5041 LR: 0.000085  
Epoch: [34][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.4480(2.3456) Grad: 2.7252 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4713(2.4713) 


Epoch 34 - avg_train_loss: 2.3456  avg_val_loss: 2.5967  time: 3s
Epoch 34 - Score: 0.3931


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6654(2.5967) 
Epoch: [35][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 2.3201(2.3201) Grad: 2.2650 LR: 0.000038  
Epoch: [35][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0530(2.2723) Grad: 2.4023 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3532(2.3532) 


Epoch 35 - avg_train_loss: 2.2723  avg_val_loss: 2.4478  time: 3s
Epoch 35 - Score: 0.4248
Epoch 35 - Save Best score: 0.4248 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5006(2.4478) 
Epoch: [36][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.2007(2.2007) Grad: 2.4511 LR: 0.000030  
Epoch: [36][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2820(2.2368) Grad: 2.6056 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2989(2.2989) 


Epoch 36 - avg_train_loss: 2.2368  avg_val_loss: 2.4035  time: 4s
Epoch 36 - Score: 0.4380
Epoch 36 - Save Best score: 0.4380 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4336(2.4035) 
Epoch: [37][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.2592(2.2592) Grad: 2.8821 LR: 0.000088  
Epoch: [37][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 2.1358(2.2399) Grad: 2.7146 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3128(2.3128) 


Epoch 37 - avg_train_loss: 2.2399  avg_val_loss: 2.4366  time: 4s
Epoch 37 - Score: 0.4278


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5083(2.4366) 
Epoch: [38][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.1142(2.1142) Grad: 2.3299 LR: 0.000409  
Epoch: [38][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.4146(2.2776) Grad: 2.5987 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4507(2.4507) 


Epoch 38 - avg_train_loss: 2.2776  avg_val_loss: 2.5485  time: 3s
Epoch 38 - Score: 0.4058


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5869(2.5485) 
Epoch: [39][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.4565(2.4565) Grad: 2.6525 LR: 0.000406  
Epoch: [39][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.3919(2.3330) Grad: 2.6422 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.6292(2.6292) 


Epoch 39 - avg_train_loss: 2.3330  avg_val_loss: 2.7310  time: 4s
Epoch 39 - Score: 0.3579


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7756(2.7310) 
Epoch: [40][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.3996(2.3996) Grad: 3.3478 LR: 0.000409  
Epoch: [40][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.3656(2.3989) Grad: 2.6250 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.5937(3.5937) 


Epoch 40 - avg_train_loss: 2.3989  avg_val_loss: 3.7313  time: 3s
Epoch 40 - Score: 0.1988


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.8040(3.7313) 
Epoch: [41][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.3180(2.3180) Grad: 2.6880 LR: 0.000365  
Epoch: [41][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.4196(2.3950) Grad: 3.0033 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2785(3.2785) 


Epoch 41 - avg_train_loss: 2.3950  avg_val_loss: 3.3398  time: 3s
Epoch 41 - Score: 0.2278


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.4892(3.3398) 
Epoch: [42][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.3465(2.3465) Grad: 2.6166 LR: 0.000278  
Epoch: [42][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.3473(2.3610) Grad: 2.7425 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.8872(2.8872) 


Epoch 42 - avg_train_loss: 2.3610  avg_val_loss: 2.9448  time: 3s
Epoch 42 - Score: 0.3137


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9991(2.9448) 
Epoch: [43][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 2.4473(2.4473) Grad: 2.7455 LR: 0.000174  
Epoch: [43][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1969(2.2701) Grad: 2.4037 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4341(2.4341) 


Epoch 43 - avg_train_loss: 2.2701  avg_val_loss: 2.5494  time: 4s
Epoch 43 - Score: 0.3992


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6272(2.5494) 
Epoch: [44][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.1459(2.1459) Grad: 2.6883 LR: 0.000085  
Epoch: [44][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2903(2.1731) Grad: 2.5336 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4168(2.4168) 


Epoch 44 - avg_train_loss: 2.1731  avg_val_loss: 2.4918  time: 3s
Epoch 44 - Score: 0.4084


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5314(2.4918) 
Epoch: [45][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.1355(2.1355) Grad: 2.6020 LR: 0.000038  
Epoch: [45][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.3214(2.0983) Grad: 2.9246 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2049(2.2049) 


Epoch 45 - avg_train_loss: 2.0983  avg_val_loss: 2.3114  time: 3s
Epoch 45 - Score: 0.4560
Epoch 45 - Save Best score: 0.4560 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3667(2.3114) 
Epoch: [46][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.0641(2.0641) Grad: 2.5479 LR: 0.000030  
Epoch: [46][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.9844(2.0640) Grad: 2.3822 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2166(2.2166) 


Epoch 46 - avg_train_loss: 2.0640  avg_val_loss: 2.2995  time: 4s
Epoch 46 - Score: 0.4605
Epoch 46 - Save Best score: 0.4605 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3409(2.2995) 
Epoch: [47][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.1128(2.1128) Grad: 2.4564 LR: 0.000088  
Epoch: [47][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1042(2.0692) Grad: 2.4628 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2265(2.2265) 


Epoch 47 - avg_train_loss: 2.0692  avg_val_loss: 2.3157  time: 3s
Epoch 47 - Score: 0.4584


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3727(2.3157) 
Epoch: [48][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.8494(1.8494) Grad: 2.3300 LR: 0.000409  
Epoch: [48][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1153(2.1093) Grad: 3.0046 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3485(2.3485) 


Epoch 48 - avg_train_loss: 2.1093  avg_val_loss: 2.4420  time: 3s
Epoch 48 - Score: 0.4208


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4812(2.4420) 
Epoch: [49][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.1638(2.1638) Grad: 2.4959 LR: 0.000406  
Epoch: [49][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1509(2.1827) Grad: 2.8964 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.8429(2.8429) 


Epoch 49 - avg_train_loss: 2.1827  avg_val_loss: 2.9718  time: 4s
Epoch 49 - Score: 0.2972


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9911(2.9718) 
Epoch: [50][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.3522(2.3522) Grad: 2.7454 LR: 0.000409  
Epoch: [50][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2581(2.2394) Grad: 3.2133 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5991(2.5991) 


Epoch 50 - avg_train_loss: 2.2394  avg_val_loss: 2.7241  time: 3s
Epoch 50 - Score: 0.3624


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7987(2.7241) 
Epoch: [51][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.1411(2.1411) Grad: 2.7838 LR: 0.000365  
Epoch: [51][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2605(2.2626) Grad: 2.9338 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7467(2.7467) 


Epoch 51 - avg_train_loss: 2.2626  avg_val_loss: 2.8202  time: 3s
Epoch 51 - Score: 0.3488


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8974(2.8202) 
Epoch: [52][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.3374(2.3374) Grad: 2.6944 LR: 0.000278  
Epoch: [52][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2810(2.2147) Grad: 2.8829 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.8436(2.8436) 


Epoch 52 - avg_train_loss: 2.2147  avg_val_loss: 2.9307  time: 4s
Epoch 52 - Score: 0.3305


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9604(2.9307) 
Epoch: [53][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.2108(2.2108) Grad: 2.7223 LR: 0.000174  
Epoch: [53][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1211(2.1477) Grad: 2.5667 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3342(2.3342) 


Epoch 53 - avg_train_loss: 2.1477  avg_val_loss: 2.4606  time: 3s
Epoch 53 - Score: 0.4047


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5198(2.4606) 
Epoch: [54][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9872(1.9872) Grad: 2.7982 LR: 0.000085  
Epoch: [54][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0528(2.0424) Grad: 2.6287 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3638(2.3638) 


Epoch 54 - avg_train_loss: 2.0424  avg_val_loss: 2.4316  time: 3s
Epoch 54 - Score: 0.4246


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4874(2.4316) 
Epoch: [55][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.9404(1.9404) Grad: 2.4993 LR: 0.000038  
Epoch: [55][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 2.0332(1.9706) Grad: 2.3759 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1545(2.1545) 


Epoch 55 - avg_train_loss: 1.9706  avg_val_loss: 2.2378  time: 4s
Epoch 55 - Score: 0.4761
Epoch 55 - Save Best score: 0.4761 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2662(2.2378) 
Epoch: [56][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.9393(1.9393) Grad: 2.6541 LR: 0.000030  
Epoch: [56][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9903(1.9331) Grad: 2.6018 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1327(2.1327) 


Epoch 56 - avg_train_loss: 1.9331  avg_val_loss: 2.2145  time: 3s
Epoch 56 - Score: 0.4813
Epoch 56 - Save Best score: 0.4813 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2653(2.2145) 
Epoch: [57][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.0219(2.0219) Grad: 2.8072 LR: 0.000088  
Epoch: [57][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8450(1.9445) Grad: 2.5369 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1842(2.1842) 


Epoch 57 - avg_train_loss: 1.9445  avg_val_loss: 2.2491  time: 3s
Epoch 57 - Score: 0.4713


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2859(2.2491) 
Epoch: [58][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7533(1.7533) Grad: 2.5732 LR: 0.000409  
Epoch: [58][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1005(1.9897) Grad: 2.6787 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1906(2.1906) 


Epoch 58 - avg_train_loss: 1.9897  avg_val_loss: 2.3158  time: 4s
Epoch 58 - Score: 0.4619


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3908(2.3158) 
Epoch: [59][0/138] Elapsed 0m 0s (remain 0m 19s) Loss: 2.0038(2.0038) Grad: 3.1377 LR: 0.000406  
Epoch: [59][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2106(2.0609) Grad: 3.0913 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4813(2.4813) 


Epoch 59 - avg_train_loss: 2.0609  avg_val_loss: 2.5503  time: 3s
Epoch 59 - Score: 0.3969


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5980(2.5503) 
Epoch: [60][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.1639(2.1639) Grad: 2.6760 LR: 0.000409  
Epoch: [60][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8815(2.1292) Grad: 2.7550 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.1021(3.1021) 


Epoch 60 - avg_train_loss: 2.1292  avg_val_loss: 3.1799  time: 3s
Epoch 60 - Score: 0.2750


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.3159(3.1799) 
Epoch: [61][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.2041(2.2041) Grad: 2.8793 LR: 0.000365  
Epoch: [61][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.1772(2.1562) Grad: 3.0701 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.7236(2.7236) 


Epoch 61 - avg_train_loss: 2.1562  avg_val_loss: 2.8420  time: 4s
Epoch 61 - Score: 0.3302


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9257(2.8420) 
Epoch: [62][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9933(1.9933) Grad: 2.8531 LR: 0.000278  
Epoch: [62][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0687(2.1166) Grad: 2.6113 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7860(2.7860) 


Epoch 62 - avg_train_loss: 2.1166  avg_val_loss: 2.8778  time: 3s
Epoch 62 - Score: 0.3283


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9610(2.8778) 
Epoch: [63][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.0917(2.0917) Grad: 2.9600 LR: 0.000174  
Epoch: [63][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0693(2.0352) Grad: 3.3037 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5624(2.5624) 


Epoch 63 - avg_train_loss: 2.0352  avg_val_loss: 2.6892  time: 3s
Epoch 63 - Score: 0.3759


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7783(2.6892) 
Epoch: [64][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 2.1404(2.1404) Grad: 2.8040 LR: 0.000085  
Epoch: [64][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.9811(1.9560) Grad: 2.7892 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.1991(2.1991) 


Epoch 64 - avg_train_loss: 1.9560  avg_val_loss: 2.2981  time: 4s
Epoch 64 - Score: 0.4563


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3576(2.2981) 
Epoch: [65][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9541(1.9541) Grad: 2.5674 LR: 0.000038  
Epoch: [65][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9047(1.8732) Grad: 2.6132 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1091(2.1091) 


Epoch 65 - avg_train_loss: 1.8732  avg_val_loss: 2.1624  time: 3s
Epoch 65 - Score: 0.4923
Epoch 65 - Save Best score: 0.4923 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2201(2.1624) 
Epoch: [66][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6856(1.6856) Grad: 2.6286 LR: 0.000030  
Epoch: [66][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0384(1.8331) Grad: 3.1684 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0845(2.0845) 


Epoch 66 - avg_train_loss: 1.8331  avg_val_loss: 2.1594  time: 3s
Epoch 66 - Score: 0.4955
Epoch 66 - Save Best score: 0.4955 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2149(2.1594) 
Epoch: [67][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8079(1.8079) Grad: 2.6950 LR: 0.000088  
Epoch: [67][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8216(1.8426) Grad: 2.9111 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0698(2.0698) 


Epoch 67 - avg_train_loss: 1.8426  avg_val_loss: 2.1506  time: 4s
Epoch 67 - Score: 0.4919


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2029(2.1506) 
Epoch: [68][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.7837(1.7837) Grad: 2.6606 LR: 0.000409  
Epoch: [68][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9489(1.8921) Grad: 2.8738 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2804(2.2804) 


Epoch 68 - avg_train_loss: 1.8921  avg_val_loss: 2.3038  time: 3s
Epoch 68 - Score: 0.4532


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3416(2.3038) 
Epoch: [69][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9276(1.9276) Grad: 2.7986 LR: 0.000406  
Epoch: [69][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0509(1.9689) Grad: 3.1146 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3995(2.3995) 


Epoch 69 - avg_train_loss: 1.9689  avg_val_loss: 2.4866  time: 3s
Epoch 69 - Score: 0.4129


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5435(2.4866) 
Epoch: [70][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9745(1.9745) Grad: 3.0017 LR: 0.000409  
Epoch: [70][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9797(2.0402) Grad: 2.8263 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.6175(2.6175) 


Epoch 70 - avg_train_loss: 2.0402  avg_val_loss: 2.7614  time: 3s
Epoch 70 - Score: 0.3558


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7863(2.7614) 
Epoch: [71][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.9697(1.9697) Grad: 3.0611 LR: 0.000365  
Epoch: [71][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.2139(2.0620) Grad: 3.0891 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7856(2.7856) 


Epoch 71 - avg_train_loss: 2.0620  avg_val_loss: 2.8472  time: 4s
Epoch 71 - Score: 0.3442


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9396(2.8472) 
Epoch: [72][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.9480(1.9480) Grad: 3.0672 LR: 0.000278  
Epoch: [72][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 2.0230(2.0447) Grad: 2.7615 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 3.0493(3.0493) 


Epoch 72 - avg_train_loss: 2.0447  avg_val_loss: 3.0517  time: 3s
Epoch 72 - Score: 0.3030


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0423(3.0517) 
Epoch: [73][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 2.0420(2.0420) Grad: 2.5140 LR: 0.000174  
Epoch: [73][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.9293(1.9641) Grad: 2.8273 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5474(2.5474) 


Epoch 73 - avg_train_loss: 1.9641  avg_val_loss: 2.6134  time: 4s
Epoch 73 - Score: 0.3869


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6643(2.6134) 
Epoch: [74][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.8235(1.8235) Grad: 2.6051 LR: 0.000085  
Epoch: [74][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8518(1.8660) Grad: 2.7377 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2382(2.2382) 


Epoch 74 - avg_train_loss: 1.8660  avg_val_loss: 2.3058  time: 4s
Epoch 74 - Score: 0.4632


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3671(2.3058) 
Epoch: [75][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8076(1.8076) Grad: 2.9358 LR: 0.000038  
Epoch: [75][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8292(1.7975) Grad: 2.5380 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0612(2.0612) 


Epoch 75 - avg_train_loss: 1.7975  avg_val_loss: 2.1237  time: 3s
Epoch 75 - Score: 0.5003
Epoch 75 - Save Best score: 0.5003 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1692(2.1237) 
Epoch: [76][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.7924(1.7924) Grad: 2.6879 LR: 0.000030  
Epoch: [76][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7249(1.7528) Grad: 2.7375 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0481(2.0481) 


Epoch 76 - avg_train_loss: 1.7528  avg_val_loss: 2.1096  time: 3s
Epoch 76 - Score: 0.5073
Epoch 76 - Save Best score: 0.5073 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1577(2.1096) 
Epoch: [77][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7254(1.7254) Grad: 2.7339 LR: 0.000088  
Epoch: [77][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8708(1.7620) Grad: 2.7922 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0490(2.0490) 


Epoch 77 - avg_train_loss: 1.7620  avg_val_loss: 2.1174  time: 4s
Epoch 77 - Score: 0.5033


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1812(2.1174) 
Epoch: [78][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.8038(1.8038) Grad: 2.6933 LR: 0.000409  
Epoch: [78][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6764(1.8112) Grad: 2.7009 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1825(2.1825) 


Epoch 78 - avg_train_loss: 1.8112  avg_val_loss: 2.2420  time: 3s
Epoch 78 - Score: 0.4710


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2911(2.2420) 
Epoch: [79][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7311(1.7311) Grad: 2.6781 LR: 0.000406  
Epoch: [79][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9494(1.8862) Grad: 3.1992 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.5628(2.5628) 


Epoch 79 - avg_train_loss: 1.8862  avg_val_loss: 2.6663  time: 3s
Epoch 79 - Score: 0.3750


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7208(2.6663) 
Epoch: [80][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9146(1.9146) Grad: 2.7811 LR: 0.000409  
Epoch: [80][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9445(1.9703) Grad: 3.0009 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4243(2.4243) 


Epoch 80 - avg_train_loss: 1.9703  avg_val_loss: 2.5162  time: 4s
Epoch 80 - Score: 0.4074


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5160(2.5162) 
Epoch: [81][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.8978(1.8978) Grad: 2.7635 LR: 0.000365  
Epoch: [81][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9450(1.9955) Grad: 2.9939 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6840(2.6840) 


Epoch 81 - avg_train_loss: 1.9955  avg_val_loss: 2.7603  time: 3s
Epoch 81 - Score: 0.3494


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8226(2.7603) 
Epoch: [82][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.9789(1.9789) Grad: 2.8909 LR: 0.000278  
Epoch: [82][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.8384(1.9756) Grad: 2.6585 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4162(2.4162) 


Epoch 82 - avg_train_loss: 1.9756  avg_val_loss: 2.5011  time: 4s
Epoch 82 - Score: 0.4185


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5480(2.5011) 
Epoch: [83][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.7622(1.7622) Grad: 2.4725 LR: 0.000174  
Epoch: [83][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9463(1.9011) Grad: 2.8681 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5993(2.5993) 


Epoch 83 - avg_train_loss: 1.9011  avg_val_loss: 2.6536  time: 4s
Epoch 83 - Score: 0.3706


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7062(2.6536) 
Epoch: [84][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.7761(1.7761) Grad: 2.7111 LR: 0.000085  
Epoch: [84][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5757(1.8035) Grad: 2.5627 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1169(2.1169) 


Epoch 84 - avg_train_loss: 1.8035  avg_val_loss: 2.1985  time: 3s
Epoch 84 - Score: 0.4862


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2626(2.1985) 
Epoch: [85][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.9378(1.9378) Grad: 2.7845 LR: 0.000038  
Epoch: [85][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6891(1.7359) Grad: 2.7948 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0171(2.0171) 


Epoch 85 - avg_train_loss: 1.7359  avg_val_loss: 2.0850  time: 3s
Epoch 85 - Score: 0.5099
Epoch 85 - Save Best score: 0.5099 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1159(2.0850) 
Epoch: [86][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7263(1.7263) Grad: 2.4566 LR: 0.000030  
Epoch: [86][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7131(1.6924) Grad: 2.6880 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0116(2.0116) 


Epoch 86 - avg_train_loss: 1.6924  avg_val_loss: 2.0658  time: 4s
Epoch 86 - Score: 0.5167
Epoch 86 - Save Best score: 0.5167 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1111(2.0658) 
Epoch: [87][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7667(1.7667) Grad: 2.5139 LR: 0.000088  
Epoch: [87][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8137(1.6987) Grad: 2.8321 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0061(2.0061) 


Epoch 87 - avg_train_loss: 1.6987  avg_val_loss: 2.0852  time: 3s
Epoch 87 - Score: 0.5093


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1248(2.0852) 
Epoch: [88][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6192(1.6192) Grad: 2.6199 LR: 0.000409  
Epoch: [88][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6714(1.7493) Grad: 2.9438 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.1024(2.1024) 


Epoch 88 - avg_train_loss: 1.7493  avg_val_loss: 2.1723  time: 3s
Epoch 88 - Score: 0.4906


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1863(2.1723) 
Epoch: [89][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8006(1.8006) Grad: 2.9930 LR: 0.000406  
Epoch: [89][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8837(1.8445) Grad: 2.7777 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.5487(2.5487) 


Epoch 89 - avg_train_loss: 1.8445  avg_val_loss: 2.5618  time: 4s
Epoch 89 - Score: 0.3924


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5881(2.5618) 
Epoch: [90][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8279(1.8279) Grad: 2.8409 LR: 0.000409  
Epoch: [90][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8091(1.9046) Grad: 3.1172 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7053(2.7053) 


Epoch 90 - avg_train_loss: 1.9046  avg_val_loss: 2.8107  time: 3s
Epoch 90 - Score: 0.3379


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8493(2.8107) 
Epoch: [91][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.8928(1.8928) Grad: 2.9789 LR: 0.000365  
Epoch: [91][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 2.1850(1.9435) Grad: 3.3116 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6150(2.6150) 


Epoch 91 - avg_train_loss: 1.9435  avg_val_loss: 2.6984  time: 4s
Epoch 91 - Score: 0.3732


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7641(2.6984) 
Epoch: [92][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9255(1.9255) Grad: 3.0747 LR: 0.000278  
Epoch: [92][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9577(1.9211) Grad: 2.8239 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.7679(2.7679) 


Epoch 92 - avg_train_loss: 1.9211  avg_val_loss: 2.8806  time: 4s
Epoch 92 - Score: 0.3318


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9327(2.8806) 
Epoch: [93][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8471(1.8471) Grad: 2.8171 LR: 0.000174  
Epoch: [93][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9215(1.8286) Grad: 2.9638 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2712(2.2712) 


Epoch 93 - avg_train_loss: 1.8286  avg_val_loss: 2.3381  time: 3s
Epoch 93 - Score: 0.4494


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4138(2.3381) 
Epoch: [94][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.7010(1.7010) Grad: 2.5657 LR: 0.000085  
Epoch: [94][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7915(1.7427) Grad: 2.8823 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0427(2.0427) 


Epoch 94 - avg_train_loss: 1.7427  avg_val_loss: 2.1547  time: 3s
Epoch 94 - Score: 0.4927


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2310(2.1547) 
Epoch: [95][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6172(1.6172) Grad: 2.6537 LR: 0.000038  
Epoch: [95][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6260(1.6744) Grad: 2.6543 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0009(2.0009) 


Epoch 95 - avg_train_loss: 1.6744  avg_val_loss: 2.0502  time: 4s
Epoch 95 - Score: 0.5194
Epoch 95 - Save Best score: 0.5194 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0973(2.0502) 
Epoch: [96][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5123(1.5123) Grad: 2.5750 LR: 0.000030  
Epoch: [96][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5528(1.6296) Grad: 2.8359 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9709(1.9709) 


Epoch 96 - avg_train_loss: 1.6296  avg_val_loss: 2.0279  time: 3s
Epoch 96 - Score: 0.5254
Epoch 96 - Save Best score: 0.5254 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0825(2.0279) 
Epoch: [97][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.6575(1.6575) Grad: 2.5823 LR: 0.000088  
Epoch: [97][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5962(1.6363) Grad: 3.0601 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.0010(2.0010) 


Epoch 97 - avg_train_loss: 1.6363  avg_val_loss: 2.0757  time: 4s
Epoch 97 - Score: 0.5167


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1709(2.0757) 
Epoch: [98][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6484(1.6484) Grad: 2.8017 LR: 0.000409  
Epoch: [98][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6989(1.6906) Grad: 2.8033 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0399(2.0399) 


Epoch 98 - avg_train_loss: 1.6906  avg_val_loss: 2.1425  time: 3s
Epoch 98 - Score: 0.4971


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2216(2.1425) 
Epoch: [99][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.6292(1.6292) Grad: 2.6539 LR: 0.000406  
Epoch: [99][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7573(1.7778) Grad: 2.8890 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3098(2.3098) 


Epoch 99 - avg_train_loss: 1.7778  avg_val_loss: 2.4034  time: 4s
Epoch 99 - Score: 0.4363


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5014(2.4034) 
Epoch: [100][0/138] Elapsed 0m 0s (remain 0m 26s) Loss: 1.6803(1.6803) Grad: 2.8341 LR: 0.000409  
Epoch: [100][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9215(1.8688) Grad: 3.0322 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5191(2.5191) 


Epoch 100 - avg_train_loss: 1.8688  avg_val_loss: 2.6138  time: 4s
Epoch 100 - Score: 0.3944


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6852(2.6138) 
Epoch: [101][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6901(1.6901) Grad: 2.9323 LR: 0.000365  
Epoch: [101][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8397(1.8964) Grad: 2.7219 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6448(2.6448) 


Epoch 101 - avg_train_loss: 1.8964  avg_val_loss: 2.7544  time: 3s
Epoch 101 - Score: 0.3603


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8244(2.7544) 
Epoch: [102][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 2.0209(2.0209) Grad: 3.0800 LR: 0.000278  
Epoch: [102][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8065(1.8763) Grad: 2.7490 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7339(2.7339) 


Epoch 102 - avg_train_loss: 1.8763  avg_val_loss: 2.8253  time: 4s
Epoch 102 - Score: 0.3379


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8821(2.8253) 
Epoch: [103][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 2.0317(2.0317) Grad: 3.0148 LR: 0.000174  
Epoch: [103][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7417(1.7982) Grad: 2.6759 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2824(2.2824) 


Epoch 103 - avg_train_loss: 1.7982  avg_val_loss: 2.3985  time: 3s
Epoch 103 - Score: 0.4356


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5095(2.3985) 
Epoch: [104][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6695(1.6695) Grad: 2.6594 LR: 0.000085  
Epoch: [104][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6551(1.7094) Grad: 2.5213 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1664(2.1664) 


Epoch 104 - avg_train_loss: 1.7094  avg_val_loss: 2.2148  time: 3s
Epoch 104 - Score: 0.4774


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2335(2.2148) 
Epoch: [105][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7526(1.7526) Grad: 2.8119 LR: 0.000038  
Epoch: [105][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5758(1.6316) Grad: 2.6662 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9746(1.9746) 


Epoch 105 - avg_train_loss: 1.6316  avg_val_loss: 2.0660  time: 4s
Epoch 105 - Score: 0.5120


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1372(2.0660) 
Epoch: [106][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5601(1.5601) Grad: 2.4796 LR: 0.000030  
Epoch: [106][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4936(1.5847) Grad: 2.6726 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9561(1.9561) 


Epoch 106 - avg_train_loss: 1.5847  avg_val_loss: 2.0168  time: 3s
Epoch 106 - Score: 0.5284
Epoch 106 - Save Best score: 0.5284 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0629(2.0168) 
Epoch: [107][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6486(1.6486) Grad: 2.7961 LR: 0.000088  
Epoch: [107][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6975(1.5947) Grad: 3.1137 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9882(1.9882) 


Epoch 107 - avg_train_loss: 1.5947  avg_val_loss: 2.0357  time: 3s
Epoch 107 - Score: 0.5207


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0961(2.0357) 
Epoch: [108][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6456(1.6456) Grad: 2.7276 LR: 0.000409  
Epoch: [108][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7506(1.6509) Grad: 2.8820 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.1178(2.1178) 


Epoch 108 - avg_train_loss: 1.6509  avg_val_loss: 2.1787  time: 4s
Epoch 108 - Score: 0.4865


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.2156(2.1787) 
Epoch: [109][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.6330(1.6330) Grad: 2.9092 LR: 0.000406  
Epoch: [109][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7318(1.7309) Grad: 3.0241 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2433(2.2433) 


Epoch 109 - avg_train_loss: 1.7309  avg_val_loss: 2.3274  time: 3s
Epoch 109 - Score: 0.4552


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4168(2.3274) 
Epoch: [110][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5141(1.5141) Grad: 2.4919 LR: 0.000409  
Epoch: [110][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9059(1.8182) Grad: 2.8062 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7125(2.7125) 


Epoch 110 - avg_train_loss: 1.8182  avg_val_loss: 2.7698  time: 3s
Epoch 110 - Score: 0.3532


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8237(2.7698) 
Epoch: [111][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6196(1.6196) Grad: 2.8043 LR: 0.000365  
Epoch: [111][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.9509(1.8518) Grad: 2.9093 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.8495(2.8495) 


Epoch 111 - avg_train_loss: 1.8518  avg_val_loss: 2.8677  time: 4s
Epoch 111 - Score: 0.3460


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9244(2.8677) 
Epoch: [112][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.8505(1.8505) Grad: 2.8676 LR: 0.000278  
Epoch: [112][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8757(1.8371) Grad: 2.8194 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4258(2.4258) 


Epoch 112 - avg_train_loss: 1.8371  avg_val_loss: 2.4846  time: 3s
Epoch 112 - Score: 0.4150


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5396(2.4846) 
Epoch: [113][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7937(1.7937) Grad: 2.7812 LR: 0.000174  
Epoch: [113][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7789(1.7578) Grad: 2.6772 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1401(2.1401) 


Epoch 113 - avg_train_loss: 1.7578  avg_val_loss: 2.2111  time: 3s
Epoch 113 - Score: 0.4906


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2504(2.2111) 
Epoch: [114][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.8573(1.8573) Grad: 2.8230 LR: 0.000085  
Epoch: [114][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7931(1.6759) Grad: 2.9923 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0826(2.0826) 


Epoch 114 - avg_train_loss: 1.6759  avg_val_loss: 2.1466  time: 4s
Epoch 114 - Score: 0.4928


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1764(2.1466) 
Epoch: [115][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6846(1.6846) Grad: 3.3681 LR: 0.000038  
Epoch: [115][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6320(1.5907) Grad: 2.9702 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9357(1.9357) 


Epoch 115 - avg_train_loss: 1.5907  avg_val_loss: 2.0171  time: 3s
Epoch 115 - Score: 0.5269


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0666(2.0171) 
Epoch: [116][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.7405(1.7405) Grad: 2.6959 LR: 0.000030  
Epoch: [116][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5252(1.5553) Grad: 2.8185 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9388(1.9388) 


Epoch 116 - avg_train_loss: 1.5553  avg_val_loss: 2.0011  time: 3s
Epoch 116 - Score: 0.5304
Epoch 116 - Save Best score: 0.5304 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0548(2.0011) 
Epoch: [117][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.4568(1.4568) Grad: 2.6677 LR: 0.000088  
Epoch: [117][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5684(1.5581) Grad: 2.9634 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 12s) Loss: 1.9251(1.9251) 


Epoch 117 - avg_train_loss: 1.5581  avg_val_loss: 2.0061  time: 4s
Epoch 117 - Score: 0.5280


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.0697(2.0061) 
Epoch: [118][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5498(1.5498) Grad: 2.5731 LR: 0.000409  
Epoch: [118][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5716(1.6046) Grad: 2.6112 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0557(2.0557) 


Epoch 118 - avg_train_loss: 1.6046  avg_val_loss: 2.1128  time: 3s
Epoch 118 - Score: 0.4997


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1402(2.1128) 
Epoch: [119][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6788(1.6788) Grad: 3.0755 LR: 0.000406  
Epoch: [119][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6632(1.6974) Grad: 2.6430 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1582(2.1582) 


Epoch 119 - avg_train_loss: 1.6974  avg_val_loss: 2.2635  time: 3s
Epoch 119 - Score: 0.4694


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3212(2.2635) 
Epoch: [120][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6515(1.6515) Grad: 2.8537 LR: 0.000409  
Epoch: [120][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7188(1.7753) Grad: 2.6613 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5031(2.5031) 


Epoch 120 - avg_train_loss: 1.7753  avg_val_loss: 2.5421  time: 4s
Epoch 120 - Score: 0.4016


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5783(2.5421) 
Epoch: [121][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.7911(1.7911) Grad: 3.1673 LR: 0.000365  
Epoch: [121][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8466(1.8160) Grad: 2.6678 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4316(2.4316) 


Epoch 121 - avg_train_loss: 1.8160  avg_val_loss: 2.5100  time: 3s
Epoch 121 - Score: 0.4228


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5507(2.5100) 
Epoch: [122][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8404(1.8404) Grad: 2.7723 LR: 0.000278  
Epoch: [122][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8886(1.7900) Grad: 3.0675 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5826(2.5826) 


Epoch 122 - avg_train_loss: 1.7900  avg_val_loss: 2.6937  time: 3s
Epoch 122 - Score: 0.3648


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6961(2.6937) 
Epoch: [123][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7236(1.7236) Grad: 2.7735 LR: 0.000174  
Epoch: [123][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6963(1.7186) Grad: 2.6427 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2382(2.2382) 


Epoch 123 - avg_train_loss: 1.7186  avg_val_loss: 2.3191  time: 4s
Epoch 123 - Score: 0.4424


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3455(2.3191) 
Epoch: [124][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5709(1.5709) Grad: 2.6421 LR: 0.000085  
Epoch: [124][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5049(1.6262) Grad: 2.7665 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.0671(2.0671) 


Epoch 124 - avg_train_loss: 1.6262  avg_val_loss: 2.1315  time: 3s
Epoch 124 - Score: 0.4996


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2374(2.1315) 
Epoch: [125][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6095(1.6095) Grad: 2.6415 LR: 0.000038  
Epoch: [125][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4635(1.5454) Grad: 2.5991 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9270(1.9270) 


Epoch 125 - avg_train_loss: 1.5454  avg_val_loss: 1.9992  time: 3s
Epoch 125 - Score: 0.5290


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0367(1.9992) 
Epoch: [126][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4807(1.4807) Grad: 2.6735 LR: 0.000030  
Epoch: [126][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4466(1.5125) Grad: 2.6653 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9147(1.9147) 


Epoch 126 - avg_train_loss: 1.5125  avg_val_loss: 1.9790  time: 4s
Epoch 126 - Score: 0.5374
Epoch 126 - Save Best score: 0.5374 Model


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.0244(1.9790) 
Epoch: [127][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.5354(1.5354) Grad: 2.6954 LR: 0.000088  
Epoch: [127][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6056(1.5223) Grad: 2.7248 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9388(1.9388) 


Epoch 127 - avg_train_loss: 1.5223  avg_val_loss: 2.0238  time: 3s
Epoch 127 - Score: 0.5286


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0903(2.0238) 
Epoch: [128][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4655(1.4655) Grad: 2.7242 LR: 0.000409  
Epoch: [128][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4624(1.5677) Grad: 2.9170 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0113(2.0113) 


Epoch 128 - avg_train_loss: 1.5677  avg_val_loss: 2.1055  time: 3s
Epoch 128 - Score: 0.5062


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1666(2.1055) 
Epoch: [129][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5692(1.5692) Grad: 2.7008 LR: 0.000406  
Epoch: [129][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7705(1.6695) Grad: 2.9567 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3507(2.3507) 


Epoch 129 - avg_train_loss: 1.6695  avg_val_loss: 2.4735  time: 3s
Epoch 129 - Score: 0.4161


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5061(2.4735) 
Epoch: [130][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.7097(1.7097) Grad: 2.9585 LR: 0.000409  
Epoch: [130][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6586(1.7443) Grad: 2.7751 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5023(2.5023) 


Epoch 130 - avg_train_loss: 1.7443  avg_val_loss: 2.5655  time: 3s
Epoch 130 - Score: 0.4005


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6539(2.5655) 
Epoch: [131][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.8338(1.8338) Grad: 2.9372 LR: 0.000365  
Epoch: [131][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8306(1.7950) Grad: 2.9979 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7797(2.7797) 


Epoch 131 - avg_train_loss: 1.7950  avg_val_loss: 2.8068  time: 3s
Epoch 131 - Score: 0.3433


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8895(2.8068) 
Epoch: [132][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7307(1.7307) Grad: 2.9681 LR: 0.000278  
Epoch: [132][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6719(1.7598) Grad: 2.7302 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4015(2.4015) 


Epoch 132 - avg_train_loss: 1.7598  avg_val_loss: 2.5078  time: 3s
Epoch 132 - Score: 0.4120


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5789(2.5078) 
Epoch: [133][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6151(1.6151) Grad: 2.9312 LR: 0.000174  
Epoch: [133][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6588(1.6970) Grad: 2.8403 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.3345(2.3345) 


Epoch 133 - avg_train_loss: 1.6970  avg_val_loss: 2.3943  time: 4s
Epoch 133 - Score: 0.4420


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4101(2.3943) 
Epoch: [134][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6135(1.6135) Grad: 2.9013 LR: 0.000085  
Epoch: [134][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6061(1.5915) Grad: 2.6643 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0610(2.0610) 


Epoch 134 - avg_train_loss: 1.5915  avg_val_loss: 2.1154  time: 3s
Epoch 134 - Score: 0.5004


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1800(2.1154) 
Epoch: [135][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5178(1.5178) Grad: 2.6988 LR: 0.000038  
Epoch: [135][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6038(1.5205) Grad: 2.7345 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9164(1.9164) 


Epoch 135 - avg_train_loss: 1.5205  avg_val_loss: 1.9828  time: 4s
Epoch 135 - Score: 0.5373


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.0465(1.9828) 
Epoch: [136][0/138] Elapsed 0m 0s (remain 0m 32s) Loss: 1.5301(1.5301) Grad: 2.7999 LR: 0.000030  
Epoch: [136][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4719(1.4842) Grad: 3.0366 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9072(1.9072) 


Epoch 136 - avg_train_loss: 1.4842  avg_val_loss: 1.9564  time: 4s
Epoch 136 - Score: 0.5398
Epoch 136 - Save Best score: 0.5398 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0065(1.9564) 
Epoch: [137][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3870(1.3870) Grad: 2.6704 LR: 0.000088  
Epoch: [137][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4383(1.4833) Grad: 2.8210 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8956(1.8956) 


Epoch 137 - avg_train_loss: 1.4833  avg_val_loss: 1.9773  time: 3s
Epoch 137 - Score: 0.5393


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0524(1.9773) 
Epoch: [138][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6119(1.6119) Grad: 3.0321 LR: 0.000409  
Epoch: [138][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5661(1.5478) Grad: 2.9985 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0807(2.0807) 


Epoch 138 - avg_train_loss: 1.5478  avg_val_loss: 2.1449  time: 3s
Epoch 138 - Score: 0.4955


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1980(2.1449) 
Epoch: [139][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4299(1.4299) Grad: 2.7025 LR: 0.000406  
Epoch: [139][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5994(1.6337) Grad: 3.0677 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.0949(2.0949) 


Epoch 139 - avg_train_loss: 1.6337  avg_val_loss: 2.1969  time: 4s
Epoch 139 - Score: 0.4859


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2412(2.1969) 
Epoch: [140][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5222(1.5222) Grad: 2.8637 LR: 0.000409  
Epoch: [140][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7872(1.7197) Grad: 2.9230 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4367(2.4367) 


Epoch 140 - avg_train_loss: 1.7197  avg_val_loss: 2.5403  time: 3s
Epoch 140 - Score: 0.4022


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6052(2.5403) 
Epoch: [141][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7745(1.7745) Grad: 3.1728 LR: 0.000365  
Epoch: [141][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6478(1.7563) Grad: 2.6556 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6322(2.6322) 


Epoch 141 - avg_train_loss: 1.7563  avg_val_loss: 2.7210  time: 3s
Epoch 141 - Score: 0.3599


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8174(2.7210) 
Epoch: [142][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.9352(1.9352) Grad: 2.8092 LR: 0.000278  
Epoch: [142][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7017(1.7299) Grad: 3.0671 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 10s) Loss: 2.5873(2.5873) 


Epoch 142 - avg_train_loss: 1.7299  avg_val_loss: 2.6909  time: 4s
Epoch 142 - Score: 0.3666


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7342(2.6909) 
Epoch: [143][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7363(1.7363) Grad: 2.9175 LR: 0.000174  
Epoch: [143][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6361(1.6613) Grad: 3.0137 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3885(2.3885) 


Epoch 143 - avg_train_loss: 1.6613  avg_val_loss: 2.4827  time: 3s
Epoch 143 - Score: 0.4200


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5298(2.4827) 
Epoch: [144][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6648(1.6648) Grad: 2.7700 LR: 0.000085  
Epoch: [144][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5078(1.5702) Grad: 2.8078 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0702(2.0702) 


Epoch 144 - avg_train_loss: 1.5702  avg_val_loss: 2.1134  time: 4s
Epoch 144 - Score: 0.5036


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.1670(2.1134) 
Epoch: [145][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5762(1.5762) Grad: 2.9903 LR: 0.000038  
Epoch: [145][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5401(1.4984) Grad: 2.7328 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9452(1.9452) 


Epoch 145 - avg_train_loss: 1.4984  avg_val_loss: 1.9856  time: 4s
Epoch 145 - Score: 0.5355


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0513(1.9856) 
Epoch: [146][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5040(1.5040) Grad: 2.6064 LR: 0.000030  
Epoch: [146][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5068(1.4506) Grad: 2.8456 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8765(1.8765) 


Epoch 146 - avg_train_loss: 1.4506  avg_val_loss: 1.9309  time: 3s
Epoch 146 - Score: 0.5451
Epoch 146 - Save Best score: 0.5451 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9634(1.9309) 
Epoch: [147][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5357(1.5357) Grad: 2.9974 LR: 0.000088  
Epoch: [147][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5480(1.4616) Grad: 2.8047 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9173(1.9173) 


Epoch 147 - avg_train_loss: 1.4616  avg_val_loss: 1.9748  time: 3s
Epoch 147 - Score: 0.5385


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0262(1.9748) 
Epoch: [148][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4756(1.4756) Grad: 2.8471 LR: 0.000409  
Epoch: [148][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5406(1.5159) Grad: 2.8369 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9842(1.9842) 


Epoch 148 - avg_train_loss: 1.5159  avg_val_loss: 2.0861  time: 4s
Epoch 148 - Score: 0.5055


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2017(2.0861) 
Epoch: [149][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5122(1.5122) Grad: 2.9830 LR: 0.000406  
Epoch: [149][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5791(1.6091) Grad: 2.9984 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2278(2.2278) 


Epoch 149 - avg_train_loss: 1.6091  avg_val_loss: 2.2513  time: 3s
Epoch 149 - Score: 0.4690


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3350(2.2513) 
Epoch: [150][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4880(1.4880) Grad: 2.9908 LR: 0.000409  
Epoch: [150][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7150(1.6895) Grad: 2.8318 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4065(2.4065) 


Epoch 150 - avg_train_loss: 1.6895  avg_val_loss: 2.4308  time: 3s
Epoch 150 - Score: 0.4323


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4751(2.4308) 
Epoch: [151][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6294(1.6294) Grad: 2.7284 LR: 0.000365  
Epoch: [151][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7557(1.7293) Grad: 2.7907 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.7921(2.7921) 


Epoch 151 - avg_train_loss: 1.7293  avg_val_loss: 2.8405  time: 3s
Epoch 151 - Score: 0.3338


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9742(2.8405) 
Epoch: [152][0/138] Elapsed 0m 0s (remain 0m 32s) Loss: 1.6919(1.6919) Grad: 3.0965 LR: 0.000278  
Epoch: [152][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7216(1.7089) Grad: 2.8617 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4750(2.4750) 


Epoch 152 - avg_train_loss: 1.7089  avg_val_loss: 2.5299  time: 4s
Epoch 152 - Score: 0.4022


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5679(2.5299) 
Epoch: [153][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6068(1.6068) Grad: 2.5587 LR: 0.000174  
Epoch: [153][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6734(1.6393) Grad: 2.8543 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.2624(2.2624) 


Epoch 153 - avg_train_loss: 1.6393  avg_val_loss: 2.3227  time: 4s
Epoch 153 - Score: 0.4462


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.3214(2.3227) 
Epoch: [154][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6379(1.6379) Grad: 2.8828 LR: 0.000085  
Epoch: [154][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6512(1.5549) Grad: 2.8158 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9869(1.9869) 


Epoch 154 - avg_train_loss: 1.5549  avg_val_loss: 2.0694  time: 3s
Epoch 154 - Score: 0.5144


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1207(2.0694) 
Epoch: [155][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.5201(1.5201) Grad: 3.0215 LR: 0.000038  
Epoch: [155][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3358(1.4682) Grad: 2.6164 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9083(1.9083) 


Epoch 155 - avg_train_loss: 1.4682  avg_val_loss: 1.9745  time: 4s
Epoch 155 - Score: 0.5377


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0235(1.9745) 
Epoch: [156][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5966(1.5966) Grad: 2.9489 LR: 0.000030  
Epoch: [156][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2847(1.4349) Grad: 2.6355 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8684(1.8684) 


Epoch 156 - avg_train_loss: 1.4349  avg_val_loss: 1.9344  time: 3s
Epoch 156 - Score: 0.5492
Epoch 156 - Save Best score: 0.5492 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9858(1.9344) 
Epoch: [157][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3369(1.3369) Grad: 2.7038 LR: 0.000088  
Epoch: [157][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4296(1.4292) Grad: 2.7903 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8965(1.8965) 


Epoch 157 - avg_train_loss: 1.4292  avg_val_loss: 1.9470  time: 3s
Epoch 157 - Score: 0.5430


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9876(1.9470) 
Epoch: [158][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3637(1.3637) Grad: 2.9262 LR: 0.000409  
Epoch: [158][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5678(1.4951) Grad: 3.1174 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0373(2.0373) 


Epoch 158 - avg_train_loss: 1.4951  avg_val_loss: 2.0977  time: 4s
Epoch 158 - Score: 0.5075


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1305(2.0977) 
Epoch: [159][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4483(1.4483) Grad: 2.8667 LR: 0.000406  
Epoch: [159][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5474(1.5764) Grad: 2.8344 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4125(2.4125) 


Epoch 159 - avg_train_loss: 1.5764  avg_val_loss: 2.4373  time: 3s
Epoch 159 - Score: 0.4264


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5203(2.4373) 
Epoch: [160][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6504(1.6504) Grad: 2.8300 LR: 0.000409  
Epoch: [160][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5395(1.6736) Grad: 2.5415 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.6092(2.6092) 


Epoch 160 - avg_train_loss: 1.6736  avg_val_loss: 2.6878  time: 3s
Epoch 160 - Score: 0.3701


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7790(2.6878) 
Epoch: [161][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6631(1.6631) Grad: 2.8842 LR: 0.000365  
Epoch: [161][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5703(1.7098) Grad: 2.7042 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5301(2.5301) 


Epoch 161 - avg_train_loss: 1.7098  avg_val_loss: 2.5996  time: 4s
Epoch 161 - Score: 0.3946


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6530(2.5996) 
Epoch: [162][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.7537(1.7537) Grad: 2.9451 LR: 0.000278  
Epoch: [162][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7139(1.6931) Grad: 2.8093 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.6569(2.6569) 


Epoch 162 - avg_train_loss: 1.6931  avg_val_loss: 2.7077  time: 4s
Epoch 162 - Score: 0.3644


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.8219(2.7077) 
Epoch: [163][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5526(1.5526) Grad: 2.9038 LR: 0.000174  
Epoch: [163][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7084(1.6176) Grad: 2.9005 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1282(2.1282) 


Epoch 163 - avg_train_loss: 1.6176  avg_val_loss: 2.2055  time: 3s
Epoch 163 - Score: 0.4776


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3243(2.2055) 
Epoch: [164][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.5714(1.5714) Grad: 2.7458 LR: 0.000085  
Epoch: [164][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5160(1.5358) Grad: 2.7478 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9150(1.9150) 


Epoch 164 - avg_train_loss: 1.5358  avg_val_loss: 2.0142  time: 4s
Epoch 164 - Score: 0.5264


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0543(2.0142) 
Epoch: [165][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5331(1.5331) Grad: 2.8531 LR: 0.000038  
Epoch: [165][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4022(1.4456) Grad: 2.5605 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8781(1.8781) 


Epoch 165 - avg_train_loss: 1.4456  avg_val_loss: 1.9483  time: 3s
Epoch 165 - Score: 0.5439


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0191(1.9483) 
Epoch: [166][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3351(1.3351) Grad: 2.5400 LR: 0.000030  
Epoch: [166][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3484(1.4034) Grad: 2.6320 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8729(1.8729) 


Epoch 166 - avg_train_loss: 1.4034  avg_val_loss: 1.9250  time: 3s
Epoch 166 - Score: 0.5484


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9723(1.9250) 
Epoch: [167][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3587(1.3587) Grad: 2.9506 LR: 0.000088  
Epoch: [167][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4121(1.4141) Grad: 2.9350 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8667(1.8667) 


Epoch 167 - avg_train_loss: 1.4141  avg_val_loss: 1.9277  time: 4s
Epoch 167 - Score: 0.5428


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9827(1.9277) 
Epoch: [168][0/138] Elapsed 0m 0s (remain 0m 19s) Loss: 1.3380(1.3380) Grad: 2.7570 LR: 0.000409  
Epoch: [168][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4833(1.4666) Grad: 2.7041 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9818(1.9818) 


Epoch 168 - avg_train_loss: 1.4666  avg_val_loss: 2.0320  time: 3s
Epoch 168 - Score: 0.5195


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0817(2.0320) 
Epoch: [169][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3801(1.3801) Grad: 3.0239 LR: 0.000406  
Epoch: [169][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5959(1.5540) Grad: 3.0982 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.1490(2.1490) 


Epoch 169 - avg_train_loss: 1.5540  avg_val_loss: 2.2071  time: 3s
Epoch 169 - Score: 0.4730


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2445(2.2071) 
Epoch: [170][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4819(1.4819) Grad: 2.8307 LR: 0.000409  
Epoch: [170][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6041(1.6497) Grad: 2.7804 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6247(2.6247) 


Epoch 170 - avg_train_loss: 1.6497  avg_val_loss: 2.6986  time: 4s
Epoch 170 - Score: 0.3754


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7316(2.6986) 
Epoch: [171][0/138] Elapsed 0m 0s (remain 0m 26s) Loss: 1.6465(1.6465) Grad: 2.9474 LR: 0.000365  
Epoch: [171][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5967(1.6874) Grad: 2.8236 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5555(2.5555) 


Epoch 171 - avg_train_loss: 1.6874  avg_val_loss: 2.6196  time: 4s
Epoch 171 - Score: 0.3990


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.6803(2.6196) 
Epoch: [172][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6798(1.6798) Grad: 2.8890 LR: 0.000278  
Epoch: [172][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6176(1.6661) Grad: 2.7884 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5167(2.5167) 


Epoch 172 - avg_train_loss: 1.6661  avg_val_loss: 2.5648  time: 3s
Epoch 172 - Score: 0.4076


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6227(2.5648) 
Epoch: [173][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6552(1.6552) Grad: 2.7878 LR: 0.000174  
Epoch: [173][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6298(1.5967) Grad: 2.7322 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3466(2.3466) 


Epoch 173 - avg_train_loss: 1.5967  avg_val_loss: 2.3824  time: 3s
Epoch 173 - Score: 0.4444


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4562(2.3824) 
Epoch: [174][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.6374(1.6374) Grad: 2.9353 LR: 0.000085  
Epoch: [174][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4924(1.5126) Grad: 2.9542 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9597(1.9597) 


Epoch 174 - avg_train_loss: 1.5126  avg_val_loss: 2.0359  time: 3s
Epoch 174 - Score: 0.5146


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1147(2.0359) 
Epoch: [175][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6116(1.6116) Grad: 2.9145 LR: 0.000038  
Epoch: [175][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1938(1.4271) Grad: 2.7361 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8569(1.8569) 


Epoch 175 - avg_train_loss: 1.4271  avg_val_loss: 1.9303  time: 3s
Epoch 175 - Score: 0.5474


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9808(1.9303) 
Epoch: [176][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2973(1.2973) Grad: 2.5977 LR: 0.000030  
Epoch: [176][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4394(1.3876) Grad: 2.7921 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8576(1.8576) 


Epoch 176 - avg_train_loss: 1.3876  avg_val_loss: 1.9213  time: 3s
Epoch 176 - Score: 0.5502
Epoch 176 - Save Best score: 0.5502 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9754(1.9213) 
Epoch: [177][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3924(1.3924) Grad: 2.8188 LR: 0.000088  
Epoch: [177][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3501(1.3989) Grad: 2.7546 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8994(1.8994) 


Epoch 177 - avg_train_loss: 1.3989  avg_val_loss: 1.9404  time: 4s
Epoch 177 - Score: 0.5464


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9903(1.9404) 
Epoch: [178][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2599(1.2599) Grad: 2.5295 LR: 0.000409  
Epoch: [178][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5564(1.4478) Grad: 2.9486 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0283(2.0283) 


Epoch 178 - avg_train_loss: 1.4478  avg_val_loss: 2.0885  time: 3s
Epoch 178 - Score: 0.5158


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1630(2.0885) 
Epoch: [179][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4330(1.4330) Grad: 2.9017 LR: 0.000406  
Epoch: [179][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5663(1.5384) Grad: 2.7020 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2261(2.2261) 


Epoch 179 - avg_train_loss: 1.5384  avg_val_loss: 2.2964  time: 3s
Epoch 179 - Score: 0.4658


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3333(2.2964) 
Epoch: [180][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6917(1.6917) Grad: 2.9665 LR: 0.000409  
Epoch: [180][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6416(1.6238) Grad: 2.9677 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.3975(2.3975) 


Epoch 180 - avg_train_loss: 1.6238  avg_val_loss: 2.4487  time: 4s
Epoch 180 - Score: 0.4304


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.5091(2.4487) 
Epoch: [181][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5978(1.5978) Grad: 2.7442 LR: 0.000365  
Epoch: [181][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7580(1.6708) Grad: 2.7009 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6277(2.6277) 


Epoch 181 - avg_train_loss: 1.6708  avg_val_loss: 2.7414  time: 3s
Epoch 181 - Score: 0.3587


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7552(2.7414) 
Epoch: [182][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.7524(1.7524) Grad: 3.0685 LR: 0.000278  
Epoch: [182][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6405(1.6549) Grad: 2.9515 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5596(2.5596) 


Epoch 182 - avg_train_loss: 1.6549  avg_val_loss: 2.7216  time: 3s
Epoch 182 - Score: 0.3815


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7682(2.7216) 
Epoch: [183][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6238(1.6238) Grad: 2.7932 LR: 0.000174  
Epoch: [183][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6267(1.5888) Grad: 2.6792 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0079(2.0079) 


Epoch 183 - avg_train_loss: 1.5888  avg_val_loss: 2.1279  time: 4s
Epoch 183 - Score: 0.4973


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1728(2.1279) 
Epoch: [184][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4904(1.4904) Grad: 2.7483 LR: 0.000085  
Epoch: [184][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5344(1.4912) Grad: 2.8035 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9781(1.9781) 


Epoch 184 - avg_train_loss: 1.4912  avg_val_loss: 2.0631  time: 3s
Epoch 184 - Score: 0.5142


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1151(2.0631) 
Epoch: [185][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4471(1.4471) Grad: 2.7335 LR: 0.000038  
Epoch: [185][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4477(1.4159) Grad: 2.8850 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8416(1.8416) 


Epoch 185 - avg_train_loss: 1.4159  avg_val_loss: 1.9201  time: 3s
Epoch 185 - Score: 0.5494


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9435(1.9201) 
Epoch: [186][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4010(1.4010) Grad: 2.8759 LR: 0.000030  
Epoch: [186][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3489(1.3709) Grad: 2.8769 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8622(1.8622) 


Epoch 186 - avg_train_loss: 1.3709  avg_val_loss: 1.9071  time: 4s
Epoch 186 - Score: 0.5528
Epoch 186 - Save Best score: 0.5528 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9361(1.9071) 
Epoch: [187][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3634(1.3634) Grad: 2.8470 LR: 0.000088  
Epoch: [187][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3654(1.3780) Grad: 2.8519 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9016(1.9016) 


Epoch 187 - avg_train_loss: 1.3780  avg_val_loss: 1.9571  time: 3s
Epoch 187 - Score: 0.5400


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9934(1.9571) 
Epoch: [188][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3925(1.3925) Grad: 2.8873 LR: 0.000409  
Epoch: [188][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3308(1.4366) Grad: 2.6956 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0460(2.0460) 


Epoch 188 - avg_train_loss: 1.4366  avg_val_loss: 2.1147  time: 3s
Epoch 188 - Score: 0.5061


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1870(2.1147) 
Epoch: [189][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5205(1.5205) Grad: 2.8580 LR: 0.000406  
Epoch: [189][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4524(1.5218) Grad: 2.8592 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1699(2.1699) 
EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.3416(2.2887) 


Epoch 189 - avg_train_loss: 1.5218  avg_val_loss: 2.2887  time: 4s
Epoch 189 - Score: 0.4620


Epoch: [190][0/138] Elapsed 0m 0s (remain 0m 41s) Loss: 1.5025(1.5025) Grad: 2.9480 LR: 0.000409  
Epoch: [190][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5588(1.6091) Grad: 2.8198 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7067(2.7067) 


Epoch 190 - avg_train_loss: 1.6091  avg_val_loss: 2.7072  time: 3s
Epoch 190 - Score: 0.3784


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7561(2.7072) 
Epoch: [191][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6957(1.6957) Grad: 3.2267 LR: 0.000365  
Epoch: [191][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5703(1.6588) Grad: 2.7919 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3961(2.3961) 


Epoch 191 - avg_train_loss: 1.6588  avg_val_loss: 2.4577  time: 3s
Epoch 191 - Score: 0.4118


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4924(2.4577) 
Epoch: [192][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6190(1.6190) Grad: 2.7701 LR: 0.000278  
Epoch: [192][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6853(1.6307) Grad: 3.0420 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5584(2.5584) 


Epoch 192 - avg_train_loss: 1.6307  avg_val_loss: 2.5934  time: 4s
Epoch 192 - Score: 0.3835


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6406(2.5934) 
Epoch: [193][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.5584(1.5584) Grad: 2.7926 LR: 0.000174  
Epoch: [193][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5462(1.5682) Grad: 2.8555 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1955(2.1955) 


Epoch 193 - avg_train_loss: 1.5682  avg_val_loss: 2.2513  time: 3s
Epoch 193 - Score: 0.4616


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2994(2.2513) 
Epoch: [194][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4454(1.4454) Grad: 2.6330 LR: 0.000085  
Epoch: [194][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4472(1.4815) Grad: 2.8423 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9961(1.9961) 


Epoch 194 - avg_train_loss: 1.4815  avg_val_loss: 2.0654  time: 3s
Epoch 194 - Score: 0.5144


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1313(2.0654) 
Epoch: [195][0/138] Elapsed 0m 0s (remain 0m 19s) Loss: 1.3231(1.3231) Grad: 2.6659 LR: 0.000038  
Epoch: [195][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4965(1.4018) Grad: 3.1422 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9227(1.9227) 


Epoch 195 - avg_train_loss: 1.4018  avg_val_loss: 1.9688  time: 3s
Epoch 195 - Score: 0.5420


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0533(1.9688) 
Epoch: [196][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3535(1.3535) Grad: 2.7149 LR: 0.000030  
Epoch: [196][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2761(1.3603) Grad: 2.8144 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8633(1.8633) 


Epoch 196 - avg_train_loss: 1.3603  avg_val_loss: 1.9247  time: 4s
Epoch 196 - Score: 0.5523


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9949(1.9247) 
Epoch: [197][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3097(1.3097) Grad: 2.7474 LR: 0.000088  
Epoch: [197][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3496(1.3672) Grad: 2.7134 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8849(1.8849) 


Epoch 197 - avg_train_loss: 1.3672  avg_val_loss: 1.9533  time: 3s
Epoch 197 - Score: 0.5445


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0258(1.9533) 
Epoch: [198][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3057(1.3057) Grad: 2.7587 LR: 0.000409  
Epoch: [198][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4945(1.4178) Grad: 2.9800 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9810(1.9810) 


Epoch 198 - avg_train_loss: 1.4178  avg_val_loss: 2.0412  time: 4s
Epoch 198 - Score: 0.5157


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0833(2.0412) 
Epoch: [199][0/138] Elapsed 0m 0s (remain 0m 32s) Loss: 1.4040(1.4040) Grad: 2.8562 LR: 0.000406  
Epoch: [199][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.5896(1.5118) Grad: 3.0245 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4249(2.4249) 


Epoch 199 - avg_train_loss: 1.5118  avg_val_loss: 2.4618  time: 4s
Epoch 199 - Score: 0.4148


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5006(2.4618) 
Epoch: [200][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3999(1.3999) Grad: 2.6455 LR: 0.000409  
Epoch: [200][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6412(1.6016) Grad: 2.9704 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4485(2.4485) 


Epoch 200 - avg_train_loss: 1.6016  avg_val_loss: 2.4877  time: 3s
Epoch 200 - Score: 0.4216


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5305(2.4877) 
Epoch: [201][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.7263(1.7263) Grad: 2.9253 LR: 0.000365  
Epoch: [201][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7101(1.6445) Grad: 2.8305 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5007(2.5007) 


Epoch 201 - avg_train_loss: 1.6445  avg_val_loss: 2.5408  time: 3s
Epoch 201 - Score: 0.4153


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6403(2.5408) 
Epoch: [202][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6195(1.6195) Grad: 2.8135 LR: 0.000278  
Epoch: [202][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5879(1.6363) Grad: 2.6022 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5096(2.5096) 


Epoch 202 - avg_train_loss: 1.6363  avg_val_loss: 2.5482  time: 4s
Epoch 202 - Score: 0.4009


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6266(2.5482) 
Epoch: [203][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5426(1.5426) Grad: 2.6933 LR: 0.000174  
Epoch: [203][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6321(1.5608) Grad: 3.1098 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4289(2.4289) 


Epoch 203 - avg_train_loss: 1.5608  avg_val_loss: 2.5594  time: 3s
Epoch 203 - Score: 0.4035


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6728(2.5594) 
Epoch: [204][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4764(1.4764) Grad: 2.6275 LR: 0.000085  
Epoch: [204][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5142(1.4620) Grad: 2.8672 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9573(1.9573) 


Epoch 204 - avg_train_loss: 1.4620  avg_val_loss: 2.0128  time: 3s
Epoch 204 - Score: 0.5270


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0589(2.0128) 
Epoch: [205][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4522(1.4522) Grad: 2.8498 LR: 0.000038  
Epoch: [205][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3864(1.3927) Grad: 2.6572 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 11s) Loss: 1.8506(1.8506) 


Epoch 205 - avg_train_loss: 1.3927  avg_val_loss: 1.9036  time: 4s
Epoch 205 - Score: 0.5513


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9477(1.9036) 
Epoch: [206][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4704(1.4704) Grad: 2.7377 LR: 0.000030  
Epoch: [206][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3725(1.3423) Grad: 2.6390 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8326(1.8326) 


Epoch 206 - avg_train_loss: 1.3423  avg_val_loss: 1.8883  time: 3s
Epoch 206 - Score: 0.5561
Epoch 206 - Save Best score: 0.5561 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9377(1.8883) 
Epoch: [207][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2768(1.2768) Grad: 2.7189 LR: 0.000088  
Epoch: [207][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4878(1.3526) Grad: 2.9764 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8540(1.8540) 


Epoch 207 - avg_train_loss: 1.3526  avg_val_loss: 1.9117  time: 3s
Epoch 207 - Score: 0.5510


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9651(1.9117) 
Epoch: [208][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 1.3083(1.3083) Grad: 2.6274 LR: 0.000409  
Epoch: [208][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4772(1.4090) Grad: 2.9702 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9521(1.9521) 


Epoch 208 - avg_train_loss: 1.4090  avg_val_loss: 2.0176  time: 4s
Epoch 208 - Score: 0.5210


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0565(2.0176) 
Epoch: [209][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3774(1.3774) Grad: 2.8269 LR: 0.000406  
Epoch: [209][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4528(1.4908) Grad: 2.7569 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1899(2.1899) 


Epoch 209 - avg_train_loss: 1.4908  avg_val_loss: 2.2621  time: 3s
Epoch 209 - Score: 0.4746


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2870(2.2621) 
Epoch: [210][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5042(1.5042) Grad: 2.7535 LR: 0.000409  
Epoch: [210][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5507(1.5898) Grad: 2.6757 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4475(2.4475) 


Epoch 210 - avg_train_loss: 1.5898  avg_val_loss: 2.4793  time: 3s
Epoch 210 - Score: 0.4212


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4770(2.4793) 
Epoch: [211][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5382(1.5382) Grad: 2.5987 LR: 0.000365  
Epoch: [211][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6444(1.6263) Grad: 2.8416 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.5413(2.5413) 


Epoch 211 - avg_train_loss: 1.6263  avg_val_loss: 2.6295  time: 4s
Epoch 211 - Score: 0.3930


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6588(2.6295) 
Epoch: [212][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5676(1.5676) Grad: 2.6885 LR: 0.000278  
Epoch: [212][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6705(1.6014) Grad: 2.7714 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4223(2.4223) 


Epoch 212 - avg_train_loss: 1.6014  avg_val_loss: 2.5059  time: 3s
Epoch 212 - Score: 0.4154


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5450(2.5059) 
Epoch: [213][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4787(1.4787) Grad: 2.7485 LR: 0.000174  
Epoch: [213][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5128(1.5367) Grad: 2.8498 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1876(2.1876) 


Epoch 213 - avg_train_loss: 1.5367  avg_val_loss: 2.2618  time: 3s
Epoch 213 - Score: 0.4625


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3084(2.2618) 
Epoch: [214][0/138] Elapsed 0m 0s (remain 0m 26s) Loss: 1.4447(1.4447) Grad: 2.5181 LR: 0.000085  
Epoch: [214][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4037(1.4498) Grad: 2.6341 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0111(2.0111) 


Epoch 214 - avg_train_loss: 1.4498  avg_val_loss: 2.0800  time: 4s
Epoch 214 - Score: 0.5070


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1719(2.0800) 
Epoch: [215][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.4396(1.4396) Grad: 2.6271 LR: 0.000038  
Epoch: [215][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3434(1.3708) Grad: 2.7492 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8627(1.8627) 


Epoch 215 - avg_train_loss: 1.3708  avg_val_loss: 1.9229  time: 3s
Epoch 215 - Score: 0.5475


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9454(1.9229) 
Epoch: [216][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3830(1.3830) Grad: 2.8479 LR: 0.000030  
Epoch: [216][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3111(1.3336) Grad: 2.7692 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8452(1.8452) 


Epoch 216 - avg_train_loss: 1.3336  avg_val_loss: 1.9083  time: 3s
Epoch 216 - Score: 0.5525


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9548(1.9083) 
Epoch: [217][0/138] Elapsed 0m 0s (remain 0m 30s) Loss: 1.2479(1.2479) Grad: 2.8496 LR: 0.000088  
Epoch: [217][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3798(1.3365) Grad: 2.8318 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8407(1.8407) 


Epoch 217 - avg_train_loss: 1.3365  avg_val_loss: 1.9068  time: 4s
Epoch 217 - Score: 0.5505


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9544(1.9068) 
Epoch: [218][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 1.3027(1.3027) Grad: 2.5555 LR: 0.000409  
Epoch: [218][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3654(1.3915) Grad: 2.6608 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0160(2.0160) 


Epoch 218 - avg_train_loss: 1.3915  avg_val_loss: 2.0350  time: 4s
Epoch 218 - Score: 0.5206


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0741(2.0350) 
Epoch: [219][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3471(1.3471) Grad: 2.8737 LR: 0.000406  
Epoch: [219][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4724(1.4791) Grad: 2.8096 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1074(2.1074) 


Epoch 219 - avg_train_loss: 1.4791  avg_val_loss: 2.2116  time: 3s
Epoch 219 - Score: 0.4751


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3238(2.2116) 
Epoch: [220][0/138] Elapsed 0m 0s (remain 0m 19s) Loss: 1.3733(1.3733) Grad: 2.7581 LR: 0.000409  
Epoch: [220][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6600(1.5712) Grad: 2.7536 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5821(2.5821) 


Epoch 220 - avg_train_loss: 1.5712  avg_val_loss: 2.6661  time: 3s
Epoch 220 - Score: 0.3863


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6559(2.6661) 
Epoch: [221][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5854(1.5854) Grad: 3.0918 LR: 0.000365  
Epoch: [221][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6476(1.6077) Grad: 2.8562 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7744(2.7744) 


Epoch 221 - avg_train_loss: 1.6077  avg_val_loss: 2.8560  time: 4s
Epoch 221 - Score: 0.3395


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0141(2.8560) 
Epoch: [222][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6892(1.6892) Grad: 2.8038 LR: 0.000278  
Epoch: [222][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4318(1.6016) Grad: 2.5131 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3510(2.3510) 


Epoch 222 - avg_train_loss: 1.6016  avg_val_loss: 2.4532  time: 3s
Epoch 222 - Score: 0.4160


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5849(2.4532) 
Epoch: [223][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4882(1.4882) Grad: 2.8200 LR: 0.000174  
Epoch: [223][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5556(1.5299) Grad: 2.6984 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.2326(2.2326) 


Epoch 223 - avg_train_loss: 1.5299  avg_val_loss: 2.3485  time: 3s
Epoch 223 - Score: 0.4532


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4409(2.3485) 
Epoch: [224][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3789(1.3789) Grad: 2.5435 LR: 0.000085  
Epoch: [224][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3587(1.4471) Grad: 2.8917 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9773(1.9773) 


Epoch 224 - avg_train_loss: 1.4471  avg_val_loss: 2.0184  time: 4s
Epoch 224 - Score: 0.5201


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0779(2.0184) 
Epoch: [225][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4515(1.4515) Grad: 2.4826 LR: 0.000038  
Epoch: [225][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4418(1.3660) Grad: 2.7517 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8563(1.8563) 


Epoch 225 - avg_train_loss: 1.3660  avg_val_loss: 1.9293  time: 3s
Epoch 225 - Score: 0.5489


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9994(1.9293) 
Epoch: [226][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2915(1.2915) Grad: 2.6861 LR: 0.000030  
Epoch: [226][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2825(1.3164) Grad: 2.9782 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8534(1.8534) 


Epoch 226 - avg_train_loss: 1.3164  avg_val_loss: 1.9195  time: 4s
Epoch 226 - Score: 0.5537


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9892(1.9195) 
Epoch: [227][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3234(1.3234) Grad: 2.5115 LR: 0.000088  
Epoch: [227][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4336(1.3295) Grad: 2.9094 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8714(1.8714) 


Epoch 227 - avg_train_loss: 1.3295  avg_val_loss: 1.9513  time: 4s
Epoch 227 - Score: 0.5434


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0144(1.9513) 
Epoch: [228][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2610(1.2610) Grad: 2.8519 LR: 0.000409  
Epoch: [228][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5162(1.3837) Grad: 2.9493 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0269(2.0269) 


Epoch 228 - avg_train_loss: 1.3837  avg_val_loss: 2.0950  time: 3s
Epoch 228 - Score: 0.5089


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1615(2.0950) 
Epoch: [229][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3955(1.3955) Grad: 2.9286 LR: 0.000406  
Epoch: [229][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5680(1.4664) Grad: 2.8449 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1391(2.1391) 


Epoch 229 - avg_train_loss: 1.4664  avg_val_loss: 2.2267  time: 3s
Epoch 229 - Score: 0.4769


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3121(2.2267) 
Epoch: [230][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4311(1.4311) Grad: 2.5182 LR: 0.000409  
Epoch: [230][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7180(1.5617) Grad: 2.8558 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.4737(2.4737) 


Epoch 230 - avg_train_loss: 1.5617  avg_val_loss: 2.5355  time: 4s
Epoch 230 - Score: 0.4113


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6750(2.5355) 
Epoch: [231][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5135(1.5135) Grad: 2.6567 LR: 0.000365  
Epoch: [231][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7372(1.6037) Grad: 3.3291 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.9768(2.9768) 


Epoch 231 - avg_train_loss: 1.6037  avg_val_loss: 3.0522  time: 3s
Epoch 231 - Score: 0.3100


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0898(3.0522) 
Epoch: [232][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4116(1.4116) Grad: 2.5635 LR: 0.000278  
Epoch: [232][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.8320(1.5910) Grad: 3.0991 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.3824(2.3824) 


Epoch 232 - avg_train_loss: 1.5910  avg_val_loss: 2.4725  time: 3s
Epoch 232 - Score: 0.4204


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5195(2.4725) 
Epoch: [233][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3925(1.3925) Grad: 2.7005 LR: 0.000174  
Epoch: [233][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.7195(1.5150) Grad: 2.9648 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1171(2.1171) 


Epoch 233 - avg_train_loss: 1.5150  avg_val_loss: 2.1818  time: 4s
Epoch 233 - Score: 0.4854


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2411(2.1818) 
Epoch: [234][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 1.5163(1.5163) Grad: 2.8811 LR: 0.000085  
Epoch: [234][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3878(1.4328) Grad: 3.0191 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0498(2.0498) 


Epoch 234 - avg_train_loss: 1.4328  avg_val_loss: 2.1424  time: 3s
Epoch 234 - Score: 0.4974


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1887(2.1424) 
Epoch: [235][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3618(1.3618) Grad: 2.6070 LR: 0.000038  
Epoch: [235][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3245(1.3512) Grad: 2.6737 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8405(1.8405) 


Epoch 235 - avg_train_loss: 1.3512  avg_val_loss: 1.9340  time: 4s
Epoch 235 - Score: 0.5501


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9851(1.9340) 
Epoch: [236][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3597(1.3597) Grad: 2.7451 LR: 0.000030  
Epoch: [236][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2014(1.3080) Grad: 2.6521 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8480(1.8480) 


Epoch 236 - avg_train_loss: 1.3080  avg_val_loss: 1.9133  time: 3s
Epoch 236 - Score: 0.5558


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9594(1.9133) 
Epoch: [237][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.2198(1.2198) Grad: 2.8258 LR: 0.000088  
Epoch: [237][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3491(1.3125) Grad: 2.7114 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8932(1.8932) 


Epoch 237 - avg_train_loss: 1.3125  avg_val_loss: 1.9482  time: 3s
Epoch 237 - Score: 0.5459


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0135(1.9482) 
Epoch: [238][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1987(1.1987) Grad: 2.6258 LR: 0.000409  
Epoch: [238][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4638(1.3693) Grad: 2.8839 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0085(2.0085) 


Epoch 238 - avg_train_loss: 1.3693  avg_val_loss: 2.0940  time: 3s
Epoch 238 - Score: 0.5145


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1747(2.0940) 
Epoch: [239][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4565(1.4565) Grad: 2.7410 LR: 0.000406  
Epoch: [239][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5347(1.4639) Grad: 2.8814 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2652(2.2652) 


Epoch 239 - avg_train_loss: 1.4639  avg_val_loss: 2.3159  time: 3s
Epoch 239 - Score: 0.4545


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4127(2.3159) 
Epoch: [240][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5815(1.5815) Grad: 2.9701 LR: 0.000409  
Epoch: [240][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4570(1.5543) Grad: 2.6683 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3458(2.3458) 


Epoch 240 - avg_train_loss: 1.5543  avg_val_loss: 2.4027  time: 4s
Epoch 240 - Score: 0.4444


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4518(2.4027) 
Epoch: [241][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5965(1.5965) Grad: 2.7824 LR: 0.000365  
Epoch: [241][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6166(1.5925) Grad: 2.8164 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.5239(2.5239) 


Epoch 241 - avg_train_loss: 1.5925  avg_val_loss: 2.5964  time: 3s
Epoch 241 - Score: 0.3954


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6074(2.5964) 
Epoch: [242][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6397(1.6397) Grad: 3.0318 LR: 0.000278  
Epoch: [242][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3809(1.5777) Grad: 2.6872 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3212(2.3212) 


Epoch 242 - avg_train_loss: 1.5777  avg_val_loss: 2.4003  time: 3s
Epoch 242 - Score: 0.4481


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5281(2.4003) 
Epoch: [243][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5707(1.5707) Grad: 2.6755 LR: 0.000174  
Epoch: [243][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5050(1.5030) Grad: 2.9234 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1447(2.1447) 


Epoch 243 - avg_train_loss: 1.5030  avg_val_loss: 2.2151  time: 4s
Epoch 243 - Score: 0.4739


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3001(2.2151) 
Epoch: [244][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3333(1.3333) Grad: 2.6434 LR: 0.000085  
Epoch: [244][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4186(1.4206) Grad: 2.9216 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9263(1.9263) 


Epoch 244 - avg_train_loss: 1.4206  avg_val_loss: 2.0432  time: 4s
Epoch 244 - Score: 0.5217


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1373(2.0432) 
Epoch: [245][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3715(1.3715) Grad: 2.4415 LR: 0.000038  
Epoch: [245][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2794(1.3492) Grad: 2.5183 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8392(1.8392) 


Epoch 245 - avg_train_loss: 1.3492  avg_val_loss: 1.9067  time: 3s
Epoch 245 - Score: 0.5549


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9713(1.9067) 
Epoch: [246][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2477(1.2477) Grad: 2.6762 LR: 0.000030  
Epoch: [246][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2623(1.2987) Grad: 2.7802 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8517(1.8517) 


Epoch 246 - avg_train_loss: 1.2987  avg_val_loss: 1.9033  time: 4s
Epoch 246 - Score: 0.5581
Epoch 246 - Save Best score: 0.5581 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9919(1.9033) 
Epoch: [247][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2702(1.2702) Grad: 2.6501 LR: 0.000088  
Epoch: [247][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2098(1.3070) Grad: 2.5050 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8781(1.8781) 


Epoch 247 - avg_train_loss: 1.3070  avg_val_loss: 1.9218  time: 3s
Epoch 247 - Score: 0.5515


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0076(1.9218) 
Epoch: [248][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3866(1.3866) Grad: 2.8644 LR: 0.000409  
Epoch: [248][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3676(1.3565) Grad: 2.7466 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9956(1.9956) 


Epoch 248 - avg_train_loss: 1.3565  avg_val_loss: 2.0031  time: 3s
Epoch 248 - Score: 0.5283


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0366(2.0031) 
Epoch: [249][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4490(1.4490) Grad: 2.7402 LR: 0.000406  
Epoch: [249][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5589(1.4480) Grad: 2.7803 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.3560(2.3560) 


Epoch 249 - avg_train_loss: 1.4480  avg_val_loss: 2.4099  time: 4s
Epoch 249 - Score: 0.4372


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4829(2.4099) 
Epoch: [250][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3763(1.3763) Grad: 2.8075 LR: 0.000409  
Epoch: [250][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6434(1.5381) Grad: 2.8178 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.2082(2.2082) 


Epoch 250 - avg_train_loss: 1.5381  avg_val_loss: 2.3834  time: 3s
Epoch 250 - Score: 0.4443


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4982(2.3834) 
Epoch: [251][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.7091(1.7091) Grad: 2.9386 LR: 0.000365  
Epoch: [251][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6051(1.5817) Grad: 2.8322 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4815(2.4815) 


Epoch 251 - avg_train_loss: 1.5817  avg_val_loss: 2.5861  time: 3s
Epoch 251 - Score: 0.3961


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6402(2.5861) 
Epoch: [252][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4454(1.4454) Grad: 2.5194 LR: 0.000278  
Epoch: [252][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5991(1.5640) Grad: 3.0503 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3108(2.3108) 


Epoch 252 - avg_train_loss: 1.5640  avg_val_loss: 2.3983  time: 4s
Epoch 252 - Score: 0.4372


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4658(2.3983) 
Epoch: [253][0/138] Elapsed 0m 0s (remain 0m 29s) Loss: 1.4788(1.4788) Grad: 2.7301 LR: 0.000174  
Epoch: [253][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.6268(1.5016) Grad: 2.8989 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2438(2.2438) 


Epoch 253 - avg_train_loss: 1.5016  avg_val_loss: 2.3139  time: 4s
Epoch 253 - Score: 0.4560


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3754(2.3139) 
Epoch: [254][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5048(1.5048) Grad: 2.6156 LR: 0.000085  
Epoch: [254][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4108(1.4075) Grad: 2.7205 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0489(2.0489) 


Epoch 254 - avg_train_loss: 1.4075  avg_val_loss: 2.0733  time: 3s
Epoch 254 - Score: 0.5224


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1563(2.0733) 
Epoch: [255][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4690(1.4690) Grad: 2.7747 LR: 0.000038  
Epoch: [255][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3164(1.3357) Grad: 2.6159 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8839(1.8839) 


Epoch 255 - avg_train_loss: 1.3357  avg_val_loss: 1.9403  time: 3s
Epoch 255 - Score: 0.5525


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0298(1.9403) 
Epoch: [256][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.2364(1.2364) Grad: 2.9047 LR: 0.000030  
Epoch: [256][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3114(1.2962) Grad: 2.9469 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8719(1.8719) 


Epoch 256 - avg_train_loss: 1.2962  avg_val_loss: 1.9126  time: 3s
Epoch 256 - Score: 0.5578


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9926(1.9126) 
Epoch: [257][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2354(1.2354) Grad: 2.7769 LR: 0.000088  
Epoch: [257][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3177(1.3018) Grad: 2.6457 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8549(1.8549) 


Epoch 257 - avg_train_loss: 1.3018  avg_val_loss: 1.9167  time: 3s
Epoch 257 - Score: 0.5526


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9967(1.9167) 
Epoch: [258][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2448(1.2448) Grad: 2.5878 LR: 0.000409  
Epoch: [258][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4418(1.3415) Grad: 2.7439 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9734(1.9734) 


Epoch 258 - avg_train_loss: 1.3415  avg_val_loss: 2.0339  time: 3s
Epoch 258 - Score: 0.5213


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0669(2.0339) 
Epoch: [259][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3267(1.3267) Grad: 2.8547 LR: 0.000406  
Epoch: [259][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4531(1.4365) Grad: 2.8494 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.2970(2.2970) 


Epoch 259 - avg_train_loss: 1.4365  avg_val_loss: 2.4105  time: 4s
Epoch 259 - Score: 0.4356


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5065(2.4105) 
Epoch: [260][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4292(1.4292) Grad: 3.0655 LR: 0.000409  
Epoch: [260][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5882(1.5207) Grad: 2.9314 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5486(2.5486) 


Epoch 260 - avg_train_loss: 1.5207  avg_val_loss: 2.6511  time: 3s
Epoch 260 - Score: 0.3771


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6581(2.6511) 
Epoch: [261][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5350(1.5350) Grad: 2.5813 LR: 0.000365  
Epoch: [261][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6180(1.5607) Grad: 2.8370 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.9595(2.9595) 


Epoch 261 - avg_train_loss: 1.5607  avg_val_loss: 2.9939  time: 3s
Epoch 261 - Score: 0.3197


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.0563(2.9939) 
Epoch: [262][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.5090(1.5090) Grad: 2.8102 LR: 0.000278  
Epoch: [262][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.5404(1.5396) Grad: 2.6708 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2557(2.2557) 


Epoch 262 - avg_train_loss: 1.5396  avg_val_loss: 2.3346  time: 4s
Epoch 262 - Score: 0.4557


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4096(2.3346) 
Epoch: [263][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.7029(1.7029) Grad: 3.3659 LR: 0.000174  
Epoch: [263][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5642(1.4825) Grad: 2.9262 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0230(2.0230) 


Epoch 263 - avg_train_loss: 1.4825  avg_val_loss: 2.1340  time: 3s
Epoch 263 - Score: 0.4978


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2160(2.1340) 
Epoch: [264][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3475(1.3475) Grad: 2.6435 LR: 0.000085  
Epoch: [264][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2749(1.4041) Grad: 2.6509 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8971(1.8971) 


Epoch 264 - avg_train_loss: 1.4041  avg_val_loss: 1.9723  time: 3s
Epoch 264 - Score: 0.5303


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0440(1.9723) 
Epoch: [265][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4144(1.4144) Grad: 2.6052 LR: 0.000038  
Epoch: [265][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2847(1.3264) Grad: 2.7304 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8464(1.8464) 


Epoch 265 - avg_train_loss: 1.3264  avg_val_loss: 1.8994  time: 4s
Epoch 265 - Score: 0.5527


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9579(1.8994) 
Epoch: [266][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2941(1.2941) Grad: 2.6787 LR: 0.000030  
Epoch: [266][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2387(1.2869) Grad: 2.7402 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8415(1.8415) 


Epoch 266 - avg_train_loss: 1.2869  avg_val_loss: 1.9035  time: 3s
Epoch 266 - Score: 0.5556


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9504(1.9035) 
Epoch: [267][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3749(1.3749) Grad: 2.9213 LR: 0.000088  
Epoch: [267][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3350(1.2816) Grad: 3.0534 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8828(1.8828) 


Epoch 267 - avg_train_loss: 1.2816  avg_val_loss: 1.9287  time: 3s
Epoch 267 - Score: 0.5493


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9808(1.9287) 
Epoch: [268][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2739(1.2739) Grad: 2.7236 LR: 0.000409  
Epoch: [268][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3511(1.3438) Grad: 2.7782 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.0368(2.0368) 


Epoch 268 - avg_train_loss: 1.3438  avg_val_loss: 2.0607  time: 4s
Epoch 268 - Score: 0.5221


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1356(2.0607) 
Epoch: [269][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3747(1.3747) Grad: 2.7674 LR: 0.000406  
Epoch: [269][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3984(1.4248) Grad: 2.6197 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1359(2.1359) 


Epoch 269 - avg_train_loss: 1.4248  avg_val_loss: 2.2202  time: 3s
Epoch 269 - Score: 0.4867


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3037(2.2202) 
Epoch: [270][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4382(1.4382) Grad: 2.8018 LR: 0.000409  
Epoch: [270][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5480(1.5300) Grad: 2.8839 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5003(2.5003) 


Epoch 270 - avg_train_loss: 1.5300  avg_val_loss: 2.5704  time: 3s
Epoch 270 - Score: 0.3979


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6488(2.5704) 
Epoch: [271][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5724(1.5724) Grad: 2.8786 LR: 0.000365  
Epoch: [271][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.7664(1.5655) Grad: 2.8595 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.6965(2.6965) 


Epoch 271 - avg_train_loss: 1.5655  avg_val_loss: 2.7467  time: 4s
Epoch 271 - Score: 0.3698


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8124(2.7467) 
Epoch: [272][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6494(1.6494) Grad: 2.7750 LR: 0.000278  
Epoch: [272][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5427(1.5519) Grad: 2.7493 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5716(2.5716) 


Epoch 272 - avg_train_loss: 1.5519  avg_val_loss: 2.7051  time: 3s
Epoch 272 - Score: 0.3647


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8579(2.7051) 
Epoch: [273][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5253(1.5253) Grad: 2.8985 LR: 0.000174  
Epoch: [273][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3978(1.4786) Grad: 2.8303 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1342(2.1342) 


Epoch 273 - avg_train_loss: 1.4786  avg_val_loss: 2.2123  time: 3s
Epoch 273 - Score: 0.4725


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2507(2.2123) 
Epoch: [274][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4774(1.4774) Grad: 2.7125 LR: 0.000085  
Epoch: [274][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3154(1.3943) Grad: 2.6618 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9703(1.9703) 


Epoch 274 - avg_train_loss: 1.3943  avg_val_loss: 2.0475  time: 4s
Epoch 274 - Score: 0.5193


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1093(2.0475) 
Epoch: [275][0/138] Elapsed 0m 0s (remain 0m 27s) Loss: 1.4692(1.4692) Grad: 3.0548 LR: 0.000038  
Epoch: [275][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4077(1.3196) Grad: 2.6868 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8671(1.8671) 


Epoch 275 - avg_train_loss: 1.3196  avg_val_loss: 1.9297  time: 3s
Epoch 275 - Score: 0.5537


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9940(1.9297) 
Epoch: [276][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2433(1.2433) Grad: 2.4770 LR: 0.000030  
Epoch: [276][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3308(1.2812) Grad: 2.6348 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8282(1.8282) 


Epoch 276 - avg_train_loss: 1.2812  avg_val_loss: 1.8882  time: 3s
Epoch 276 - Score: 0.5598
Epoch 276 - Save Best score: 0.5598 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9309(1.8882) 
Epoch: [277][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2513(1.2513) Grad: 2.7755 LR: 0.000088  
Epoch: [277][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2546(1.2813) Grad: 2.5769 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8522(1.8522) 


Epoch 277 - avg_train_loss: 1.2813  avg_val_loss: 1.9050  time: 3s
Epoch 277 - Score: 0.5508


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9543(1.9050) 
Epoch: [278][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.3722(1.3722) Grad: 3.1960 LR: 0.000409  
Epoch: [278][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2994(1.3301) Grad: 2.6292 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0227(2.0227) 


Epoch 278 - avg_train_loss: 1.3301  avg_val_loss: 2.0705  time: 4s
Epoch 278 - Score: 0.5109


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1122(2.0705) 
Epoch: [279][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3643(1.3643) Grad: 2.6545 LR: 0.000406  
Epoch: [279][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5237(1.4134) Grad: 2.8913 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2671(2.2671) 


Epoch 279 - avg_train_loss: 1.4134  avg_val_loss: 2.3638  time: 3s
Epoch 279 - Score: 0.4559


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4228(2.3638) 
Epoch: [280][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3181(1.3181) Grad: 2.6699 LR: 0.000409  
Epoch: [280][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4892(1.5141) Grad: 2.7293 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6573(2.6573) 


Epoch 280 - avg_train_loss: 1.5141  avg_val_loss: 2.8075  time: 4s
Epoch 280 - Score: 0.3486


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8662(2.8075) 
Epoch: [281][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4432(1.4432) Grad: 2.7885 LR: 0.000365  
Epoch: [281][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5456(1.5666) Grad: 2.8065 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6175(2.6175) 


Epoch 281 - avg_train_loss: 1.5666  avg_val_loss: 2.6936  time: 4s
Epoch 281 - Score: 0.3762


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7975(2.6936) 
Epoch: [282][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6152(1.6152) Grad: 2.9926 LR: 0.000278  
Epoch: [282][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3938(1.5500) Grad: 2.4107 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4761(2.4761) 


Epoch 282 - avg_train_loss: 1.5500  avg_val_loss: 2.5763  time: 3s
Epoch 282 - Score: 0.3937


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6992(2.5763) 
Epoch: [283][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5393(1.5393) Grad: 2.7631 LR: 0.000174  
Epoch: [283][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4323(1.4786) Grad: 2.7657 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1905(2.1905) 


Epoch 283 - avg_train_loss: 1.4786  avg_val_loss: 2.2400  time: 3s
Epoch 283 - Score: 0.4671


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2652(2.2400) 
Epoch: [284][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4289(1.4289) Grad: 2.5767 LR: 0.000085  
Epoch: [284][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4210(1.3927) Grad: 2.5527 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9889(1.9889) 


Epoch 284 - avg_train_loss: 1.3927  avg_val_loss: 2.0632  time: 4s
Epoch 284 - Score: 0.5186


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1028(2.0632) 
Epoch: [285][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3302(1.3302) Grad: 2.7406 LR: 0.000038  
Epoch: [285][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1883(1.3172) Grad: 2.3826 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8639(1.8639) 


Epoch 285 - avg_train_loss: 1.3172  avg_val_loss: 1.9234  time: 3s
Epoch 285 - Score: 0.5480


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9492(1.9234) 
Epoch: [286][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3765(1.3765) Grad: 2.8215 LR: 0.000030  
Epoch: [286][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3155(1.2802) Grad: 2.6643 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8322(1.8322) 


Epoch 286 - avg_train_loss: 1.2802  avg_val_loss: 1.8951  time: 3s
Epoch 286 - Score: 0.5565


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9623(1.8951) 
Epoch: [287][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2647(1.2647) Grad: 2.5311 LR: 0.000088  
Epoch: [287][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2899(1.2832) Grad: 2.8753 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8475(1.8475) 


Epoch 287 - avg_train_loss: 1.2832  avg_val_loss: 1.8968  time: 4s
Epoch 287 - Score: 0.5595


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9464(1.8968) 
Epoch: [288][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4725(1.4725) Grad: 3.0341 LR: 0.000409  
Epoch: [288][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3095(1.3281) Grad: 2.7976 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9212(1.9212) 


Epoch 288 - avg_train_loss: 1.3281  avg_val_loss: 1.9850  time: 3s
Epoch 288 - Score: 0.5408


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0218(1.9850) 
Epoch: [289][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2707(1.2707) Grad: 2.4416 LR: 0.000406  
Epoch: [289][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3176(1.4122) Grad: 2.6482 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2612(2.2612) 


Epoch 289 - avg_train_loss: 1.4122  avg_val_loss: 2.2994  time: 4s
Epoch 289 - Score: 0.4548


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3733(2.2994) 
Epoch: [290][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3311(1.3311) Grad: 2.6483 LR: 0.000409  
Epoch: [290][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6913(1.4930) Grad: 3.2421 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6377(2.6377) 


Epoch 290 - avg_train_loss: 1.4930  avg_val_loss: 2.7440  time: 4s
Epoch 290 - Score: 0.3653


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7344(2.7440) 
Epoch: [291][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5452(1.5452) Grad: 2.9876 LR: 0.000365  
Epoch: [291][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5753(1.5313) Grad: 2.9443 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4810(2.4810) 


Epoch 291 - avg_train_loss: 1.5313  avg_val_loss: 2.5394  time: 3s
Epoch 291 - Score: 0.4064


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5428(2.5394) 
Epoch: [292][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5129(1.5129) Grad: 2.8311 LR: 0.000278  
Epoch: [292][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4415(1.5237) Grad: 2.5929 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4015(2.4015) 


Epoch 292 - avg_train_loss: 1.5237  avg_val_loss: 2.4903  time: 3s
Epoch 292 - Score: 0.4176


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6097(2.4903) 
Epoch: [293][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4685(1.4685) Grad: 2.9384 LR: 0.000174  
Epoch: [293][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5108(1.4602) Grad: 2.6265 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.2146(2.2146) 


Epoch 293 - avg_train_loss: 1.4602  avg_val_loss: 2.3036  time: 4s
Epoch 293 - Score: 0.4542


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3812(2.3036) 
Epoch: [294][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3772(1.3772) Grad: 2.6912 LR: 0.000085  
Epoch: [294][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3025(1.3830) Grad: 2.5261 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9302(1.9302) 


Epoch 294 - avg_train_loss: 1.3830  avg_val_loss: 2.0001  time: 3s
Epoch 294 - Score: 0.5320


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0451(2.0001) 
Epoch: [295][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4616(1.4616) Grad: 2.9543 LR: 0.000038  
Epoch: [295][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2987(1.3041) Grad: 2.5158 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8563(1.8563) 


Epoch 295 - avg_train_loss: 1.3041  avg_val_loss: 1.9129  time: 3s
Epoch 295 - Score: 0.5520


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9552(1.9129) 
Epoch: [296][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.2685(1.2685) Grad: 2.6126 LR: 0.000030  
Epoch: [296][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2363(1.2619) Grad: 2.6078 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8365(1.8365) 


Epoch 296 - avg_train_loss: 1.2619  avg_val_loss: 1.8922  time: 3s
Epoch 296 - Score: 0.5577


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9565(1.8922) 
Epoch: [297][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.1599(1.1599) Grad: 2.6001 LR: 0.000088  
Epoch: [297][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3506(1.2667) Grad: 2.6916 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8301(1.8301) 


Epoch 297 - avg_train_loss: 1.2667  avg_val_loss: 1.8708  time: 4s
Epoch 297 - Score: 0.5591


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9400(1.8708) 
Epoch: [298][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3131(1.3131) Grad: 2.6922 LR: 0.000409  
Epoch: [298][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3669(1.3054) Grad: 2.8578 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9982(1.9982) 


Epoch 298 - avg_train_loss: 1.3054  avg_val_loss: 2.0563  time: 4s
Epoch 298 - Score: 0.5180


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0899(2.0563) 
Epoch: [299][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3010(1.3010) Grad: 2.5709 LR: 0.000406  
Epoch: [299][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4666(1.4023) Grad: 3.0215 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2562(2.2562) 


Epoch 299 - avg_train_loss: 1.4023  avg_val_loss: 2.2932  time: 3s
Epoch 299 - Score: 0.4627


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3382(2.2932) 
Epoch: [300][0/138] Elapsed 0m 0s (remain 0m 30s) Loss: 1.4737(1.4737) Grad: 2.8312 LR: 0.000409  
Epoch: [300][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6407(1.4959) Grad: 3.0357 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3909(2.3909) 


Epoch 300 - avg_train_loss: 1.4959  avg_val_loss: 2.4491  time: 4s
Epoch 300 - Score: 0.4209


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5269(2.4491) 
Epoch: [301][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6330(1.6330) Grad: 3.3104 LR: 0.000365  
Epoch: [301][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5461(1.5449) Grad: 2.8936 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5262(2.5262) 


Epoch 301 - avg_train_loss: 1.5449  avg_val_loss: 2.6268  time: 3s
Epoch 301 - Score: 0.3889


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7048(2.6268) 
Epoch: [302][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.6994(1.6994) Grad: 3.0133 LR: 0.000278  
Epoch: [302][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5387(1.5408) Grad: 2.7464 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5469(2.5469) 


Epoch 302 - avg_train_loss: 1.5408  avg_val_loss: 2.5927  time: 3s
Epoch 302 - Score: 0.3923


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6603(2.5927) 
Epoch: [303][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5568(1.5568) Grad: 2.7153 LR: 0.000174  
Epoch: [303][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5505(1.4740) Grad: 2.7934 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1391(2.1391) 


Epoch 303 - avg_train_loss: 1.4740  avg_val_loss: 2.2371  time: 4s
Epoch 303 - Score: 0.4750


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2711(2.2371) 
Epoch: [304][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3970(1.3970) Grad: 2.6197 LR: 0.000085  
Epoch: [304][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4376(1.3833) Grad: 2.6533 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9659(1.9659) 


Epoch 304 - avg_train_loss: 1.3833  avg_val_loss: 1.9801  time: 3s
Epoch 304 - Score: 0.5318


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0277(1.9801) 
Epoch: [305][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2836(1.2836) Grad: 2.5700 LR: 0.000038  
Epoch: [305][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2181(1.3076) Grad: 2.5272 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8873(1.8873) 


Epoch 305 - avg_train_loss: 1.3076  avg_val_loss: 1.8955  time: 3s
Epoch 305 - Score: 0.5562


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9440(1.8955) 
Epoch: [306][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1959(1.1959) Grad: 2.4487 LR: 0.000030  
Epoch: [306][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3370(1.2672) Grad: 2.9226 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8590(1.8590) 


Epoch 306 - avg_train_loss: 1.2672  avg_val_loss: 1.8867  time: 4s
Epoch 306 - Score: 0.5602
Epoch 306 - Save Best score: 0.5602 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9398(1.8867) 
Epoch: [307][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2032(1.2032) Grad: 2.7461 LR: 0.000088  
Epoch: [307][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3097(1.2744) Grad: 2.5925 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8710(1.8710) 


Epoch 307 - avg_train_loss: 1.2744  avg_val_loss: 1.8992  time: 4s
Epoch 307 - Score: 0.5575


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9670(1.8992) 
Epoch: [308][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2542(1.2542) Grad: 2.6171 LR: 0.000409  
Epoch: [308][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3479(1.3160) Grad: 2.7364 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8740(1.8740) 


Epoch 308 - avg_train_loss: 1.3160  avg_val_loss: 1.9445  time: 3s
Epoch 308 - Score: 0.5390


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9532(1.9445) 
Epoch: [309][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3178(1.3178) Grad: 2.6041 LR: 0.000406  
Epoch: [309][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3538(1.3946) Grad: 2.6813 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2244(2.2244) 


Epoch 309 - avg_train_loss: 1.3946  avg_val_loss: 2.2541  time: 4s
Epoch 309 - Score: 0.4684


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2956(2.2541) 
Epoch: [310][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.4226(1.4226) Grad: 2.7791 LR: 0.000409  
Epoch: [310][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4777(1.4915) Grad: 3.0128 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4150(2.4150) 


Epoch 310 - avg_train_loss: 1.4915  avg_val_loss: 2.4853  time: 3s
Epoch 310 - Score: 0.4145


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5961(2.4853) 
Epoch: [311][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3114(1.3114) Grad: 2.7370 LR: 0.000365  
Epoch: [311][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6976(1.5317) Grad: 2.8911 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6503(2.6503) 


Epoch 311 - avg_train_loss: 1.5317  avg_val_loss: 2.7006  time: 3s
Epoch 311 - Score: 0.3754


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7360(2.7006) 
Epoch: [312][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4842(1.4842) Grad: 2.9239 LR: 0.000278  
Epoch: [312][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5300(1.5181) Grad: 2.5978 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.5720(2.5720) 


Epoch 312 - avg_train_loss: 1.5181  avg_val_loss: 2.6693  time: 4s
Epoch 312 - Score: 0.3723


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6335(2.6693) 
Epoch: [313][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6713(1.6713) Grad: 2.9195 LR: 0.000174  
Epoch: [313][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5623(1.4530) Grad: 2.8250 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.3166(2.3166) 


Epoch 313 - avg_train_loss: 1.4530  avg_val_loss: 2.3492  time: 3s
Epoch 313 - Score: 0.4458


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4172(2.3492) 
Epoch: [314][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.4049(1.4049) Grad: 2.5469 LR: 0.000085  
Epoch: [314][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4224(1.3686) Grad: 2.6874 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0191(2.0191) 


Epoch 314 - avg_train_loss: 1.3686  avg_val_loss: 2.0430  time: 3s
Epoch 314 - Score: 0.5188


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1224(2.0430) 
Epoch: [315][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2771(1.2771) Grad: 2.5798 LR: 0.000038  
Epoch: [315][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3516(1.3001) Grad: 2.9107 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8790(1.8790) 


Epoch 315 - avg_train_loss: 1.3001  avg_val_loss: 1.9093  time: 3s
Epoch 315 - Score: 0.5549


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9747(1.9093) 
Epoch: [316][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.3020(1.3020) Grad: 2.6974 LR: 0.000030  
Epoch: [316][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2478(1.2552) Grad: 2.7136 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8353(1.8353) 


Epoch 316 - avg_train_loss: 1.2552  avg_val_loss: 1.8701  time: 4s
Epoch 316 - Score: 0.5627
Epoch 316 - Save Best score: 0.5627 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9391(1.8701) 
Epoch: [317][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.1975(1.1975) Grad: 2.6890 LR: 0.000088  
Epoch: [317][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3045(1.2551) Grad: 2.7036 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8506(1.8506) 


Epoch 317 - avg_train_loss: 1.2551  avg_val_loss: 1.9095  time: 3s
Epoch 317 - Score: 0.5532


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9626(1.9095) 
Epoch: [318][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2508(1.2508) Grad: 2.5947 LR: 0.000409  
Epoch: [318][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2949(1.2949) Grad: 2.9936 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9360(1.9360) 


Epoch 318 - avg_train_loss: 1.2949  avg_val_loss: 1.9931  time: 4s
Epoch 318 - Score: 0.5278


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0626(1.9931) 
Epoch: [319][0/138] Elapsed 0m 0s (remain 0m 30s) Loss: 1.3202(1.3202) Grad: 2.8244 LR: 0.000406  
Epoch: [319][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4264(1.3819) Grad: 2.9718 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1600(2.1600) 


Epoch 319 - avg_train_loss: 1.3819  avg_val_loss: 2.2319  time: 3s
Epoch 319 - Score: 0.4779


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2816(2.2319) 
Epoch: [320][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4536(1.4536) Grad: 2.6482 LR: 0.000409  
Epoch: [320][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4959(1.4770) Grad: 2.7684 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7286(2.7286) 


Epoch 320 - avg_train_loss: 1.4770  avg_val_loss: 2.7863  time: 3s
Epoch 320 - Score: 0.3657


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8682(2.7863) 
Epoch: [321][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5561(1.5561) Grad: 2.8214 LR: 0.000365  
Epoch: [321][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6106(1.5245) Grad: 2.9025 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7646(2.7646) 


Epoch 321 - avg_train_loss: 1.5245  avg_val_loss: 2.8407  time: 3s
Epoch 321 - Score: 0.3437


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9344(2.8407) 
Epoch: [322][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5426(1.5426) Grad: 2.8287 LR: 0.000278  
Epoch: [322][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5291(1.5073) Grad: 3.2098 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.4425(2.4425) 


Epoch 322 - avg_train_loss: 1.5073  avg_val_loss: 2.4595  time: 4s
Epoch 322 - Score: 0.4291


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5274(2.4595) 
Epoch: [323][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3266(1.3266) Grad: 2.5675 LR: 0.000174  
Epoch: [323][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5248(1.4539) Grad: 2.7690 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0284(2.0284) 


Epoch 323 - avg_train_loss: 1.4539  avg_val_loss: 2.1168  time: 3s
Epoch 323 - Score: 0.4985


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1757(2.1168) 
Epoch: [324][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4746(1.4746) Grad: 2.8025 LR: 0.000085  
Epoch: [324][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3547(1.3668) Grad: 2.6559 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9425(1.9425) 


Epoch 324 - avg_train_loss: 1.3668  avg_val_loss: 2.0050  time: 3s
Epoch 324 - Score: 0.5273


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0914(2.0050) 
Epoch: [325][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3906(1.3906) Grad: 2.7476 LR: 0.000038  
Epoch: [325][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2030(1.2895) Grad: 2.6794 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 1.8947(1.8947) 


Epoch 325 - avg_train_loss: 1.2895  avg_val_loss: 1.9191  time: 4s
Epoch 325 - Score: 0.5544


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9791(1.9191) 
Epoch: [326][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2558(1.2558) Grad: 2.6219 LR: 0.000030  
Epoch: [326][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2646(1.2451) Grad: 2.6134 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8481(1.8481) 


Epoch 326 - avg_train_loss: 1.2451  avg_val_loss: 1.8840  time: 3s
Epoch 326 - Score: 0.5625


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9715(1.8840) 
Epoch: [327][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4059(1.4059) Grad: 2.9696 LR: 0.000088  
Epoch: [327][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2445(1.2537) Grad: 2.6330 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8842(1.8842) 


Epoch 327 - avg_train_loss: 1.2537  avg_val_loss: 1.9229  time: 3s
Epoch 327 - Score: 0.5525


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9918(1.9229) 
Epoch: [328][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2343(1.2343) Grad: 2.6041 LR: 0.000409  
Epoch: [328][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3116(1.2938) Grad: 2.7393 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9846(1.9846) 


Epoch 328 - avg_train_loss: 1.2938  avg_val_loss: 2.0424  time: 4s
Epoch 328 - Score: 0.5173


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1560(2.0424) 
Epoch: [329][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2975(1.2975) Grad: 2.6935 LR: 0.000406  
Epoch: [329][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4202(1.3858) Grad: 2.9362 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1700(2.1700) 


Epoch 329 - avg_train_loss: 1.3858  avg_val_loss: 2.2326  time: 3s
Epoch 329 - Score: 0.4717


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2859(2.2326) 
Epoch: [330][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5071(1.5071) Grad: 3.2273 LR: 0.000409  
Epoch: [330][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6086(1.4708) Grad: 3.1918 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5937(2.5937) 


Epoch 330 - avg_train_loss: 1.4708  avg_val_loss: 2.6795  time: 3s
Epoch 330 - Score: 0.3793


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7106(2.6795) 
Epoch: [331][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4803(1.4803) Grad: 2.7397 LR: 0.000365  
Epoch: [331][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5036(1.5174) Grad: 2.6328 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.6267(2.6267) 


Epoch 331 - avg_train_loss: 1.5174  avg_val_loss: 2.6914  time: 4s
Epoch 331 - Score: 0.3798


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7022(2.6914) 
Epoch: [332][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.6147(1.6147) Grad: 2.8771 LR: 0.000278  
Epoch: [332][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4539(1.5049) Grad: 2.6198 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3579(2.3579) 


Epoch 332 - avg_train_loss: 1.5049  avg_val_loss: 2.4688  time: 3s
Epoch 332 - Score: 0.4157


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5104(2.4688) 
Epoch: [333][0/138] Elapsed 0m 0s (remain 0m 19s) Loss: 1.6014(1.6014) Grad: 3.0012 LR: 0.000174  
Epoch: [333][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5302(1.4430) Grad: 2.9016 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1074(2.1074) 


Epoch 333 - avg_train_loss: 1.4430  avg_val_loss: 2.1567  time: 3s
Epoch 333 - Score: 0.4995


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2136(2.1567) 
Epoch: [334][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3643(1.3643) Grad: 2.6818 LR: 0.000085  
Epoch: [334][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3974(1.3550) Grad: 2.7556 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 14s) Loss: 1.9766(1.9766) 


Epoch 334 - avg_train_loss: 1.3550  avg_val_loss: 2.0020  time: 4s
Epoch 334 - Score: 0.5295


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.0819(2.0020) 
Epoch: [335][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3306(1.3306) Grad: 2.4755 LR: 0.000038  
Epoch: [335][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1868(1.2781) Grad: 2.5639 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8463(1.8463) 


Epoch 335 - avg_train_loss: 1.2781  avg_val_loss: 1.8860  time: 3s
Epoch 335 - Score: 0.5582


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9679(1.8860) 
Epoch: [336][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2117(1.2117) Grad: 2.5625 LR: 0.000030  
Epoch: [336][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2206(1.2320) Grad: 2.6288 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8175(1.8175) 


Epoch 336 - avg_train_loss: 1.2320  avg_val_loss: 1.8602  time: 3s
Epoch 336 - Score: 0.5631
Epoch 336 - Save Best score: 0.5631 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9187(1.8602) 
Epoch: [337][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.1376(1.1376) Grad: 2.5355 LR: 0.000088  
Epoch: [337][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1802(1.2421) Grad: 2.7085 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9418(1.9418) 


Epoch 337 - avg_train_loss: 1.2421  avg_val_loss: 1.9495  time: 4s
Epoch 337 - Score: 0.5509


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0610(1.9495) 
Epoch: [338][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.1744(1.1744) Grad: 2.6636 LR: 0.000409  
Epoch: [338][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2676(1.2904) Grad: 2.5163 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0582(2.0582) 


Epoch 338 - avg_train_loss: 1.2904  avg_val_loss: 2.0938  time: 3s
Epoch 338 - Score: 0.5100


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1495(2.0938) 
Epoch: [339][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3184(1.3184) Grad: 2.8307 LR: 0.000406  
Epoch: [339][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2970(1.3680) Grad: 2.5930 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3992(2.3992) 


Epoch 339 - avg_train_loss: 1.3680  avg_val_loss: 2.4405  time: 3s
Epoch 339 - Score: 0.4359


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5424(2.4405) 
Epoch: [340][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4660(1.4660) Grad: 2.9854 LR: 0.000409  
Epoch: [340][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5240(1.4718) Grad: 2.8314 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.5285(2.5285) 


Epoch 340 - avg_train_loss: 1.4718  avg_val_loss: 2.6150  time: 3s
Epoch 340 - Score: 0.3888


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6355(2.6150) 
Epoch: [341][0/138] Elapsed 0m 0s (remain 0m 32s) Loss: 1.4536(1.4536) Grad: 2.8016 LR: 0.000365  
Epoch: [341][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6469(1.5108) Grad: 2.9003 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.6262(2.6262) 


Epoch 341 - avg_train_loss: 1.5108  avg_val_loss: 2.6570  time: 4s
Epoch 341 - Score: 0.3992


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6863(2.6570) 
Epoch: [342][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5251(1.5251) Grad: 2.9236 LR: 0.000278  
Epoch: [342][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4827(1.5025) Grad: 2.6842 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1516(2.1516) 


Epoch 342 - avg_train_loss: 1.5025  avg_val_loss: 2.2145  time: 3s
Epoch 342 - Score: 0.4793


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3105(2.2145) 
Epoch: [343][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4207(1.4207) Grad: 2.6481 LR: 0.000174  
Epoch: [343][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4048(1.4242) Grad: 2.4594 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.2604(2.2604) 


Epoch 343 - avg_train_loss: 1.4242  avg_val_loss: 2.2965  time: 4s
Epoch 343 - Score: 0.4586


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.3869(2.2965) 
Epoch: [344][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3229(1.3229) Grad: 2.5681 LR: 0.000085  
Epoch: [344][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1821(1.3354) Grad: 2.3480 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9731(1.9731) 


Epoch 344 - avg_train_loss: 1.3354  avg_val_loss: 2.0206  time: 4s
Epoch 344 - Score: 0.5281


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1071(2.0206) 
Epoch: [345][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5083(1.5083) Grad: 2.6648 LR: 0.000038  
Epoch: [345][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2324(1.2729) Grad: 2.5971 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8441(1.8441) 


Epoch 345 - avg_train_loss: 1.2729  avg_val_loss: 1.9044  time: 3s
Epoch 345 - Score: 0.5585


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0131(1.9044) 
Epoch: [346][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2419(1.2419) Grad: 2.5963 LR: 0.000030  
Epoch: [346][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1692(1.2317) Grad: 2.4652 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8449(1.8449) 


Epoch 346 - avg_train_loss: 1.2317  avg_val_loss: 1.8620  time: 3s
Epoch 346 - Score: 0.5649
Epoch 346 - Save Best score: 0.5649 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9583(1.8620) 
Epoch: [347][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3204(1.3204) Grad: 2.6038 LR: 0.000088  
Epoch: [347][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2490(1.2344) Grad: 2.6136 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9204(1.9204) 


Epoch 347 - avg_train_loss: 1.2344  avg_val_loss: 1.9279  time: 4s
Epoch 347 - Score: 0.5528


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0082(1.9279) 
Epoch: [348][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1974(1.1974) Grad: 2.5225 LR: 0.000409  
Epoch: [348][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3322(1.2775) Grad: 2.7865 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9373(1.9373) 


Epoch 348 - avg_train_loss: 1.2775  avg_val_loss: 1.9874  time: 3s
Epoch 348 - Score: 0.5237


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0436(1.9874) 
Epoch: [349][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1367(1.1367) Grad: 2.3558 LR: 0.000406  
Epoch: [349][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3977(1.3637) Grad: 2.8462 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0859(2.0859) 


Epoch 349 - avg_train_loss: 1.3637  avg_val_loss: 2.1751  time: 3s
Epoch 349 - Score: 0.4905


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2087(2.1751) 
Epoch: [350][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4014(1.4014) Grad: 2.7536 LR: 0.000409  
Epoch: [350][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3940(1.4508) Grad: 2.6353 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6065(2.6065) 


Epoch 350 - avg_train_loss: 1.4508  avg_val_loss: 2.5779  time: 4s
Epoch 350 - Score: 0.4031


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5945(2.5779) 
Epoch: [351][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.5757(1.5757) Grad: 2.7996 LR: 0.000365  
Epoch: [351][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4984(1.5111) Grad: 2.7992 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5433(2.5433) 


Epoch 351 - avg_train_loss: 1.5111  avg_val_loss: 2.5988  time: 3s
Epoch 351 - Score: 0.3968


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7096(2.5988) 
Epoch: [352][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3778(1.3778) Grad: 2.9518 LR: 0.000278  
Epoch: [352][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5384(1.4934) Grad: 2.7694 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.4466(2.4466) 


Epoch 352 - avg_train_loss: 1.4934  avg_val_loss: 2.5045  time: 4s
Epoch 352 - Score: 0.4203


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.5834(2.5045) 
Epoch: [353][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.3435(1.3435) Grad: 2.5772 LR: 0.000174  
Epoch: [353][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4840(1.4277) Grad: 2.8169 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.3548(2.3548) 


Epoch 353 - avg_train_loss: 1.4277  avg_val_loss: 2.3894  time: 4s
Epoch 353 - Score: 0.4391


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5150(2.3894) 
Epoch: [354][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3897(1.3897) Grad: 2.5530 LR: 0.000085  
Epoch: [354][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4234(1.3377) Grad: 2.6358 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8786(1.8786) 


Epoch 354 - avg_train_loss: 1.3377  avg_val_loss: 1.9536  time: 3s
Epoch 354 - Score: 0.5360


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0302(1.9536) 
Epoch: [355][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2495(1.2495) Grad: 2.4449 LR: 0.000038  
Epoch: [355][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.0196(1.2640) Grad: 2.4082 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8164(1.8164) 


Epoch 355 - avg_train_loss: 1.2640  avg_val_loss: 1.8906  time: 3s
Epoch 355 - Score: 0.5567


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9534(1.8906) 
Epoch: [356][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2524(1.2524) Grad: 2.8149 LR: 0.000030  
Epoch: [356][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2701(1.2285) Grad: 2.5993 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8033(1.8033) 


Epoch 356 - avg_train_loss: 1.2285  avg_val_loss: 1.8659  time: 4s
Epoch 356 - Score: 0.5666
Epoch 356 - Save Best score: 0.5666 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9288(1.8659) 
Epoch: [357][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1871(1.1871) Grad: 2.3341 LR: 0.000088  
Epoch: [357][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2549(1.2345) Grad: 2.6273 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8185(1.8185) 


Epoch 357 - avg_train_loss: 1.2345  avg_val_loss: 1.8895  time: 3s
Epoch 357 - Score: 0.5607


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0151(1.8895) 
Epoch: [358][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2006(1.2006) Grad: 2.7206 LR: 0.000409  
Epoch: [358][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3139(1.2729) Grad: 2.6766 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9713(1.9713) 


Epoch 358 - avg_train_loss: 1.2729  avg_val_loss: 2.0417  time: 3s
Epoch 358 - Score: 0.5236


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1115(2.0417) 
Epoch: [359][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3339(1.3339) Grad: 2.7049 LR: 0.000406  
Epoch: [359][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4093(1.3596) Grad: 2.7535 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0932(2.0932) 


Epoch 359 - avg_train_loss: 1.3596  avg_val_loss: 2.1779  time: 3s
Epoch 359 - Score: 0.4799


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2425(2.1779) 
Epoch: [360][0/138] Elapsed 0m 0s (remain 0m 24s) Loss: 1.1201(1.1201) Grad: 2.5318 LR: 0.000409  
Epoch: [360][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4589(1.4539) Grad: 2.8079 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3042(2.3042) 


Epoch 360 - avg_train_loss: 1.4539  avg_val_loss: 2.3497  time: 4s
Epoch 360 - Score: 0.4455


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4468(2.3497) 
Epoch: [361][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4823(1.4823) Grad: 2.9323 LR: 0.000365  
Epoch: [361][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5726(1.5056) Grad: 3.0556 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.6730(2.6730) 


Epoch 361 - avg_train_loss: 1.5056  avg_val_loss: 2.7283  time: 4s
Epoch 361 - Score: 0.3822


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.7282(2.7283) 
Epoch: [362][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.6457(1.6457) Grad: 2.9679 LR: 0.000278  
Epoch: [362][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5477(1.4950) Grad: 2.8653 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4728(2.4728) 


Epoch 362 - avg_train_loss: 1.4950  avg_val_loss: 2.5409  time: 3s
Epoch 362 - Score: 0.4085


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5789(2.5409) 
Epoch: [363][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.4736(1.4736) Grad: 2.6629 LR: 0.000174  
Epoch: [363][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5503(1.4290) Grad: 2.9137 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3201(2.3201) 


Epoch 363 - avg_train_loss: 1.4290  avg_val_loss: 2.3563  time: 4s
Epoch 363 - Score: 0.4468


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4114(2.3563) 
Epoch: [364][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2974(1.2974) Grad: 2.5808 LR: 0.000085  
Epoch: [364][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2779(1.3261) Grad: 2.7483 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0375(2.0375) 


Epoch 364 - avg_train_loss: 1.3261  avg_val_loss: 2.1197  time: 3s
Epoch 364 - Score: 0.5125


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1616(2.1197) 
Epoch: [365][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2888(1.2888) Grad: 2.5951 LR: 0.000038  
Epoch: [365][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1952(1.2602) Grad: 2.5095 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8920(1.8920) 


Epoch 365 - avg_train_loss: 1.2602  avg_val_loss: 1.9434  time: 3s
Epoch 365 - Score: 0.5524


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0320(1.9434) 
Epoch: [366][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2801(1.2801) Grad: 2.4808 LR: 0.000030  
Epoch: [366][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1889(1.2185) Grad: 2.6823 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8296(1.8296) 


Epoch 366 - avg_train_loss: 1.2185  avg_val_loss: 1.8750  time: 4s
Epoch 366 - Score: 0.5652


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9335(1.8750) 
Epoch: [367][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1775(1.1775) Grad: 2.7123 LR: 0.000088  
Epoch: [367][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1872(1.2188) Grad: 2.5544 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9042(1.9042) 


Epoch 367 - avg_train_loss: 1.2188  avg_val_loss: 1.9625  time: 3s
Epoch 367 - Score: 0.5551


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0597(1.9625) 
Epoch: [368][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3072(1.3072) Grad: 2.7137 LR: 0.000409  
Epoch: [368][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3077(1.2773) Grad: 2.6592 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9178(1.9178) 


Epoch 368 - avg_train_loss: 1.2773  avg_val_loss: 1.9793  time: 3s
Epoch 368 - Score: 0.5360


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0617(1.9793) 
Epoch: [369][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2246(1.2246) Grad: 2.5927 LR: 0.000406  
Epoch: [369][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4585(1.3542) Grad: 3.0800 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1273(2.1273) 


Epoch 369 - avg_train_loss: 1.3542  avg_val_loss: 2.2033  time: 4s
Epoch 369 - Score: 0.4798


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2509(2.2033) 
Epoch: [370][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2910(1.2910) Grad: 2.6722 LR: 0.000409  
Epoch: [370][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5636(1.4422) Grad: 3.2396 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4353(2.4353) 


Epoch 370 - avg_train_loss: 1.4422  avg_val_loss: 2.4709  time: 4s
Epoch 370 - Score: 0.4201


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.5090(2.4709) 
Epoch: [371][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.4034(1.4034) Grad: 3.3135 LR: 0.000365  
Epoch: [371][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5401(1.4994) Grad: 2.8708 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5050(2.5050) 


Epoch 371 - avg_train_loss: 1.4994  avg_val_loss: 2.5937  time: 3s
Epoch 371 - Score: 0.4095


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7004(2.5937) 
Epoch: [372][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5384(1.5384) Grad: 2.9521 LR: 0.000278  
Epoch: [372][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4360(1.4730) Grad: 2.6911 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.2421(2.2421) 


Epoch 372 - avg_train_loss: 1.4730  avg_val_loss: 2.3666  time: 4s
Epoch 372 - Score: 0.4441


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4187(2.3666) 
Epoch: [373][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3882(1.3882) Grad: 2.5330 LR: 0.000174  
Epoch: [373][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4132(1.4084) Grad: 2.8464 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1915(2.1915) 


Epoch 373 - avg_train_loss: 1.4084  avg_val_loss: 2.2534  time: 3s
Epoch 373 - Score: 0.4707


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3123(2.2534) 
Epoch: [374][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4100(1.4100) Grad: 2.7642 LR: 0.000085  
Epoch: [374][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3624(1.3235) Grad: 2.5768 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9415(1.9415) 


Epoch 374 - avg_train_loss: 1.3235  avg_val_loss: 1.9812  time: 3s
Epoch 374 - Score: 0.5367


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0477(1.9812) 
Epoch: [375][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2132(1.2132) Grad: 2.9000 LR: 0.000038  
Epoch: [375][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3200(1.2555) Grad: 2.6730 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 1.8454(1.8454) 


Epoch 375 - avg_train_loss: 1.2555  avg_val_loss: 1.8892  time: 4s
Epoch 375 - Score: 0.5596


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9720(1.8892) 
Epoch: [376][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1699(1.1699) Grad: 2.4866 LR: 0.000030  
Epoch: [376][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2814(1.2138) Grad: 2.7644 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8338(1.8338) 


Epoch 376 - avg_train_loss: 1.2138  avg_val_loss: 1.8753  time: 3s
Epoch 376 - Score: 0.5639


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9623(1.8753) 
Epoch: [377][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2599(1.2599) Grad: 2.5225 LR: 0.000088  
Epoch: [377][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2146(1.2151) Grad: 2.7639 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8511(1.8511) 


Epoch 377 - avg_train_loss: 1.2151  avg_val_loss: 1.8954  time: 3s
Epoch 377 - Score: 0.5596


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9764(1.8954) 
Epoch: [378][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1469(1.1469) Grad: 2.5654 LR: 0.000409  
Epoch: [378][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4484(1.2548) Grad: 2.8898 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9758(1.9758) 


Epoch 378 - avg_train_loss: 1.2548  avg_val_loss: 2.0465  time: 3s
Epoch 378 - Score: 0.5149


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1310(2.0465) 
Epoch: [379][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.3171(1.3171) Grad: 2.7606 LR: 0.000406  
Epoch: [379][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3091(1.3422) Grad: 2.7639 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2296(2.2296) 


Epoch 379 - avg_train_loss: 1.3422  avg_val_loss: 2.2644  time: 4s
Epoch 379 - Score: 0.4734


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.3319(2.2644) 
Epoch: [380][0/138] Elapsed 0m 0s (remain 0m 29s) Loss: 1.4154(1.4154) Grad: 2.9394 LR: 0.000409  
Epoch: [380][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5031(1.4471) Grad: 2.9466 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3489(2.3489) 


Epoch 380 - avg_train_loss: 1.4471  avg_val_loss: 2.4127  time: 4s
Epoch 380 - Score: 0.4251


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4434(2.4127) 
Epoch: [381][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3520(1.3520) Grad: 2.6548 LR: 0.000365  
Epoch: [381][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4366(1.4973) Grad: 2.6460 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7209(2.7209) 


Epoch 381 - avg_train_loss: 1.4973  avg_val_loss: 2.7919  time: 3s
Epoch 381 - Score: 0.3544


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8854(2.7919) 
Epoch: [382][0/138] Elapsed 0m 0s (remain 0m 29s) Loss: 1.4717(1.4717) Grad: 2.7322 LR: 0.000278  
Epoch: [382][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3561(1.4755) Grad: 2.4519 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.3903(2.3903) 


Epoch 382 - avg_train_loss: 1.4755  avg_val_loss: 2.4894  time: 4s
Epoch 382 - Score: 0.4332


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5203(2.4894) 
Epoch: [383][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3663(1.3663) Grad: 2.5294 LR: 0.000174  
Epoch: [383][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4470(1.4027) Grad: 2.7737 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1561(2.1561) 


Epoch 383 - avg_train_loss: 1.4027  avg_val_loss: 2.2446  time: 3s
Epoch 383 - Score: 0.4755


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3077(2.2446) 
Epoch: [384][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3350(1.3350) Grad: 2.7162 LR: 0.000085  
Epoch: [384][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4104(1.3176) Grad: 2.6102 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9571(1.9571) 


Epoch 384 - avg_train_loss: 1.3176  avg_val_loss: 2.0068  time: 3s
Epoch 384 - Score: 0.5318


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1098(2.0068) 
Epoch: [385][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2877(1.2877) Grad: 2.5747 LR: 0.000038  
Epoch: [385][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3461(1.2547) Grad: 2.6082 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8149(1.8149) 


Epoch 385 - avg_train_loss: 1.2547  avg_val_loss: 1.8774  time: 4s
Epoch 385 - Score: 0.5584


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9410(1.8774) 
Epoch: [386][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.1100(1.1100) Grad: 2.4135 LR: 0.000030  
Epoch: [386][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1954(1.2068) Grad: 2.4940 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8184(1.8184) 


Epoch 386 - avg_train_loss: 1.2068  avg_val_loss: 1.8734  time: 3s
Epoch 386 - Score: 0.5649


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9533(1.8734) 
Epoch: [387][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1981(1.1981) Grad: 2.6241 LR: 0.000088  
Epoch: [387][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2579(1.2114) Grad: 2.6117 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8334(1.8334) 


Epoch 387 - avg_train_loss: 1.2114  avg_val_loss: 1.8959  time: 3s
Epoch 387 - Score: 0.5598


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9715(1.8959) 
Epoch: [388][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3449(1.3449) Grad: 2.6444 LR: 0.000409  
Epoch: [388][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2380(1.2588) Grad: 2.5529 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9623(1.9623) 


Epoch 388 - avg_train_loss: 1.2588  avg_val_loss: 1.9979  time: 4s
Epoch 388 - Score: 0.5319


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0750(1.9979) 
Epoch: [389][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.3534(1.3534) Grad: 2.8012 LR: 0.000406  
Epoch: [389][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1904(1.3348) Grad: 2.5326 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2249(2.2249) 


Epoch 389 - avg_train_loss: 1.3348  avg_val_loss: 2.2622  time: 4s
Epoch 389 - Score: 0.4685


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3392(2.2622) 
Epoch: [390][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3852(1.3852) Grad: 2.8373 LR: 0.000409  
Epoch: [390][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5068(1.4330) Grad: 2.7666 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.8511(2.8511) 


Epoch 390 - avg_train_loss: 1.4330  avg_val_loss: 2.9419  time: 3s
Epoch 390 - Score: 0.3212


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9878(2.9419) 
Epoch: [391][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5316(1.5316) Grad: 3.0371 LR: 0.000365  
Epoch: [391][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3779(1.4803) Grad: 2.5776 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.6543(2.6543) 


Epoch 391 - avg_train_loss: 1.4803  avg_val_loss: 2.6803  time: 4s
Epoch 391 - Score: 0.3829


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7317(2.6803) 
Epoch: [392][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.6382(1.6382) Grad: 2.8028 LR: 0.000278  
Epoch: [392][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5554(1.4585) Grad: 2.8905 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4156(2.4156) 


Epoch 392 - avg_train_loss: 1.4585  avg_val_loss: 2.4444  time: 3s
Epoch 392 - Score: 0.4393


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4447(2.4444) 
Epoch: [393][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3713(1.3713) Grad: 2.6136 LR: 0.000174  
Epoch: [393][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3373(1.4063) Grad: 2.7529 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0814(2.0814) 


Epoch 393 - avg_train_loss: 1.4063  avg_val_loss: 2.1590  time: 3s
Epoch 393 - Score: 0.4860


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2106(2.1590) 
Epoch: [394][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4245(1.4245) Grad: 2.8612 LR: 0.000085  
Epoch: [394][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3171(1.3169) Grad: 2.6648 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 10s) Loss: 1.9614(1.9614) 


Epoch 394 - avg_train_loss: 1.3169  avg_val_loss: 1.9743  time: 4s
Epoch 394 - Score: 0.5301


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 2.0927(1.9743) 
Epoch: [395][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.2618(1.2618) Grad: 2.8676 LR: 0.000038  
Epoch: [395][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2004(1.2414) Grad: 2.4917 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8549(1.8549) 


Epoch 395 - avg_train_loss: 1.2414  avg_val_loss: 1.9038  time: 3s
Epoch 395 - Score: 0.5590


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0132(1.9038) 
Epoch: [396][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1779(1.1779) Grad: 2.4333 LR: 0.000030  
Epoch: [396][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1303(1.2067) Grad: 2.3718 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8600(1.8600) 


Epoch 396 - avg_train_loss: 1.2067  avg_val_loss: 1.8867  time: 3s
Epoch 396 - Score: 0.5641


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9929(1.8867) 
Epoch: [397][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1947(1.1947) Grad: 2.5108 LR: 0.000088  
Epoch: [397][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1700(1.2010) Grad: 2.7302 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8611(1.8611) 


Epoch 397 - avg_train_loss: 1.2010  avg_val_loss: 1.8935  time: 3s
Epoch 397 - Score: 0.5585


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9791(1.8935) 
Epoch: [398][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 1.2084(1.2084) Grad: 2.6346 LR: 0.000409  
Epoch: [398][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1091(1.2462) Grad: 2.6524 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9948(1.9948) 


Epoch 398 - avg_train_loss: 1.2462  avg_val_loss: 2.0410  time: 4s
Epoch 398 - Score: 0.5230


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1225(2.0410) 
Epoch: [399][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2685(1.2685) Grad: 2.6029 LR: 0.000406  
Epoch: [399][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4402(1.3315) Grad: 2.8974 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2101(2.2101) 


Epoch 399 - avg_train_loss: 1.3315  avg_val_loss: 2.1843  time: 3s
Epoch 399 - Score: 0.4852


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2727(2.1843) 
Epoch: [400][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4506(1.4506) Grad: 3.0137 LR: 0.000409  
Epoch: [400][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5442(1.4379) Grad: 3.0541 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4408(2.4408) 


Epoch 400 - avg_train_loss: 1.4379  avg_val_loss: 2.4931  time: 3s
Epoch 400 - Score: 0.4200


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5623(2.4931) 
Epoch: [401][0/138] Elapsed 0m 0s (remain 0m 30s) Loss: 1.4958(1.4958) Grad: 2.8598 LR: 0.000365  
Epoch: [401][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4734(1.4717) Grad: 2.7744 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4910(2.4910) 


Epoch 401 - avg_train_loss: 1.4717  avg_val_loss: 2.5933  time: 4s
Epoch 401 - Score: 0.3977


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6147(2.5933) 
Epoch: [402][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4569(1.4569) Grad: 2.5747 LR: 0.000278  
Epoch: [402][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4440(1.4534) Grad: 2.6861 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4793(2.4793) 


Epoch 402 - avg_train_loss: 1.4534  avg_val_loss: 2.5297  time: 3s
Epoch 402 - Score: 0.4072


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5662(2.5297) 
Epoch: [403][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2849(1.2849) Grad: 2.4924 LR: 0.000174  
Epoch: [403][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2874(1.3870) Grad: 2.6805 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.1934(2.1934) 


Epoch 403 - avg_train_loss: 1.3870  avg_val_loss: 2.2331  time: 3s
Epoch 403 - Score: 0.4730


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2402(2.2331) 
Epoch: [404][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4145(1.4145) Grad: 2.7333 LR: 0.000085  
Epoch: [404][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2904(1.3055) Grad: 2.6573 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9149(1.9149) 


Epoch 404 - avg_train_loss: 1.3055  avg_val_loss: 1.9943  time: 4s
Epoch 404 - Score: 0.5328


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0341(1.9943) 
Epoch: [405][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1848(1.1848) Grad: 2.4033 LR: 0.000038  
Epoch: [405][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2966(1.2402) Grad: 2.6195 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8449(1.8449) 


Epoch 405 - avg_train_loss: 1.2402  avg_val_loss: 1.8886  time: 3s
Epoch 405 - Score: 0.5604


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9628(1.8886) 
Epoch: [406][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1923(1.1923) Grad: 2.7379 LR: 0.000030  
Epoch: [406][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2398(1.1934) Grad: 2.6293 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8165(1.8165) 


Epoch 406 - avg_train_loss: 1.1934  avg_val_loss: 1.8602  time: 3s
Epoch 406 - Score: 0.5697
Epoch 406 - Save Best score: 0.5697 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9441(1.8602) 
Epoch: [407][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1407(1.1407) Grad: 2.5626 LR: 0.000088  
Epoch: [407][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2743(1.2027) Grad: 2.7406 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8537(1.8537) 


Epoch 407 - avg_train_loss: 1.2027  avg_val_loss: 1.8978  time: 4s
Epoch 407 - Score: 0.5646


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9731(1.8978) 
Epoch: [408][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1776(1.1776) Grad: 2.4875 LR: 0.000409  
Epoch: [408][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2507(1.2414) Grad: 2.5361 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9680(1.9680) 


Epoch 408 - avg_train_loss: 1.2414  avg_val_loss: 2.0224  time: 3s
Epoch 408 - Score: 0.5228


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0659(2.0224) 
Epoch: [409][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2245(1.2245) Grad: 2.5981 LR: 0.000406  
Epoch: [409][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2747(1.3231) Grad: 2.4761 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2858(2.2858) 


Epoch 409 - avg_train_loss: 1.3231  avg_val_loss: 2.3298  time: 3s
Epoch 409 - Score: 0.4576


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4010(2.3298) 
Epoch: [410][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1600(1.1600) Grad: 2.4845 LR: 0.000409  
Epoch: [410][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4613(1.4324) Grad: 2.8534 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.5171(2.5171) 


Epoch 410 - avg_train_loss: 1.4324  avg_val_loss: 2.5904  time: 4s
Epoch 410 - Score: 0.4039


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6595(2.5904) 
Epoch: [411][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4150(1.4150) Grad: 2.7758 LR: 0.000365  
Epoch: [411][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4166(1.4798) Grad: 2.7358 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5783(2.5783) 


Epoch 411 - avg_train_loss: 1.4798  avg_val_loss: 2.6091  time: 3s
Epoch 411 - Score: 0.3962


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7054(2.6091) 
Epoch: [412][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4930(1.4930) Grad: 2.7816 LR: 0.000278  
Epoch: [412][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6185(1.4642) Grad: 2.7664 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.5074(2.5074) 


Epoch 412 - avg_train_loss: 1.4642  avg_val_loss: 2.5794  time: 3s
Epoch 412 - Score: 0.3909


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6313(2.5794) 
Epoch: [413][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4835(1.4835) Grad: 2.9546 LR: 0.000174  
Epoch: [413][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3488(1.3994) Grad: 2.7876 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0990(2.0990) 


Epoch 413 - avg_train_loss: 1.3994  avg_val_loss: 2.1129  time: 4s
Epoch 413 - Score: 0.5002


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2309(2.1129) 
Epoch: [414][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2539(1.2539) Grad: 2.6158 LR: 0.000085  
Epoch: [414][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4189(1.3084) Grad: 2.8193 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9055(1.9055) 


Epoch 414 - avg_train_loss: 1.3084  avg_val_loss: 1.9577  time: 3s
Epoch 414 - Score: 0.5458


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0798(1.9577) 
Epoch: [415][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1972(1.1972) Grad: 2.6104 LR: 0.000038  
Epoch: [415][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2697(1.2290) Grad: 2.6951 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8797(1.8797) 


Epoch 415 - avg_train_loss: 1.2290  avg_val_loss: 1.8968  time: 3s
Epoch 415 - Score: 0.5615


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9673(1.8968) 
Epoch: [416][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2597(1.2597) Grad: 2.5733 LR: 0.000030  
Epoch: [416][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1308(1.1913) Grad: 2.6013 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.8011(1.8011) 


Epoch 416 - avg_train_loss: 1.1913  avg_val_loss: 1.8522  time: 4s
Epoch 416 - Score: 0.5685


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9223(1.8522) 
Epoch: [417][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2184(1.2184) Grad: 2.3956 LR: 0.000088  
Epoch: [417][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1812(1.1967) Grad: 2.6575 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8828(1.8828) 


Epoch 417 - avg_train_loss: 1.1967  avg_val_loss: 1.9010  time: 3s
Epoch 417 - Score: 0.5590


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9911(1.9010) 
Epoch: [418][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.0250(1.0250) Grad: 2.2485 LR: 0.000409  
Epoch: [418][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2231(1.2381) Grad: 2.5424 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9806(1.9806) 


Epoch 418 - avg_train_loss: 1.2381  avg_val_loss: 2.0069  time: 3s
Epoch 418 - Score: 0.5325


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0578(2.0069) 
Epoch: [419][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.2566(1.2566) Grad: 2.5228 LR: 0.000406  
Epoch: [419][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2207(1.3207) Grad: 2.5232 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2287(2.2287) 


Epoch 419 - avg_train_loss: 1.3207  avg_val_loss: 2.2431  time: 3s
Epoch 419 - Score: 0.4716


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2863(2.2431) 
Epoch: [420][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.2336(1.2336) Grad: 2.6456 LR: 0.000409  
Epoch: [420][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.6002(1.4291) Grad: 2.8941 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4916(2.4916) 


Epoch 420 - avg_train_loss: 1.4291  avg_val_loss: 2.5488  time: 3s
Epoch 420 - Score: 0.4030


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5742(2.5488) 
Epoch: [421][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4242(1.4242) Grad: 2.8615 LR: 0.000365  
Epoch: [421][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4893(1.4627) Grad: 2.7605 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 9s) Loss: 2.7328(2.7328) 


Epoch 421 - avg_train_loss: 1.4627  avg_val_loss: 2.7452  time: 3s
Epoch 421 - Score: 0.3626


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8585(2.7452) 
Epoch: [422][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4484(1.4484) Grad: 2.6416 LR: 0.000278  
Epoch: [422][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4323(1.4344) Grad: 2.9408 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3681(2.3681) 


Epoch 422 - avg_train_loss: 1.4344  avg_val_loss: 2.4289  time: 3s
Epoch 422 - Score: 0.4297


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4933(2.4289) 
Epoch: [423][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3935(1.3935) Grad: 2.5657 LR: 0.000174  
Epoch: [423][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4073(1.3898) Grad: 2.5813 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0965(2.0965) 


Epoch 423 - avg_train_loss: 1.3898  avg_val_loss: 2.1582  time: 4s
Epoch 423 - Score: 0.4942


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2445(2.1582) 
Epoch: [424][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4946(1.4946) Grad: 2.7465 LR: 0.000085  
Epoch: [424][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3744(1.2908) Grad: 2.7622 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9573(1.9573) 


Epoch 424 - avg_train_loss: 1.2908  avg_val_loss: 1.9879  time: 3s
Epoch 424 - Score: 0.5367


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0576(1.9879) 
Epoch: [425][0/138] Elapsed 0m 0s (remain 0m 23s) Loss: 1.1520(1.1520) Grad: 2.5798 LR: 0.000038  
Epoch: [425][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2697(1.2265) Grad: 2.7948 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8783(1.8783) 


Epoch 425 - avg_train_loss: 1.2265  avg_val_loss: 1.9070  time: 4s
Epoch 425 - Score: 0.5619


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9615(1.9070) 
Epoch: [426][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1308(1.1308) Grad: 2.4563 LR: 0.000030  
Epoch: [426][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2219(1.1922) Grad: 2.5155 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8699(1.8699) 


Epoch 426 - avg_train_loss: 1.1922  avg_val_loss: 1.8820  time: 4s
Epoch 426 - Score: 0.5628


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9342(1.8820) 
Epoch: [427][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1044(1.1044) Grad: 2.3734 LR: 0.000088  
Epoch: [427][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2194(1.1878) Grad: 2.4913 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8638(1.8638) 


Epoch 427 - avg_train_loss: 1.1878  avg_val_loss: 1.8806  time: 3s
Epoch 427 - Score: 0.5641


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9276(1.8806) 
Epoch: [428][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1527(1.1527) Grad: 2.7585 LR: 0.000409  
Epoch: [428][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1894(1.2304) Grad: 2.6115 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9768(1.9768) 


Epoch 428 - avg_train_loss: 1.2304  avg_val_loss: 2.0121  time: 3s
Epoch 428 - Score: 0.5281


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1099(2.0121) 
Epoch: [429][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2561(1.2561) Grad: 2.6900 LR: 0.000406  
Epoch: [429][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3562(1.3183) Grad: 2.9678 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1777(2.1777) 


Epoch 429 - avg_train_loss: 1.3183  avg_val_loss: 2.2823  time: 4s
Epoch 429 - Score: 0.4637


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3472(2.2823) 
Epoch: [430][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2656(1.2656) Grad: 2.7504 LR: 0.000409  
Epoch: [430][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4983(1.4084) Grad: 2.8723 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.4487(2.4487) 


Epoch 430 - avg_train_loss: 1.4084  avg_val_loss: 2.5277  time: 3s
Epoch 430 - Score: 0.4049


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6085(2.5277) 
Epoch: [431][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3506(1.3506) Grad: 2.7718 LR: 0.000365  
Epoch: [431][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5546(1.4555) Grad: 2.8057 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5973(2.5973) 


Epoch 431 - avg_train_loss: 1.4555  avg_val_loss: 2.6464  time: 3s
Epoch 431 - Score: 0.3790


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6692(2.6464) 
Epoch: [432][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4560(1.4560) Grad: 2.8062 LR: 0.000278  
Epoch: [432][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3358(1.4533) Grad: 2.6636 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.7024(2.7024) 


Epoch 432 - avg_train_loss: 1.4533  avg_val_loss: 2.7474  time: 4s
Epoch 432 - Score: 0.3673


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8458(2.7474) 
Epoch: [433][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4097(1.4097) Grad: 2.8362 LR: 0.000174  
Epoch: [433][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3909(1.3810) Grad: 2.7393 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2790(2.2790) 


Epoch 433 - avg_train_loss: 1.3810  avg_val_loss: 2.3896  time: 3s
Epoch 433 - Score: 0.4438


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4263(2.3896) 
Epoch: [434][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3645(1.3645) Grad: 2.5594 LR: 0.000085  
Epoch: [434][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2885(1.2915) Grad: 2.5927 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0066(2.0066) 


Epoch 434 - avg_train_loss: 1.2915  avg_val_loss: 2.0166  time: 4s
Epoch 434 - Score: 0.5320


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0390(2.0166) 
Epoch: [435][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2047(1.2047) Grad: 2.5407 LR: 0.000038  
Epoch: [435][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1917(1.2186) Grad: 2.6087 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9014(1.9014) 


Epoch 435 - avg_train_loss: 1.2186  avg_val_loss: 1.8959  time: 4s
Epoch 435 - Score: 0.5559


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9526(1.8959) 
Epoch: [436][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2216(1.2216) Grad: 2.3696 LR: 0.000030  
Epoch: [436][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2920(1.1829) Grad: 2.5609 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8273(1.8273) 


Epoch 436 - avg_train_loss: 1.1829  avg_val_loss: 1.8570  time: 3s
Epoch 436 - Score: 0.5710
Epoch 436 - Save Best score: 0.5710 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9248(1.8570) 
Epoch: [437][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2225(1.2225) Grad: 2.4818 LR: 0.000088  
Epoch: [437][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1419(1.1903) Grad: 2.4726 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8673(1.8673) 


Epoch 437 - avg_train_loss: 1.1903  avg_val_loss: 1.8861  time: 3s
Epoch 437 - Score: 0.5631


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9496(1.8861) 
Epoch: [438][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1808(1.1808) Grad: 2.5344 LR: 0.000409  
Epoch: [438][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1714(1.2273) Grad: 2.7792 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9521(1.9521) 


Epoch 438 - avg_train_loss: 1.2273  avg_val_loss: 1.9933  time: 4s
Epoch 438 - Score: 0.5307


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0271(1.9933) 
Epoch: [439][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1738(1.1738) Grad: 2.5701 LR: 0.000406  
Epoch: [439][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3137(1.3088) Grad: 2.6922 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 2.2745(2.2745) 


Epoch 439 - avg_train_loss: 1.3088  avg_val_loss: 2.2937  time: 3s
Epoch 439 - Score: 0.4573


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3765(2.2937) 
Epoch: [440][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4235(1.4235) Grad: 3.0950 LR: 0.000409  
Epoch: [440][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5059(1.4022) Grad: 2.9588 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7932(2.7932) 


Epoch 440 - avg_train_loss: 1.4022  avg_val_loss: 2.8797  time: 3s
Epoch 440 - Score: 0.3422


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.9234(2.8797) 
Epoch: [441][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4907(1.4907) Grad: 2.9577 LR: 0.000365  
Epoch: [441][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4210(1.4650) Grad: 2.7401 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4838(2.4838) 


Epoch 441 - avg_train_loss: 1.4650  avg_val_loss: 2.6011  time: 3s
Epoch 441 - Score: 0.3890


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6392(2.6011) 
Epoch: [442][0/138] Elapsed 0m 0s (remain 0m 28s) Loss: 1.3820(1.3820) Grad: 3.2113 LR: 0.000278  
Epoch: [442][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4976(1.4630) Grad: 3.1697 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3865(2.3865) 


Epoch 442 - avg_train_loss: 1.4630  avg_val_loss: 2.4766  time: 4s
Epoch 442 - Score: 0.4192


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5465(2.4766) 
Epoch: [443][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3391(1.3391) Grad: 2.7086 LR: 0.000174  
Epoch: [443][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3529(1.3920) Grad: 2.5316 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.5773(2.5773) 


Epoch 443 - avg_train_loss: 1.3920  avg_val_loss: 2.5311  time: 4s
Epoch 443 - Score: 0.4229


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6589(2.5311) 
Epoch: [444][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3726(1.3726) Grad: 2.7778 LR: 0.000085  
Epoch: [444][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3208(1.3047) Grad: 2.8033 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0758(2.0758) 


Epoch 444 - avg_train_loss: 1.3047  avg_val_loss: 2.0917  time: 3s
Epoch 444 - Score: 0.5168


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1245(2.0917) 
Epoch: [445][0/138] Elapsed 0m 0s (remain 0m 27s) Loss: 1.3328(1.3328) Grad: 2.9042 LR: 0.000038  
Epoch: [445][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1887(1.2338) Grad: 2.3710 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9339(1.9339) 


Epoch 445 - avg_train_loss: 1.2338  avg_val_loss: 1.9260  time: 4s
Epoch 445 - Score: 0.5573


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0063(1.9260) 
Epoch: [446][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1483(1.1483) Grad: 2.6162 LR: 0.000030  
Epoch: [446][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1354(1.1861) Grad: 2.5363 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8283(1.8283) 


Epoch 446 - avg_train_loss: 1.1861  avg_val_loss: 1.8706  time: 3s
Epoch 446 - Score: 0.5669


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9236(1.8706) 
Epoch: [447][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.0805(1.0805) Grad: 2.4263 LR: 0.000088  
Epoch: [447][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2186(1.1890) Grad: 2.4028 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8705(1.8705) 


Epoch 447 - avg_train_loss: 1.1890  avg_val_loss: 1.9006  time: 3s
Epoch 447 - Score: 0.5605


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9786(1.9006) 
Epoch: [448][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1308(1.1308) Grad: 2.4762 LR: 0.000409  
Epoch: [448][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3036(1.2370) Grad: 2.6146 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 7s) Loss: 1.9281(1.9281) 


Epoch 448 - avg_train_loss: 1.2370  avg_val_loss: 1.9915  time: 4s
Epoch 448 - Score: 0.5328


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0890(1.9915) 
Epoch: [449][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2762(1.2762) Grad: 2.7304 LR: 0.000406  
Epoch: [449][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4101(1.3154) Grad: 2.8327 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1215(2.1215) 


Epoch 449 - avg_train_loss: 1.3154  avg_val_loss: 2.1534  time: 3s
Epoch 449 - Score: 0.4994


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2482(2.1534) 
Epoch: [450][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3855(1.3855) Grad: 2.6407 LR: 0.000409  
Epoch: [450][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4183(1.3992) Grad: 2.7996 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4241(2.4241) 


Epoch 450 - avg_train_loss: 1.3992  avg_val_loss: 2.4745  time: 3s
Epoch 450 - Score: 0.4126


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5365(2.4745) 
Epoch: [451][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3840(1.3840) Grad: 2.7514 LR: 0.000365  
Epoch: [451][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4052(1.4581) Grad: 2.7657 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3589(2.3589) 


Epoch 451 - avg_train_loss: 1.4581  avg_val_loss: 2.4649  time: 4s
Epoch 451 - Score: 0.4315


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5436(2.4649) 
Epoch: [452][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4798(1.4798) Grad: 2.8963 LR: 0.000278  
Epoch: [452][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.5169(1.4438) Grad: 2.9575 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3238(2.3238) 


Epoch 452 - avg_train_loss: 1.4438  avg_val_loss: 2.3783  time: 4s
Epoch 452 - Score: 0.4498


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3916(2.3783) 
Epoch: [453][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3003(1.3003) Grad: 2.5449 LR: 0.000174  
Epoch: [453][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2905(1.3730) Grad: 2.4418 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1809(2.1809) 


Epoch 453 - avg_train_loss: 1.3730  avg_val_loss: 2.2292  time: 3s
Epoch 453 - Score: 0.4745


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3193(2.2292) 
Epoch: [454][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2052(1.2052) Grad: 2.4028 LR: 0.000085  
Epoch: [454][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3287(1.2847) Grad: 2.5265 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9377(1.9377) 


Epoch 454 - avg_train_loss: 1.2847  avg_val_loss: 1.9644  time: 4s
Epoch 454 - Score: 0.5415


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0381(1.9644) 
Epoch: [455][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2877(1.2877) Grad: 2.9302 LR: 0.000038  
Epoch: [455][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2307(1.2190) Grad: 2.6150 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8720(1.8720) 


Epoch 455 - avg_train_loss: 1.2190  avg_val_loss: 1.9196  time: 3s
Epoch 455 - Score: 0.5583


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9889(1.9196) 
Epoch: [456][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1142(1.1142) Grad: 2.4140 LR: 0.000030  
Epoch: [456][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1285(1.1820) Grad: 2.4696 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8237(1.8237) 


Epoch 456 - avg_train_loss: 1.1820  avg_val_loss: 1.8522  time: 3s
Epoch 456 - Score: 0.5712
Epoch 456 - Save Best score: 0.5712 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9267(1.8522) 
Epoch: [457][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2061(1.2061) Grad: 2.7447 LR: 0.000088  
Epoch: [457][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2340(1.1754) Grad: 2.5252 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 13s) Loss: 1.8049(1.8049) 


Epoch 457 - avg_train_loss: 1.1754  avg_val_loss: 1.8686  time: 4s
Epoch 457 - Score: 0.5679


EVAL: [45/46] Elapsed 0m 1s (remain 0m 0s) Loss: 1.9329(1.8686) 
Epoch: [458][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.0487(1.0487) Grad: 2.4157 LR: 0.000409  
Epoch: [458][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1826(1.2228) Grad: 2.4419 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.9128(1.9128) 


Epoch 458 - avg_train_loss: 1.2228  avg_val_loss: 1.9557  time: 3s
Epoch 458 - Score: 0.5376


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0318(1.9557) 
Epoch: [459][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1987(1.1987) Grad: 2.6643 LR: 0.000406  
Epoch: [459][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3702(1.3056) Grad: 2.7472 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1577(2.1577) 


Epoch 459 - avg_train_loss: 1.3056  avg_val_loss: 2.2065  time: 3s
Epoch 459 - Score: 0.4776


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2524(2.2065) 
Epoch: [460][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2232(1.2232) Grad: 2.4844 LR: 0.000409  
Epoch: [460][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4473(1.3950) Grad: 3.0170 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.2579(2.2579) 


Epoch 460 - avg_train_loss: 1.3950  avg_val_loss: 2.3688  time: 3s
Epoch 460 - Score: 0.4481


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4816(2.3688) 
Epoch: [461][0/138] Elapsed 0m 0s (remain 0m 25s) Loss: 1.5079(1.5079) Grad: 3.0835 LR: 0.000365  
Epoch: [461][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.5381(1.4415) Grad: 3.0113 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6104(2.6104) 


Epoch 461 - avg_train_loss: 1.4415  avg_val_loss: 2.6720  time: 4s
Epoch 461 - Score: 0.3892


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6942(2.6720) 
Epoch: [462][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3577(1.3577) Grad: 2.7722 LR: 0.000278  
Epoch: [462][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4736(1.4388) Grad: 2.7070 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.7953(2.7953) 


Epoch 462 - avg_train_loss: 1.4388  avg_val_loss: 2.8107  time: 3s
Epoch 462 - Score: 0.3557


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8777(2.8107) 
Epoch: [463][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4179(1.4179) Grad: 2.9343 LR: 0.000174  
Epoch: [463][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5258(1.3653) Grad: 3.0141 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2516(2.2516) 


Epoch 463 - avg_train_loss: 1.3653  avg_val_loss: 2.2777  time: 3s
Epoch 463 - Score: 0.4627


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4152(2.2777) 
Epoch: [464][0/138] Elapsed 0m 0s (remain 0m 31s) Loss: 1.3754(1.3754) Grad: 2.4781 LR: 0.000085  
Epoch: [464][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2573(1.2828) Grad: 2.6455 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0525(2.0525) 


Epoch 464 - avg_train_loss: 1.2828  avg_val_loss: 2.0752  time: 4s
Epoch 464 - Score: 0.5159


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1624(2.0752) 
Epoch: [465][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.1730(1.1730) Grad: 2.4875 LR: 0.000038  
Epoch: [465][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3571(1.2100) Grad: 2.8327 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8359(1.8359) 


Epoch 465 - avg_train_loss: 1.2100  avg_val_loss: 1.8776  time: 3s
Epoch 465 - Score: 0.5609


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9220(1.8776) 
Epoch: [466][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1143(1.1143) Grad: 2.5380 LR: 0.000030  
Epoch: [466][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2624(1.1758) Grad: 2.5852 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8315(1.8315) 


Epoch 466 - avg_train_loss: 1.1758  avg_val_loss: 1.8520  time: 4s
Epoch 466 - Score: 0.5715
Epoch 466 - Save Best score: 0.5715 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9064(1.8520) 
Epoch: [467][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.0827(1.0827) Grad: 2.2423 LR: 0.000088  
Epoch: [467][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1574(1.1706) Grad: 2.3740 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8517(1.8517) 


Epoch 467 - avg_train_loss: 1.1706  avg_val_loss: 1.8994  time: 4s
Epoch 467 - Score: 0.5625


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9580(1.8994) 
Epoch: [468][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.0955(1.0955) Grad: 2.3786 LR: 0.000409  
Epoch: [468][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2764(1.2198) Grad: 2.5752 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9035(1.9035) 


Epoch 468 - avg_train_loss: 1.2198  avg_val_loss: 1.9644  time: 3s
Epoch 468 - Score: 0.5397


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0028(1.9644) 
Epoch: [469][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1982(1.1982) Grad: 2.4391 LR: 0.000406  
Epoch: [469][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3653(1.3062) Grad: 2.7752 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0958(2.0958) 


Epoch 469 - avg_train_loss: 1.3062  avg_val_loss: 2.1498  time: 3s
Epoch 469 - Score: 0.4884


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2347(2.1498) 
Epoch: [470][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2440(1.2440) Grad: 2.7061 LR: 0.000409  
Epoch: [470][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3871(1.3848) Grad: 2.8478 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.7313(2.7313) 


Epoch 470 - avg_train_loss: 1.3848  avg_val_loss: 2.7911  time: 4s
Epoch 470 - Score: 0.3554


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8407(2.7911) 
Epoch: [471][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.4158(1.4158) Grad: 2.7366 LR: 0.000365  
Epoch: [471][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4743(1.4428) Grad: 2.7615 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4686(2.4686) 


Epoch 471 - avg_train_loss: 1.4428  avg_val_loss: 2.5115  time: 3s
Epoch 471 - Score: 0.4107


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5463(2.5115) 
Epoch: [472][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4301(1.4301) Grad: 2.7025 LR: 0.000278  
Epoch: [472][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.5567(1.4299) Grad: 2.6036 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6956(2.6956) 


Epoch 472 - avg_train_loss: 1.4299  avg_val_loss: 2.7648  time: 3s
Epoch 472 - Score: 0.3633


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.7969(2.7648) 
Epoch: [473][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.3740(1.3740) Grad: 2.4140 LR: 0.000174  
Epoch: [473][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3539(1.3667) Grad: 2.6033 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.2557(2.2557) 


Epoch 473 - avg_train_loss: 1.3667  avg_val_loss: 2.2976  time: 4s
Epoch 473 - Score: 0.4603


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3464(2.2976) 
Epoch: [474][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3749(1.3749) Grad: 2.7232 LR: 0.000085  
Epoch: [474][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3054(1.2743) Grad: 2.8265 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9381(1.9381) 


Epoch 474 - avg_train_loss: 1.2743  avg_val_loss: 1.9943  time: 3s
Epoch 474 - Score: 0.5301


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0757(1.9943) 
Epoch: [475][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1884(1.1884) Grad: 2.3993 LR: 0.000038  
Epoch: [475][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1889(1.2134) Grad: 2.4205 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.9157(1.9157) 


Epoch 475 - avg_train_loss: 1.2134  avg_val_loss: 1.9178  time: 3s
Epoch 475 - Score: 0.5596


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0275(1.9178) 
Epoch: [476][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.0517(1.0517) Grad: 2.4408 LR: 0.000030  
Epoch: [476][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1504(1.1719) Grad: 2.6287 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8418(1.8418) 


Epoch 476 - avg_train_loss: 1.1719  avg_val_loss: 1.8768  time: 4s
Epoch 476 - Score: 0.5653


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9593(1.8768) 
Epoch: [477][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1101(1.1101) Grad: 2.4655 LR: 0.000088  
Epoch: [477][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9933(1.1681) Grad: 2.3692 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8413(1.8413) 


Epoch 477 - avg_train_loss: 1.1681  avg_val_loss: 1.8764  time: 3s
Epoch 477 - Score: 0.5668


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9866(1.8764) 
Epoch: [478][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2555(1.2555) Grad: 2.5314 LR: 0.000409  
Epoch: [478][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1029(1.2134) Grad: 2.3822 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.1390(2.1390) 


Epoch 478 - avg_train_loss: 1.2134  avg_val_loss: 2.1226  time: 3s
Epoch 478 - Score: 0.5107


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2078(2.1226) 
Epoch: [479][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.2664(1.2664) Grad: 2.6922 LR: 0.000406  
Epoch: [479][137/138] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2178(1.2915) Grad: 2.5865 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3141(2.3141) 


Epoch 479 - avg_train_loss: 1.2915  avg_val_loss: 2.3444  time: 4s
Epoch 479 - Score: 0.4563


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.4856(2.3444) 
Epoch: [480][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2700(1.2700) Grad: 2.7021 LR: 0.000409  
Epoch: [480][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4599(1.3858) Grad: 2.7539 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.8087(2.8087) 


Epoch 480 - avg_train_loss: 1.3858  avg_val_loss: 2.8394  time: 3s
Epoch 480 - Score: 0.3771


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8267(2.8394) 
Epoch: [481][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.5109(1.5109) Grad: 2.6770 LR: 0.000365  
Epoch: [481][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4207(1.4419) Grad: 3.0520 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6429(2.6429) 


Epoch 481 - avg_train_loss: 1.4419  avg_val_loss: 2.7749  time: 3s
Epoch 481 - Score: 0.3641


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.8798(2.7749) 
Epoch: [482][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.4016(1.4016) Grad: 2.8601 LR: 0.000278  
Epoch: [482][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4498(1.4356) Grad: 2.8364 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.4596(2.4596) 


Epoch 482 - avg_train_loss: 1.4356  avg_val_loss: 2.5192  time: 4s
Epoch 482 - Score: 0.4057


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.5815(2.5192) 
Epoch: [483][0/138] Elapsed 0m 0s (remain 0m 29s) Loss: 1.4429(1.4429) Grad: 2.7290 LR: 0.000174  
Epoch: [483][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2991(1.3650) Grad: 2.6650 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.0659(2.0659) 


Epoch 483 - avg_train_loss: 1.3650  avg_val_loss: 2.1824  time: 3s
Epoch 483 - Score: 0.4898


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2727(2.1824) 
Epoch: [484][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.2310(1.2310) Grad: 2.3501 LR: 0.000085  
Epoch: [484][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2413(1.2810) Grad: 2.5963 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0094(2.0094) 


Epoch 484 - avg_train_loss: 1.2810  avg_val_loss: 2.0159  time: 3s
Epoch 484 - Score: 0.5316


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0785(2.0159) 
Epoch: [485][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3674(1.3674) Grad: 2.6127 LR: 0.000038  
Epoch: [485][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.0902(1.2021) Grad: 2.3881 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8538(1.8538) 


Epoch 485 - avg_train_loss: 1.2021  avg_val_loss: 1.8848  time: 3s
Epoch 485 - Score: 0.5590


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9893(1.8848) 
Epoch: [486][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1034(1.1034) Grad: 2.3495 LR: 0.000030  
Epoch: [486][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2606(1.1599) Grad: 2.5981 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8330(1.8330) 


Epoch 486 - avg_train_loss: 1.1599  avg_val_loss: 1.8594  time: 4s
Epoch 486 - Score: 0.5692


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9506(1.8594) 
Epoch: [487][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1735(1.1735) Grad: 2.5296 LR: 0.000088  
Epoch: [487][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.0958(1.1667) Grad: 2.4556 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8725(1.8725) 


Epoch 487 - avg_train_loss: 1.1667  avg_val_loss: 1.8946  time: 3s
Epoch 487 - Score: 0.5654


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9728(1.8946) 
Epoch: [488][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1516(1.1516) Grad: 2.5622 LR: 0.000409  
Epoch: [488][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2660(1.2081) Grad: 2.8201 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9050(1.9050) 


Epoch 488 - avg_train_loss: 1.2081  avg_val_loss: 1.9600  time: 4s
Epoch 488 - Score: 0.5366


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0498(1.9600) 
Epoch: [489][0/138] Elapsed 0m 0s (remain 0m 22s) Loss: 1.1743(1.1743) Grad: 2.6717 LR: 0.000406  
Epoch: [489][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3652(1.2920) Grad: 2.6316 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3248(2.3248) 


Epoch 489 - avg_train_loss: 1.2920  avg_val_loss: 2.3339  time: 4s
Epoch 489 - Score: 0.4628


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3964(2.3339) 
Epoch: [490][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1901(1.1901) Grad: 2.7051 LR: 0.000409  
Epoch: [490][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4383(1.3909) Grad: 2.9624 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.6271(2.6271) 


Epoch 490 - avg_train_loss: 1.3909  avg_val_loss: 2.6553  time: 3s
Epoch 490 - Score: 0.3861


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6397(2.6553) 
Epoch: [491][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.4947(1.4947) Grad: 3.1320 LR: 0.000365  
Epoch: [491][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4883(1.4503) Grad: 2.9926 LR: 0.000365  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.4838(2.4838) 


Epoch 491 - avg_train_loss: 1.4503  avg_val_loss: 2.5448  time: 3s
Epoch 491 - Score: 0.4021


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.6069(2.5448) 
Epoch: [492][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.5488(1.5488) Grad: 3.3602 LR: 0.000278  
Epoch: [492][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4569(1.4262) Grad: 2.9769 LR: 0.000278  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.3603(2.3603) 


Epoch 492 - avg_train_loss: 1.4262  avg_val_loss: 2.3805  time: 4s
Epoch 492 - Score: 0.4509


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.3415(2.3805) 
Epoch: [493][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.3757(1.3757) Grad: 2.8299 LR: 0.000174  
Epoch: [493][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4409(1.3660) Grad: 3.0542 LR: 0.000174  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 2.0879(2.0879) 


Epoch 493 - avg_train_loss: 1.3660  avg_val_loss: 2.1583  time: 3s
Epoch 493 - Score: 0.4896


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.1638(2.1583) 
Epoch: [494][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.3519(1.3519) Grad: 2.9113 LR: 0.000085  
Epoch: [494][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3439(1.2737) Grad: 2.6978 LR: 0.000085  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 2.0167(2.0167) 


Epoch 494 - avg_train_loss: 1.2737  avg_val_loss: 2.0193  time: 3s
Epoch 494 - Score: 0.5268


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0822(2.0193) 
Epoch: [495][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1841(1.1841) Grad: 2.3456 LR: 0.000038  
Epoch: [495][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2177(1.2006) Grad: 2.4775 LR: 0.000038  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 8s) Loss: 1.8448(1.8448) 


Epoch 495 - avg_train_loss: 1.2006  avg_val_loss: 1.8810  time: 4s
Epoch 495 - Score: 0.5652


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9649(1.8810) 
Epoch: [496][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.1139(1.1139) Grad: 2.6799 LR: 0.000030  
Epoch: [496][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1829(1.1611) Grad: 2.6820 LR: 0.000030  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 1.8165(1.8165) 


Epoch 496 - avg_train_loss: 1.1611  avg_val_loss: 1.8425  time: 3s
Epoch 496 - Score: 0.5741
Epoch 496 - Save Best score: 0.5741 Model


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9092(1.8425) 
Epoch: [497][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1190(1.1190) Grad: 2.6025 LR: 0.000088  
Epoch: [497][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2012(1.1642) Grad: 2.5123 LR: 0.000088  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.8624(1.8624) 


Epoch 497 - avg_train_loss: 1.1642  avg_val_loss: 1.8865  time: 4s
Epoch 497 - Score: 0.5660


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 1.9587(1.8865) 
Epoch: [498][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.0920(1.0920) Grad: 2.4739 LR: 0.000409  
Epoch: [498][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3355(1.2000) Grad: 2.6226 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 1.9032(1.9032) 


Epoch 498 - avg_train_loss: 1.2000  avg_val_loss: 1.9428  time: 4s
Epoch 498 - Score: 0.5397


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.0023(1.9428) 
Epoch: [499][0/138] Elapsed 0m 0s (remain 0m 20s) Loss: 1.2542(1.2542) Grad: 2.6839 LR: 0.000406  
Epoch: [499][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3347(1.2862) Grad: 2.8258 LR: 0.000406  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 5s) Loss: 2.1573(2.1573) 


Epoch 499 - avg_train_loss: 1.2862  avg_val_loss: 2.2256  time: 3s
Epoch 499 - Score: 0.4750


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 2.2692(2.2256) 
Epoch: [500][0/138] Elapsed 0m 0s (remain 0m 21s) Loss: 1.1709(1.1709) Grad: 2.4774 LR: 0.000409  
Epoch: [500][137/138] Elapsed 0m 2s (remain 0m 0s) Loss: 1.4309(1.3794) Grad: 2.9606 LR: 0.000409  
EVAL: [0/46] Elapsed 0m 0s (remain 0m 6s) Loss: 3.1149(3.1149) 


Epoch 500 - avg_train_loss: 1.3794  avg_val_loss: 3.1407  time: 3s
Epoch 500 - Score: 0.3171
Our CV score is 0.574130748958599


EVAL: [45/46] Elapsed 0m 0s (remain 0m 0s) Loss: 3.1870(3.1407) 


# Tensorflow Conversion¶


In [16]:
class Model_infe(ASLLinearModel):
    def __init__(self):
        super().__init__(
            in_features=3864,
            first_out_features=1024,
            num_classes=250,
            num_blocks=3,
            drop_rate=0.4,
        )

    def forward(self, x):
        feature = self.model(x)
        probs = self.fc_probs(feature)
        return probs


model_infe = Model_infe()
model_infe.load_state_dict(
    torch.load(OUTPUT_DIR + f"{CFG.model_name}_best_score_version{VERSION}.pth"),
    strict=False,
)
model_infe = model_infe.to(device)

Sequential(
  (0): Sequential(
    (0): Linear(in_features=3864, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
  (1): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
  (2): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
  )
)


In [17]:
sample_input = torch.rand((1, 3864)).to(device)
onnx_model_path = "asl_model.onnx"

model_infe.eval()

torch.onnx.export(
    model_infe,  # PyTorch Model
    sample_input,  # Input tensor
    onnx_model_path,  # Output file (eg. 'output_model.onnx')
    opset_version=12,  # Operator support version
    input_names=["input"],  # Input tensor name (arbitary)
    output_names=["output"],  # Output tensor name (arbitary)
    dynamic_axes={"input": {0: "input"}},
)

In [18]:
import onnx
from onnx_tf.backend import prepare

tf_model_path = "/kaggle/working/tf_model"
onnx_model = onnx.load(onnx_model_path)
tf_rep = prepare(onnx_model)
tf_rep.export_graph(tf_model_path)

# Final Inference Model in Tensorflow

In [19]:
import tensorflow as tf


class ASLInferModel(tf.Module):
    def __init__(self):
        super(ASLInferModel, self).__init__()
        self.feature_gen = FeatureGen()
        self.model = tf.saved_model.load(tf_model_path)
        self.feature_gen.trainable = False
        self.model.trainable = False

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=[None, 543, 2], dtype=tf.float32, name="inputs")
        ]
    )
    def call(self, input):
        output_tensors = {}
        features = self.feature_gen(tf.cast(input, dtype=tf.float32))
        output_tensors["outputs"] = self.model(**{"input": features})["output"][0, :]
        return output_tensors

mytfmodel = ASLInferModel()
tf.saved_model.save(
    mytfmodel,
    "/kaggle/working/tf_infer_model",
    signatures={"serving_default": mytfmodel.call},
)

# Submission

In [20]:
# Convert the model

tf_infer_model_path = "/kaggle/working/tf_infer_model"
converter = tf.lite.TFLiteConverter.from_saved_model(tf_infer_model_path)
tflite_model = converter.convert()

tflite_model_path = "model.tflite"

# Save the model
with open(tflite_model_path, "wb") as f:
    f.write(tflite_model)

In [21]:
ROWS_PER_FRAME = 543  # number of landmarks per frame
pq_path = "/kaggle/input/asl-signs/train_landmark_files/53618/1001379621.parquet"

import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter(tflite_model_path)
interpreter.allocate_tensors()

found_signatures = list(interpreter.get_signature_list().keys())

# if REQUIRED_SIGNATURE not in found_signatures:
#     raise KernelEvalException('Required input signature not found.')

prediction_fn = interpreter.get_signature_runner("serving_default")
output = prediction_fn(inputs=load_relevant_data_subset(pq_path))
sign = np.argmax(output["outputs"])

print(sign, output["outputs"].shape)

30 (250,)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [22]:
!zip submission.zip $tflite_model_path

  adding: model.tflite (deflated 7%)
