In [20]:

import os
import random
import sys
from glob import glob
import re
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from tqdm import tqdm
from time import time
import torch
import torch.utils.data as data

import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from efficientnet_pytorch import EfficientNet
import copy
from adamp import AdamP
import matplotlib.pyplot as plt
import seaborn as sns

# Report the PyTorch build, then pick the GPU when one is available and fall
# back to the CPU otherwise.
print("PyTorch version:[%s]." % torch.__version__)
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("device:[%s]." % device)

def seed_everything(seed):
    """
    Fix every random number generator used here so that repeated runs under
    the same conditions give the same results.

    Args:
        seed: integer seed value
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, current GPU, and every GPU for multi-GPU runs).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(42)


# File suffixes treated as images. Matching is case-sensitive, which is why
# each suffix is listed in both lower and upper case.
IMG_EXTENSIONS = [
    ".jpg", ".JPG",
    ".jpeg", ".JPEG",
    ".png", ".PNG",
    ".ppm", ".PPM",
    ".bmp", ".BMP",
]


def is_image_file(filename):
    """Return True when ``filename`` ends with one of ``IMG_EXTENSIONS``."""
    known_suffixes = tuple(IMG_EXTENSIONS)
    return filename.endswith(known_suffixes)

class MaskLabels:
    """Integer class ids for the mask-wearing state."""

    # mask worn -> 0, mask worn incorrectly -> 1, no mask -> 2
    mask, incorrect, normal = 0, 1, 2

class GenderLabels:
    """Integer class ids for gender, looked up by attribute name."""

    male, female = 0, 1


class AgeGroup:
    """Maps an age to one of three age-group class ids."""

    @staticmethod
    def map_label(x):
        """Return the age-group id for age ``x``.

        Declared as a static method (rather than a lambda stored on the
        class) so that it behaves the same whether it is called on the class
        or on an instance.

        Args:
            x: the age, as an int or anything ``int()`` accepts (e.g. "54").

        Returns:
            0 if the age is below 30, 1 if it is below 58, otherwise 2.

        Raises:
            ValueError: if ``x`` cannot be converted with ``int()``.
        """
        age = int(x)
        if age < 30:
            return 0
        if age < 58:
            return 1
        return 2


class MaskBaseDataset(data.Dataset):
    """Image dataset that yields an image with its mask, gender and age labels.

    ``img_dir`` is expected to contain one sub-folder per profile, named
    ``<id>_<gender>_<race>_<age>`` (e.g. ``000004_male_Asian_54``). Inside a
    profile folder only files whose stem is a key of ``_file_names``
    (e.g. ``mask1.jpg``) are used.
    """

    # 3 mask states x 2 genders x 3 age groups.
    num_classes = 3 * 2 * 3

    # Image file stem -> mask label. Files with any other stem are ignored.
    _file_names = {
        "mask1": MaskLabels.mask,
        "mask2": MaskLabels.mask,
        "mask3": MaskLabels.mask,
        "mask4": MaskLabels.mask,
        "mask5": MaskLabels.mask,
        "incorrect_mask": MaskLabels.incorrect,
        "normal": MaskLabels.normal
    }

    def __init__(self, img_dir, transform=None):
        """
        Initialize the dataset and index every image under ``img_dir``.

        Args:
            img_dir: root directory of the training images.
            transform: callable applied to each loaded image (augmentation /
                tensor conversion). When None the image is returned as loaded.
        """
        self.img_dir = img_dir
        self.transform = transform

        # These lists are created per instance. As class attributes they were
        # shared by every dataset object, so building a second dataset
        # appended its samples onto the first one's.
        self.image_paths = []
        self.mask_labels = []
        self.gender_labels = []
        self.age_labels = []

        self.setup()

    def set_transform(self, transform):
        """
        Replace the transform applied to images in ``__getitem__``.
        """
        self.transform = transform

    def setup(self):
        """
        Walk ``img_dir`` and record the path and labels of every usable image.

        Raises:
            ValueError: if a profile folder holding a usable image is not
                named with exactly four "_"-separated fields.
            AttributeError: if the gender field is not an attribute of
                ``GenderLabels``.
        """
        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices (and therefore any seeded random split) reproducible.
        for profile in sorted(os.listdir(self.img_dir)):
            if profile.startswith("."):  # ignore hidden entries
                continue
            img_folder = os.path.join(self.img_dir, profile)
            if not os.path.isdir(img_folder):  # ignore stray files
                continue
            for file_name in sorted(os.listdir(img_folder)):
                stem, _ext = os.path.splitext(file_name)
                if stem not in self._file_names:  # hidden or unrelated file
                    continue
                # e.g. (resized_data, 000004_male_Asian_54, mask1.jpg)
                img_path = os.path.join(self.img_dir, profile, file_name)
                _profile_id, gender, _race, age = profile.split("_")

                self.image_paths.append(img_path)
                self.mask_labels.append(self._file_names[stem])
                self.gender_labels.append(getattr(GenderLabels, gender))
                self.age_labels.append(AgeGroup.map_label(age))
        # NOTE: a second pass that loaded extra data from
        # './input/data/train/addition' used to live here as disabled code; it
        # was dropped because adding the external data lowered the F1 score.

    def __getitem__(self, index):
        """
        Load one sample.

        Args:
            index: position of the sample to load.

        Returns:
            A tuple ``(image, mask_label, gender_label, age_label)``. The three
            labels are returned separately so that a loss can be computed for
            each of them. ``image`` is the transformed image, or the image as
            loaded when no transform is set.
        """
        image = Image.open(self.image_paths[index])

        mask_label = self.mask_labels[index]
        gender_label = self.gender_labels[index]
        age_label = self.age_labels[index]

        # Previously a missing transform failed with "'NoneType' object is not
        # callable" even though None is the constructor default.
        if self.transform is not None:
            image = self.transform(image)
        return image, mask_label, gender_label, age_label

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)


class cfg:
    """Filesystem locations of the training data."""

    data_dir = '../input/data/train'
    # Both paths live directly under data_dir.
    img_dir = data_dir + '/images'
    df_path = data_dir + '/train.csv'


In [7]:
import torch.nn.init as init

def initialize_weights(model):
    """
    Re-initialize the Conv2d, BatchNorm2d and Linear layers of ``model`` in place.

    - Conv2d: Xavier-uniform weights, zero bias.
    - BatchNorm2d: weight 1, bias 0.
    - Linear: weights drawn from N(0, 0.01), zero bias.

    Layers built without a bias (``bias=False``) or without affine parameters
    (``BatchNorm2d(affine=False)``) are handled: the missing parameter is
    skipped instead of raising on ``None``.

    For more weight initialization methods, check
    https://pytorch.org/docs/stable/nn.init.html

    Args:
        model: an ``nn.Module``; the module itself and all of its sub-modules
            are visited.
    """
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            # weight and bias are None when affine=False
            if m.weight is not None:
                init.ones_(m.weight)
            if m.bias is not None:
                init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, 0, 0.01)
            if m.bias is not None:
                init.zeros_(m.bias)

In [8]:
import math
import torch.optim.lr_scheduler as lr_scheduler

class CosineAnnealingWarmUpRestart(lr_scheduler._LRScheduler):
    """Cosine-annealing learning-rate schedule with linear warm-up and restarts.

    Within one cycle of ``T_i`` steps the learning rate of every parameter
    group rises linearly from its base lr to ``eta_max`` during the first
    ``T_up`` steps, then follows half a cosine wave from ``eta_max`` back
    towards the base lr over the remaining ``T_i - T_up`` steps. When a cycle
    ends the schedule restarts; the peak becomes
    ``eta_max * gamma ** cycle`` and, when stepping without an explicit epoch,
    the annealing part of the cycle is stretched by ``T_mult``.

    Args:
        optimizer: wrapped optimizer; its initial learning rates are the
            floor ("base lr") of the schedule.
        T_0: length of the first cycle in steps (positive int).
        T_mult: cycle-length multiplier applied at each restart (int >= 1).
        eta_max: peak learning rate of the first cycle.
        T_up: number of linear warm-up steps per cycle (int >= 0).
        gamma: factor applied to the peak learning rate after each cycle.
        last_epoch: index of the last step, -1 to start from scratch.

    Raises:
        ValueError: if ``T_0``, ``T_mult`` or ``T_up`` is out of range or not
            an int.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            # 0 is accepted (no warm-up), so the message says "non-negative".
            raise ValueError("Expected non-negative integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max  # peak lr of the first cycle
        self.eta_max = eta_max       # peak lr of the current cycle
        self.T_up = T_up
        self.T_i = T_0               # length of the current cycle
        self.gamma = gamma
        self.cycle = 0               # index of the current cycle
        self.T_cur = last_epoch      # position inside the current cycle
        # NOTE: all state above is set before the base constructor runs because
        # PyTorch's _LRScheduler.__init__ performs an initial step().
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of each parameter group for ``T_cur``."""
        if self.T_cur == -1:
            return self.base_lrs
        elif self.T_cur < self.T_up:
            # Linear warm-up from base_lr to eta_max.
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            # Half-cosine decay from eta_max back towards base_lr.
            return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule and write the new learning rates to the optimizer.

        Args:
            epoch: when None, move one step forward from the current
                position. Otherwise jump to this (possibly fractional) step
                index and recompute the cycle state from it.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                # Restart: only the annealing part of the cycle is scaled by
                # T_mult, the warm-up keeps its length.
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    # Number of completed cycles for geometrically growing
                    # cycle lengths T_0, T_0*T_mult, T_0*T_mult**2, ...
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                # Still inside the first cycle. The cycle index must be reset
                # as well, otherwise a jump back from a later cycle keeps the
                # decayed eta_max of that later cycle.
                self.T_i = self.T_0
                self.T_cur = epoch
                self.cycle = 0

        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [4]:
from torchvision import datasets, models, transforms
from torchvision.transforms import Resize, ToTensor, Normalize,CenterCrop

#원래는 transform으로 아주 다양한 방법들을 모두 적용해 보았지만 결국에는 대부분을 뺐을때 성능이 가장 잘 나왔다.
# Many different augmentations were tried originally, but performance was best
# with most of them removed, so only tensor conversion and normalization remain.
transform = transforms.Compose([
    ToTensor(),
    Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
])
dataset = MaskBaseDataset(
    img_dir=cfg.img_dir, transform=transform
)

# Hold out 20% of the samples for validation.
n_val = int(len(dataset) * 0.2)
n_train = len(dataset) - n_val
train_dataset, val_dataset = data.random_split(dataset, [n_train, n_val])


train_loader = data.DataLoader(
    train_dataset,
    batch_size=16,
    num_workers=4,
    shuffle=True
)

val_loader = data.DataLoader(
    val_dataset,
    batch_size=16,
    num_workers=4,
    shuffle=False
)

save_dir = os.path.join('model', 'efficient-b4')
model = EfficientNet.from_pretrained('efficientnet-b4')
# The classifier head has 18 outputs, but the losses below only read the
# first 8: [0:3] mask, [3:5] gender, [5:8] age.
model._fc = nn.Linear(1792, 18)
initialize_weights(model._fc)
model_ft = model.to(device)
criterion1 = nn.CrossEntropyLoss()
# criterion2 = FocalLoss()  # disabled: unused below, and FocalLoss is not
#                           # defined in the visible notebook cells
optimizer = AdamP(model.parameters(), lr=0.000001, betas=(0.9, 0.999), weight_decay=1e-4)
#scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5000, T_mult=2, eta_min=0.00005)
# The loop below calls scheduler.step() once per batch, so a scheduler has to
# exist. This configuration warms up to 1e-3 over the first 1000 iterations,
# which is the lr curve shown in the captured training log.
scheduler = CosineAnnealingWarmUpRestart(optimizer, T_0=3000, T_mult=1, eta_max=0.001, T_up=1000, gamma=0.5)


for epoch in range(5):
    model_ft.train()
    avg_loss = 0
    for inputs, mask_label, gender_label, age_label in tqdm(train_loader):
        inputs = inputs.to(device)
        mask_label = mask_label.to(device)
        gender_label = gender_label.to(device)
        age_label = age_label.to(device)

        # Clear stale gradients before the forward/backward pass.
        optimizer.zero_grad()
        outputs = model_ft(inputs)

        # One cross-entropy loss per task, each on its own slice of logits.
        loss1 = criterion1(outputs[:, :3], mask_label)
        #loss2 = criterion2(outputs[:,:3],mask_label)
        loss3 = criterion1(outputs[:, 3:5], gender_label)
        #loss4 = criterion2(outputs[:,3:5],gender_label)
        loss5 = criterion1(outputs[:, 5:8], age_label)
        #loss6 = criterion2(outputs[:,5:8],age_label)
        #loss = ((0.5*loss1)+(0.5*loss2)+(0.5*loss3)+(0.5*loss4)+(0.5*loss5)+(0.5*loss6))/3
        loss = (loss1 + loss3 + loss5) / 3
        loss.backward()
        optimizer.step()
        scheduler.step()  # stepped per batch, not per epoch
        avg_loss += loss.item()
    # Mean training loss over the epoch.
    print(avg_loss/len(train_loader))


# Disabled validation loop, kept for reference only: it is wrapped in a bare
# string literal, so none of it is executed.
# NOTE(review): it does not match the training code in this cell. Each batch is
# unpacked as (inputs, labels) although MaskBaseDataset.__getitem__ returns
# four values, and `criterion`, `best_val_acc`, `best_val_loss` and `counter`
# are not defined in the visible cells. Its indentation and the `break`
# indicate it was written to sit inside the epoch loop.
'''
        # val loop
        with torch.no_grad():
            print("Calculating validation results...")
            model.eval()
            val_loss_items = []
            val_acc_items = []
            figure = None
            for val_batch in val_loader:
                inputs, labels = val_batch
                inputs = inputs.to(device)
                labels = labels.to(device)
                outs = model(inputs)
                preds = torch.argmax(outs, dim=-1)
                loss_item = criterion(outs, labels).item()
                acc_item = (labels == preds).sum().item()
                val_loss_items.append(loss_item)
                val_acc_items.append(acc_item)
            val_loss = np.sum(val_loss_items) / len(val_loader)
            val_acc = np.sum(val_acc_items) / len(val_dataset)
            best_val_loss = min(best_val_loss, val_loss)
            if val_acc > best_val_acc:
                print(f"New best model for val accuracy : {val_acc:4.2%}! saving the best model..")
                torch.save(model.state_dict(), f"{save_dir}/best.pth")
                best_val_acc = val_acc
                counter = 0
            else:
                counter+=1
            torch.save(model.state_dict(), f"{save_dir}/last.pth")
            print(
                f"[Val] acc : {val_acc:4.2%}, loss: {val_loss:4.2} || "
                f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
            )
            if counter > 2:
                print("Early Stopping...")
                break
'''


PyTorch version:[1.6.0].
device:[cuda:0].
Loaded pretrained weights for efficientnet-b4


  0%|          | 10/2835 [00:18<1:21:23,  1.73s/it]

Epoch[0/10](10/2835) || training loss 0.97 || training accuracy 3.33% || lr 1.0990000000000002e-05


  1%|          | 20/2835 [00:35<1:21:46,  1.74s/it]

Epoch[0/10](20/2835) || training loss 0.9685 || training accuracy 5.83% || lr 2.0980000000000002e-05


  1%|          | 30/2835 [00:53<1:20:45,  1.73s/it]

Epoch[0/10](30/2835) || training loss 0.967 || training accuracy 10.83% || lr 3.097e-05


  1%|▏         | 40/2835 [01:10<1:20:29,  1.73s/it]

Epoch[0/10](40/2835) || training loss 0.9678 || training accuracy 13.33% || lr 4.096e-05


  2%|▏         | 50/2835 [01:27<1:18:20,  1.69s/it]

Epoch[0/10](50/2835) || training loss 0.9663 || training accuracy 25.00% || lr 5.0950000000000005e-05


  2%|▏         | 60/2835 [01:44<1:19:04,  1.71s/it]

Epoch[0/10](60/2835) || training loss 0.9645 || training accuracy 37.50% || lr 6.094e-05


  2%|▏         | 70/2835 [02:02<1:21:08,  1.76s/it]

Epoch[0/10](70/2835) || training loss 0.9642 || training accuracy 48.33% || lr 7.093e-05


  3%|▎         | 80/2835 [02:19<1:21:31,  1.78s/it]

Epoch[0/10](80/2835) || training loss 0.9543 || training accuracy 47.50% || lr 8.092e-05


  3%|▎         | 90/2835 [02:37<1:19:52,  1.75s/it]

Epoch[0/10](90/2835) || training loss 0.9437 || training accuracy 52.50% || lr 9.091e-05


  4%|▎         | 100/2835 [02:54<1:19:29,  1.74s/it]

Epoch[0/10](100/2835) || training loss 0.9179 || training accuracy 62.50% || lr 0.00010090000000000001


  4%|▍         | 110/2835 [03:12<1:17:07,  1.70s/it]

Epoch[0/10](110/2835) || training loss 0.8935 || training accuracy 65.00% || lr 0.00011089000000000002


  4%|▍         | 120/2835 [03:29<1:20:28,  1.78s/it]

Epoch[0/10](120/2835) || training loss 0.876 || training accuracy 55.83% || lr 0.00012088000000000001


  5%|▍         | 130/2835 [03:46<1:17:26,  1.72s/it]

Epoch[0/10](130/2835) || training loss 0.8491 || training accuracy 55.83% || lr 0.00013087


  5%|▍         | 140/2835 [04:03<1:17:17,  1.72s/it]

Epoch[0/10](140/2835) || training loss 0.8025 || training accuracy 66.67% || lr 0.00014086


  5%|▌         | 150/2835 [04:20<1:17:55,  1.74s/it]

Epoch[0/10](150/2835) || training loss 0.82 || training accuracy 50.83% || lr 0.00015085


  6%|▌         | 160/2835 [04:38<1:18:02,  1.75s/it]

Epoch[0/10](160/2835) || training loss 0.8182 || training accuracy 60.00% || lr 0.00016084


  6%|▌         | 170/2835 [04:55<1:19:08,  1.78s/it]

Epoch[0/10](170/2835) || training loss 0.8095 || training accuracy 52.50% || lr 0.00017083000000000002


  6%|▋         | 180/2835 [05:13<1:15:53,  1.72s/it]

Epoch[0/10](180/2835) || training loss 0.7932 || training accuracy 57.50% || lr 0.00018082


  7%|▋         | 190/2835 [05:30<1:15:55,  1.72s/it]

Epoch[0/10](190/2835) || training loss 0.7717 || training accuracy 60.83% || lr 0.00019081


  7%|▋         | 200/2835 [05:47<1:15:43,  1.72s/it]

Epoch[0/10](200/2835) || training loss 0.7741 || training accuracy 68.33% || lr 0.00020080000000000003


  7%|▋         | 210/2835 [06:05<1:15:54,  1.73s/it]

Epoch[0/10](210/2835) || training loss 0.7685 || training accuracy 68.33% || lr 0.00021079000000000003


  8%|▊         | 220/2835 [06:22<1:15:05,  1.72s/it]

Epoch[0/10](220/2835) || training loss 0.7649 || training accuracy 70.00% || lr 0.00022078000000000004


  8%|▊         | 230/2835 [06:39<1:15:10,  1.73s/it]

Epoch[0/10](230/2835) || training loss 0.7555 || training accuracy 75.83% || lr 0.00023077000000000001


  8%|▊         | 240/2835 [06:57<1:15:33,  1.75s/it]

Epoch[0/10](240/2835) || training loss 0.7501 || training accuracy 66.67% || lr 0.00024076000000000002


  9%|▉         | 250/2835 [07:14<1:15:00,  1.74s/it]

Epoch[0/10](250/2835) || training loss 0.7558 || training accuracy 67.50% || lr 0.00025075000000000005


  9%|▉         | 260/2835 [07:32<1:15:00,  1.75s/it]

Epoch[0/10](260/2835) || training loss 0.7772 || training accuracy 63.33% || lr 0.00026074000000000003


 10%|▉         | 270/2835 [07:49<1:13:56,  1.73s/it]

Epoch[0/10](270/2835) || training loss 0.7561 || training accuracy 63.33% || lr 0.00027073000000000006


 10%|▉         | 280/2835 [08:06<1:12:44,  1.71s/it]

Epoch[0/10](280/2835) || training loss 0.7311 || training accuracy 78.33% || lr 0.00028072000000000004


 10%|█         | 290/2835 [08:23<1:13:35,  1.74s/it]

Epoch[0/10](290/2835) || training loss 0.7132 || training accuracy 70.83% || lr 0.00029071000000000007


 11%|█         | 300/2835 [08:41<1:13:44,  1.75s/it]

Epoch[0/10](300/2835) || training loss 0.7822 || training accuracy 64.17% || lr 0.00030070000000000004


 11%|█         | 310/2835 [08:58<1:13:29,  1.75s/it]

Epoch[0/10](310/2835) || training loss 0.7475 || training accuracy 70.83% || lr 0.00031069


 11%|█▏        | 320/2835 [09:16<1:12:19,  1.73s/it]

Epoch[0/10](320/2835) || training loss 0.7244 || training accuracy 66.67% || lr 0.00032068000000000005


 12%|█▏        | 330/2835 [09:33<1:10:26,  1.69s/it]

Epoch[0/10](330/2835) || training loss 0.7496 || training accuracy 61.67% || lr 0.00033067000000000003


 12%|█▏        | 340/2835 [09:50<1:11:07,  1.71s/it]

Epoch[0/10](340/2835) || training loss 0.7284 || training accuracy 65.83% || lr 0.00034066000000000006


 12%|█▏        | 350/2835 [10:07<1:10:45,  1.71s/it]

Epoch[0/10](350/2835) || training loss 0.7437 || training accuracy 65.00% || lr 0.00035065000000000004


 13%|█▎        | 360/2835 [10:24<1:09:47,  1.69s/it]

Epoch[0/10](360/2835) || training loss 0.73 || training accuracy 68.33% || lr 0.00036064


 13%|█▎        | 370/2835 [10:41<1:09:55,  1.70s/it]

Epoch[0/10](370/2835) || training loss 0.7772 || training accuracy 57.50% || lr 0.00037063000000000005


 13%|█▎        | 380/2835 [10:58<1:10:43,  1.73s/it]

Epoch[0/10](380/2835) || training loss 0.7495 || training accuracy 67.50% || lr 0.00038062


 14%|█▍        | 390/2835 [11:16<1:10:53,  1.74s/it]

Epoch[0/10](390/2835) || training loss 0.7452 || training accuracy 72.50% || lr 0.0003906100000000001


 14%|█▍        | 400/2835 [11:33<1:10:48,  1.74s/it]

Epoch[0/10](400/2835) || training loss 0.784 || training accuracy 56.67% || lr 0.0004006000000000001


 14%|█▍        | 410/2835 [11:51<1:09:11,  1.71s/it]

Epoch[0/10](410/2835) || training loss 0.7545 || training accuracy 64.17% || lr 0.00041059000000000007


 15%|█▍        | 420/2835 [12:08<1:11:16,  1.77s/it]

Epoch[0/10](420/2835) || training loss 0.7766 || training accuracy 55.83% || lr 0.0004205800000000001


 15%|█▌        | 430/2835 [12:25<1:10:10,  1.75s/it]

Epoch[0/10](430/2835) || training loss 0.7475 || training accuracy 67.50% || lr 0.0004305700000000001


 16%|█▌        | 440/2835 [12:43<1:09:15,  1.74s/it]

Epoch[0/10](440/2835) || training loss 0.77 || training accuracy 64.17% || lr 0.0004405600000000001


 16%|█▌        | 450/2835 [13:00<1:08:13,  1.72s/it]

Epoch[0/10](450/2835) || training loss 0.7473 || training accuracy 57.50% || lr 0.0004505500000000001


 16%|█▌        | 460/2835 [13:18<1:09:19,  1.75s/it]

Epoch[0/10](460/2835) || training loss 0.7161 || training accuracy 70.83% || lr 0.00046054000000000006


 17%|█▋        | 470/2835 [13:35<1:08:49,  1.75s/it]

Epoch[0/10](470/2835) || training loss 0.7181 || training accuracy 66.67% || lr 0.0004705300000000001


 17%|█▋        | 480/2835 [13:52<1:08:14,  1.74s/it]

Epoch[0/10](480/2835) || training loss 0.736 || training accuracy 65.83% || lr 0.00048052000000000007


 17%|█▋        | 490/2835 [14:10<1:07:35,  1.73s/it]

Epoch[0/10](490/2835) || training loss 0.755 || training accuracy 70.00% || lr 0.0004905100000000001


 18%|█▊        | 500/2835 [14:27<1:07:50,  1.74s/it]

Epoch[0/10](500/2835) || training loss 0.732 || training accuracy 66.67% || lr 0.0005005000000000001


 18%|█▊        | 510/2835 [14:45<1:06:34,  1.72s/it]

Epoch[0/10](510/2835) || training loss 0.7231 || training accuracy 70.00% || lr 0.00051049


 18%|█▊        | 520/2835 [15:02<1:06:31,  1.72s/it]

Epoch[0/10](520/2835) || training loss 0.714 || training accuracy 75.00% || lr 0.00052048


 19%|█▊        | 530/2835 [15:19<1:06:51,  1.74s/it]

Epoch[0/10](530/2835) || training loss 0.7376 || training accuracy 64.17% || lr 0.0005304700000000001


 19%|█▉        | 540/2835 [15:37<1:06:45,  1.75s/it]

Epoch[0/10](540/2835) || training loss 0.7325 || training accuracy 65.00% || lr 0.0005404600000000001


 19%|█▉        | 550/2835 [15:54<1:06:32,  1.75s/it]

Epoch[0/10](550/2835) || training loss 0.7152 || training accuracy 65.00% || lr 0.0005504500000000002


 20%|█▉        | 560/2835 [16:12<1:06:14,  1.75s/it]

Epoch[0/10](560/2835) || training loss 0.7532 || training accuracy 60.83% || lr 0.00056044


 20%|██        | 570/2835 [16:29<1:04:56,  1.72s/it]

Epoch[0/10](570/2835) || training loss 0.7639 || training accuracy 59.17% || lr 0.0005704300000000001


 20%|██        | 580/2835 [16:46<1:04:59,  1.73s/it]

Epoch[0/10](580/2835) || training loss 0.7808 || training accuracy 66.67% || lr 0.0005804200000000001


 21%|██        | 590/2835 [17:04<1:03:49,  1.71s/it]

Epoch[0/10](590/2835) || training loss 0.753 || training accuracy 56.67% || lr 0.0005904100000000001


 21%|██        | 600/2835 [17:21<1:04:41,  1.74s/it]

Epoch[0/10](600/2835) || training loss 0.8106 || training accuracy 50.83% || lr 0.0006004000000000001


 22%|██▏       | 610/2835 [17:38<1:04:22,  1.74s/it]

Epoch[0/10](610/2835) || training loss 0.8106 || training accuracy 46.67% || lr 0.0006103900000000002


 22%|██▏       | 620/2835 [17:56<1:04:29,  1.75s/it]

Epoch[0/10](620/2835) || training loss 0.7814 || training accuracy 57.50% || lr 0.00062038


 22%|██▏       | 630/2835 [18:13<1:04:19,  1.75s/it]

Epoch[0/10](630/2835) || training loss 0.7777 || training accuracy 55.83% || lr 0.0006303700000000001


 23%|██▎       | 640/2835 [18:31<1:04:02,  1.75s/it]

Epoch[0/10](640/2835) || training loss 0.7964 || training accuracy 54.17% || lr 0.0006403600000000001


 23%|██▎       | 650/2835 [18:48<1:02:32,  1.72s/it]

Epoch[0/10](650/2835) || training loss 0.7683 || training accuracy 63.33% || lr 0.0006503500000000002


 23%|██▎       | 660/2835 [19:06<1:03:29,  1.75s/it]

Epoch[0/10](660/2835) || training loss 0.762 || training accuracy 63.33% || lr 0.00066034


 24%|██▎       | 670/2835 [19:23<1:02:52,  1.74s/it]

Epoch[0/10](670/2835) || training loss 0.7664 || training accuracy 61.67% || lr 0.0006703300000000001


 24%|██▍       | 680/2835 [19:41<1:02:41,  1.75s/it]

Epoch[0/10](680/2835) || training loss 0.7758 || training accuracy 51.67% || lr 0.0006803200000000001


 24%|██▍       | 690/2835 [19:58<1:01:33,  1.72s/it]

Epoch[0/10](690/2835) || training loss 0.7412 || training accuracy 64.17% || lr 0.0006903100000000001


 25%|██▍       | 700/2835 [20:15<1:02:04,  1.74s/it]

Epoch[0/10](700/2835) || training loss 0.7742 || training accuracy 61.67% || lr 0.0007003000000000001


 25%|██▌       | 710/2835 [20:33<1:01:57,  1.75s/it]

Epoch[0/10](710/2835) || training loss 0.7488 || training accuracy 61.67% || lr 0.0007102900000000001


 25%|██▌       | 720/2835 [20:50<1:01:29,  1.74s/it]

Epoch[0/10](720/2835) || training loss 0.7718 || training accuracy 55.00% || lr 0.00072028


 26%|██▌       | 730/2835 [21:08<1:01:08,  1.74s/it]

Epoch[0/10](730/2835) || training loss 0.7658 || training accuracy 52.50% || lr 0.0007302700000000001


 26%|██▌       | 740/2835 [21:25<1:00:54,  1.74s/it]

Epoch[0/10](740/2835) || training loss 0.8332 || training accuracy 45.83% || lr 0.0007402600000000001


 26%|██▋       | 750/2835 [21:43<1:00:09,  1.73s/it]

Epoch[0/10](750/2835) || training loss 0.7609 || training accuracy 58.33% || lr 0.0007502500000000002


 27%|██▋       | 760/2835 [22:00<1:00:15,  1.74s/it]

Epoch[0/10](760/2835) || training loss 0.7853 || training accuracy 53.33% || lr 0.00076024


 27%|██▋       | 770/2835 [22:17<1:00:15,  1.75s/it]

Epoch[0/10](770/2835) || training loss 0.8022 || training accuracy 49.17% || lr 0.0007702300000000001


 28%|██▊       | 780/2835 [22:35<59:51,  1.75s/it]  

Epoch[0/10](780/2835) || training loss 0.7491 || training accuracy 56.67% || lr 0.0007802200000000002


 28%|██▊       | 790/2835 [22:52<59:28,  1.74s/it]

Epoch[0/10](790/2835) || training loss 0.7668 || training accuracy 55.83% || lr 0.0007902100000000001


 28%|██▊       | 800/2835 [23:10<59:19,  1.75s/it]

Epoch[0/10](800/2835) || training loss 0.8025 || training accuracy 50.83% || lr 0.0008002000000000002


 29%|██▊       | 810/2835 [23:27<58:50,  1.74s/it]

Epoch[0/10](810/2835) || training loss 0.7819 || training accuracy 55.00% || lr 0.0008101900000000001


 29%|██▉       | 820/2835 [23:45<57:09,  1.70s/it]  

Epoch[0/10](820/2835) || training loss 0.7238 || training accuracy 60.83% || lr 0.0008201800000000001


 29%|██▉       | 830/2835 [24:02<58:21,  1.75s/it]

Epoch[0/10](830/2835) || training loss 0.7653 || training accuracy 60.83% || lr 0.0008301700000000001


 30%|██▉       | 840/2835 [24:20<57:54,  1.74s/it]

Epoch[0/10](840/2835) || training loss 0.7789 || training accuracy 56.67% || lr 0.0008401600000000002


 30%|██▉       | 850/2835 [24:37<59:07,  1.79s/it]

Epoch[0/10](850/2835) || training loss 0.7553 || training accuracy 56.67% || lr 0.0008501500000000001


 30%|███       | 860/2835 [24:54<55:54,  1.70s/it]

Epoch[0/10](860/2835) || training loss 0.79 || training accuracy 57.50% || lr 0.0008601400000000001


 31%|███       | 870/2835 [25:11<56:28,  1.72s/it]

Epoch[0/10](870/2835) || training loss 0.7622 || training accuracy 55.00% || lr 0.0008701300000000001


 31%|███       | 880/2835 [25:29<56:19,  1.73s/it]

Epoch[0/10](880/2835) || training loss 0.7911 || training accuracy 60.83% || lr 0.0008801200000000002


 31%|███▏      | 890/2835 [25:46<56:30,  1.74s/it]

Epoch[0/10](890/2835) || training loss 0.8275 || training accuracy 50.00% || lr 0.0008901100000000001


 32%|███▏      | 900/2835 [26:04<56:08,  1.74s/it]

Epoch[0/10](900/2835) || training loss 0.7838 || training accuracy 56.67% || lr 0.0009001000000000001


 32%|███▏      | 910/2835 [26:21<57:29,  1.79s/it]

Epoch[0/10](910/2835) || training loss 0.7913 || training accuracy 47.50% || lr 0.0009100900000000001


 32%|███▏      | 920/2835 [26:38<55:19,  1.73s/it]

Epoch[0/10](920/2835) || training loss 0.7547 || training accuracy 58.33% || lr 0.0009200800000000001


 33%|███▎      | 930/2835 [26:56<55:23,  1.74s/it]

Epoch[0/10](930/2835) || training loss 0.7988 || training accuracy 55.83% || lr 0.0009300700000000001


 33%|███▎      | 940/2835 [27:13<53:22,  1.69s/it]

Epoch[0/10](940/2835) || training loss 0.7825 || training accuracy 58.33% || lr 0.0009400600000000002


 34%|███▎      | 950/2835 [27:30<54:28,  1.73s/it]

Epoch[0/10](950/2835) || training loss 0.766 || training accuracy 62.50% || lr 0.0009500500000000001


 34%|███▍      | 960/2835 [27:48<54:16,  1.74s/it]

Epoch[0/10](960/2835) || training loss 0.8188 || training accuracy 47.50% || lr 0.0009600400000000001


 34%|███▍      | 970/2835 [28:05<54:20,  1.75s/it]

Epoch[0/10](970/2835) || training loss 0.7889 || training accuracy 56.67% || lr 0.0009700300000000001


 35%|███▍      | 980/2835 [28:23<53:33,  1.73s/it]

Epoch[0/10](980/2835) || training loss 0.8169 || training accuracy 52.50% || lr 0.00098002


 35%|███▍      | 990/2835 [28:40<53:06,  1.73s/it]

Epoch[0/10](990/2835) || training loss 0.8154 || training accuracy 50.83% || lr 0.00099001


 35%|███▌      | 1000/2835 [28:57<53:03,  1.73s/it]

Epoch[0/10](1000/2835) || training loss 0.8229 || training accuracy 44.17% || lr 0.001


 36%|███▌      | 1010/2835 [29:15<53:16,  1.75s/it]

Epoch[0/10](1010/2835) || training loss 0.831 || training accuracy 39.17% || lr 0.0009999383779245895


 36%|███▌      | 1020/2835 [29:32<52:56,  1.75s/it]

Epoch[0/10](1020/2835) || training loss 0.8087 || training accuracy 48.33% || lr 0.0009997535269026829


 36%|███▋      | 1030/2835 [29:50<52:34,  1.75s/it]

Epoch[0/10](1030/2835) || training loss 0.7811 || training accuracy 48.33% || lr 0.000999445492543504


 37%|███▋      | 1040/2835 [30:07<51:54,  1.74s/it]

Epoch[0/10](1040/2835) || training loss 0.8186 || training accuracy 45.83% || lr 0.0009990143508499217


 37%|███▋      | 1050/2835 [30:24<51:32,  1.73s/it]

Epoch[0/10](1050/2835) || training loss 0.7833 || training accuracy 54.17% || lr 0.0009984602081996976


 37%|███▋      | 1060/2835 [30:42<51:15,  1.73s/it]

Epoch[0/10](1060/2835) || training loss 0.7575 || training accuracy 57.50% || lr 0.0009977832013192385


 38%|███▊      | 1070/2835 [30:59<51:07,  1.74s/it]

Epoch[0/10](1070/2835) || training loss 0.7923 || training accuracy 55.83% || lr 0.0009969834972498623


 38%|███▊      | 1080/2835 [31:16<49:59,  1.71s/it]

Epoch[0/10](1080/2835) || training loss 0.7996 || training accuracy 50.83% || lr 0.0009960612933065818


 38%|███▊      | 1090/2835 [31:34<51:20,  1.77s/it]

Epoch[0/10](1090/2835) || training loss 0.7753 || training accuracy 64.17% || lr 0.0009950168170294205


 39%|███▉      | 1100/2835 [31:51<49:12,  1.70s/it]

Epoch[0/10](1100/2835) || training loss 0.7722 || training accuracy 55.83% || lr 0.0009938503261272714


 39%|███▉      | 1110/2835 [32:08<49:31,  1.72s/it]

Epoch[0/10](1110/2835) || training loss 0.7745 || training accuracy 57.50% || lr 0.0009925621084143096


 40%|███▉      | 1120/2835 [32:25<49:27,  1.73s/it]

Epoch[0/10](1120/2835) || training loss 0.7839 || training accuracy 58.33% || lr 0.00099115248173898


 40%|███▉      | 1130/2835 [32:42<48:52,  1.72s/it]

Epoch[0/10](1130/2835) || training loss 0.7594 || training accuracy 59.17% || lr 0.000989621793905572


 40%|████      | 1140/2835 [33:00<49:13,  1.74s/it]

Epoch[0/10](1140/2835) || training loss 0.7824 || training accuracy 54.17% || lr 0.0009879704225884043


 41%|████      | 1150/2835 [33:17<48:23,  1.72s/it]

Epoch[0/10](1150/2835) || training loss 0.759 || training accuracy 60.83% || lr 0.0009861987752386395


 41%|████      | 1160/2835 [33:34<47:37,  1.71s/it]

Epoch[0/10](1160/2835) || training loss 0.7436 || training accuracy 62.50% || lr 0.0009843072889837512


 41%|████▏     | 1170/2835 [33:51<48:00,  1.73s/it]

Epoch[0/10](1170/2835) || training loss 0.756 || training accuracy 57.50% || lr 0.0009822964305196702


 42%|████▏     | 1180/2835 [34:09<53:27,  1.94s/it]

Epoch[0/10](1180/2835) || training loss 0.7755 || training accuracy 55.00% || lr 0.000980166695995633


 42%|████▏     | 1190/2835 [34:31<58:13,  2.12s/it]

Epoch[0/10](1190/2835) || training loss 0.7841 || training accuracy 60.83% || lr 0.000977918610891766


 42%|████▏     | 1200/2835 [34:52<57:53,  2.12s/it]

Epoch[0/10](1200/2835) || training loss 0.7493 || training accuracy 61.67% || lr 0.0009755527298894294


 43%|████▎     | 1210/2835 [35:13<56:47,  2.10s/it]

Epoch[0/10](1210/2835) || training loss 0.7987 || training accuracy 52.50% || lr 0.000973069636734359


 43%|████▎     | 1220/2835 [35:34<56:30,  2.10s/it]

Epoch[0/10](1220/2835) || training loss 0.8056 || training accuracy 53.33% || lr 0.0009704699440926358


 43%|████▎     | 1230/2835 [35:55<56:18,  2.10s/it]

Epoch[0/10](1230/2835) || training loss 0.7147 || training accuracy 69.17% || lr 0.0009677542933995189


 44%|████▎     | 1240/2835 [36:12<45:22,  1.71s/it]

Epoch[0/10](1240/2835) || training loss 0.7754 || training accuracy 60.83% || lr 0.0009649233547011816


 44%|████▍     | 1250/2835 [36:29<45:48,  1.73s/it]

Epoch[0/10](1250/2835) || training loss 0.777 || training accuracy 65.83% || lr 0.0009619778264893878


 44%|████▍     | 1260/2835 [36:47<45:35,  1.74s/it]

Epoch[0/10](1260/2835) || training loss 0.8109 || training accuracy 51.67% || lr 0.0009589184355291487


 45%|████▍     | 1270/2835 [37:04<45:21,  1.74s/it]

Epoch[0/10](1270/2835) || training loss 0.7878 || training accuracy 63.33% || lr 0.000955745936679405


 45%|████▌     | 1280/2835 [37:21<44:58,  1.74s/it]

Epoch[0/10](1280/2835) || training loss 0.8114 || training accuracy 49.17% || lr 0.0009524611127067769


 46%|████▌     | 1290/2835 [37:39<44:41,  1.74s/it]

Epoch[0/10](1290/2835) || training loss 0.809 || training accuracy 57.50% || lr 0.0009490647740924275


 46%|████▌     | 1300/2835 [37:56<44:29,  1.74s/it]

Epoch[0/10](1300/2835) || training loss 0.8123 || training accuracy 55.00% || lr 0.0009455577588320898


 46%|████▌     | 1310/2835 [38:13<43:44,  1.72s/it]

Epoch[0/10](1310/2835) || training loss 0.7829 || training accuracy 58.33% || lr 0.0009419409322293026


 47%|████▋     | 1320/2835 [38:30<43:40,  1.73s/it]

Epoch[0/10](1320/2835) || training loss 0.7687 || training accuracy 59.17% || lr 0.0009382151866819099


 47%|████▋     | 1330/2835 [38:48<43:34,  1.74s/it]

Epoch[0/10](1330/2835) || training loss 0.7328 || training accuracy 60.00% || lr 0.0009343814414618767


 47%|████▋     | 1340/2835 [39:05<43:33,  1.75s/it]

Epoch[0/10](1340/2835) || training loss 0.7611 || training accuracy 54.17% || lr 0.00093044064248847


 48%|████▊     | 1350/2835 [39:23<43:04,  1.74s/it]

Epoch[0/10](1350/2835) || training loss 0.7495 || training accuracy 56.67% || lr 0.0009263937620948692


 48%|████▊     | 1360/2835 [39:40<42:16,  1.72s/it]

Epoch[0/10](1360/2835) || training loss 0.7851 || training accuracy 56.67% || lr 0.0009222417987882566


 48%|████▊     | 1370/2835 [39:57<42:10,  1.73s/it]

Epoch[0/10](1370/2835) || training loss 0.7769 || training accuracy 54.17% || lr 0.0009179857770034512


 49%|████▊     | 1380/2835 [40:15<41:55,  1.73s/it]

Epoch[0/10](1380/2835) || training loss 0.7793 || training accuracy 59.17% || lr 0.0009136267468501438


 49%|████▉     | 1390/2835 [40:32<41:09,  1.71s/it]

Epoch[0/10](1390/2835) || training loss 0.787 || training accuracy 50.83% || lr 0.0009091657838537993


 49%|████▉     | 1400/2835 [40:49<41:24,  1.73s/it]

Epoch[0/10](1400/2835) || training loss 0.772 || training accuracy 54.17% || lr 0.0009046039886902864


 50%|████▉     | 1410/2835 [41:07<41:29,  1.75s/it]

Epoch[0/10](1410/2835) || training loss 0.8182 || training accuracy 50.00% || lr 0.0008999424869143019


 50%|█████     | 1420/2835 [41:24<41:18,  1.75s/it]

Epoch[0/10](1420/2835) || training loss 0.7494 || training accuracy 63.33% || lr 0.0008951824286816573


 50%|█████     | 1430/2835 [41:42<40:50,  1.74s/it]

Epoch[0/10](1430/2835) || training loss 0.7587 || training accuracy 60.83% || lr 0.0008903249884654958


 51%|█████     | 1440/2835 [41:59<40:27,  1.74s/it]

Epoch[0/10](1440/2835) || training loss 0.8044 || training accuracy 55.00% || lr 0.0008853713647665067


 51%|█████     | 1450/2835 [42:16<39:45,  1.72s/it]

Epoch[0/10](1450/2835) || training loss 0.7932 || training accuracy 58.33% || lr 0.0008803227798172156


 51%|█████▏    | 1460/2835 [42:34<39:58,  1.74s/it]

Epoch[0/10](1460/2835) || training loss 0.757 || training accuracy 62.50% || lr 0.0008751804792804147


 52%|█████▏    | 1470/2835 [42:51<39:47,  1.75s/it]

Epoch[0/10](1470/2835) || training loss 0.7485 || training accuracy 65.00% || lr 0.0008699457319418156


 52%|█████▏    | 1480/2835 [43:09<39:19,  1.74s/it]

Epoch[0/10](1480/2835) || training loss 0.7744 || training accuracy 57.50% || lr 0.0008646198293969952


 53%|█████▎    | 1490/2835 [43:26<39:11,  1.75s/it]

Epoch[0/10](1490/2835) || training loss 0.7187 || training accuracy 64.17% || lr 0.0008592040857327129


 53%|█████▎    | 1500/2835 [43:44<38:48,  1.74s/it]

Epoch[0/10](1500/2835) || training loss 0.7483 || training accuracy 63.33% || lr 0.0008536998372026805


 53%|█████▎    | 1510/2835 [44:01<38:41,  1.75s/it]

Epoch[0/10](1510/2835) || training loss 0.7284 || training accuracy 68.33% || lr 0.0008481084418978611


 54%|█████▎    | 1520/2835 [44:19<37:50,  1.73s/it]

Epoch[0/10](1520/2835) || training loss 0.7547 || training accuracy 65.00% || lr 0.0008424312794113801


 54%|█████▍    | 1530/2835 [44:36<37:56,  1.74s/it]

Epoch[0/10](1530/2835) || training loss 0.7108 || training accuracy 68.33% || lr 0.0008366697504981319


 54%|█████▍    | 1540/2835 [44:53<37:44,  1.75s/it]

Epoch[0/10](1540/2835) || training loss 0.751 || training accuracy 65.00% || lr 0.0008308252767291642


 55%|█████▍    | 1550/2835 [45:11<37:19,  1.74s/it]

Epoch[0/10](1550/2835) || training loss 0.7839 || training accuracy 57.50% || lr 0.0008248993001409269


 55%|█████▌    | 1560/2835 [45:28<36:28,  1.72s/it]

Epoch[0/10](1560/2835) || training loss 0.7657 || training accuracy 59.17% || lr 0.0008188932828794706


 55%|█████▌    | 1570/2835 [45:45<36:15,  1.72s/it]

Epoch[0/10](1570/2835) || training loss 0.7484 || training accuracy 67.50% || lr 0.0008128087068396848


 56%|█████▌    | 1580/2835 [46:03<36:28,  1.74s/it]

Epoch[0/10](1580/2835) || training loss 0.7765 || training accuracy 65.00% || lr 0.0008066470732996619


 56%|█████▌    | 1590/2835 [46:20<35:50,  1.73s/it]

Epoch[0/10](1590/2835) || training loss 0.8119 || training accuracy 60.83% || lr 0.0008004099025502792


 56%|█████▋    | 1600/2835 [46:37<35:40,  1.73s/it]

Epoch[0/10](1600/2835) || training loss 0.7588 || training accuracy 56.67% || lr 0.0007940987335200904


 57%|█████▋    | 1610/2835 [46:55<35:37,  1.75s/it]

Epoch[0/10](1610/2835) || training loss 0.7318 || training accuracy 69.17% || lr 0.0007877151233956178


 57%|█████▋    | 1620/2835 [47:12<35:05,  1.73s/it]

Epoch[0/10](1620/2835) || training loss 0.7486 || training accuracy 60.83% || lr 0.0007812606472371393


 57%|█████▋    | 1630/2835 [47:29<34:47,  1.73s/it]

Epoch[0/10](1630/2835) || training loss 0.7608 || training accuracy 61.67% || lr 0.000774736897590067


 58%|█████▊    | 1640/2835 [47:47<34:50,  1.75s/it]

Epoch[0/10](1640/2835) || training loss 0.7269 || training accuracy 67.50% || lr 0.0007681454840920089


 58%|█████▊    | 1650/2835 [48:04<34:02,  1.72s/it]

Epoch[0/10](1650/2835) || training loss 0.7067 || training accuracy 67.50% || lr 0.0007614880330756165


 59%|█████▊    | 1660/2835 [48:21<34:02,  1.74s/it]

Epoch[0/10](1660/2835) || training loss 0.7492 || training accuracy 59.17% || lr 0.0007547661871673105


 59%|█████▉    | 1670/2835 [48:39<33:34,  1.73s/it]

Epoch[0/10](1670/2835) || training loss 0.7196 || training accuracy 67.50% || lr 0.0007479816048819877


 59%|█████▉    | 1680/2835 [48:56<33:25,  1.74s/it]

Epoch[0/10](1680/2835) || training loss 0.712 || training accuracy 65.00% || lr 0.0007411359602138069


 60%|█████▉    | 1690/2835 [49:14<33:16,  1.74s/it]

Epoch[0/10](1690/2835) || training loss 0.7711 || training accuracy 70.83% || lr 0.0007342309422231566


 60%|█████▉    | 1700/2835 [49:31<33:01,  1.75s/it]

Epoch[0/10](1700/2835) || training loss 0.7626 || training accuracy 66.67% || lr 0.0007272682546199037


 60%|██████    | 1710/2835 [49:48<32:28,  1.73s/it]

Epoch[0/10](1710/2835) || training loss 0.6985 || training accuracy 69.17% || lr 0.0007202496153430297


 61%|██████    | 1720/2835 [50:06<32:18,  1.74s/it]

Epoch[0/10](1720/2835) || training loss 0.7591 || training accuracy 61.67% || lr 0.0007131767561367538


 61%|██████    | 1730/2835 [50:23<32:02,  1.74s/it]

Epoch[0/10](1730/2835) || training loss 0.6925 || training accuracy 72.50% || lr 0.0007060514221232519


 61%|██████▏   | 1740/2835 [50:41<31:52,  1.75s/it]

Epoch[0/10](1740/2835) || training loss 0.7455 || training accuracy 67.50% || lr 0.000698875371372073


 62%|██████▏   | 1750/2835 [50:58<31:27,  1.74s/it]

Epoch[0/10](1750/2835) || training loss 0.7579 || training accuracy 69.17% || lr 0.0006916503744663625


 62%|██████▏   | 1760/2835 [51:16<31:25,  1.75s/it]

Epoch[0/10](1760/2835) || training loss 0.7114 || training accuracy 70.83% || lr 0.0006843782140659967


 62%|██████▏   | 1770/2835 [51:33<31:02,  1.75s/it]

Epoch[0/10](1770/2835) || training loss 0.7317 || training accuracy 69.17% || lr 0.000677060684467739


 63%|██████▎   | 1780/2835 [51:50<30:10,  1.72s/it]

Epoch[0/10](1780/2835) || training loss 0.7395 || training accuracy 65.83% || lr 0.0006696995911625233


 63%|██████▎   | 1790/2835 [52:07<29:48,  1.71s/it]

Epoch[0/10](1790/2835) || training loss 0.731 || training accuracy 68.33% || lr 0.0006622967503899758


 63%|██████▎   | 1800/2835 [52:25<29:41,  1.72s/it]

Epoch[0/10](1800/2835) || training loss 0.7334 || training accuracy 69.17% || lr 0.0006548539886902864


 64%|██████▍   | 1810/2835 [52:42<29:30,  1.73s/it]

Epoch[0/10](1810/2835) || training loss 0.7392 || training accuracy 64.17% || lr 0.000647373142453536


 64%|██████▍   | 1820/2835 [52:59<29:30,  1.74s/it]

Epoch[0/10](1820/2835) || training loss 0.7392 || training accuracy 70.83% || lr 0.0006398560574665951


 65%|██████▍   | 1830/2835 [53:17<29:10,  1.74s/it]

Epoch[0/10](1830/2835) || training loss 0.759 || training accuracy 65.00% || lr 0.0006323045884577039


 65%|██████▍   | 1840/2835 [53:34<28:56,  1.74s/it]

Epoch[0/10](1840/2835) || training loss 0.7185 || training accuracy 70.00% || lr 0.0006247205986388449


 65%|██████▌   | 1850/2835 [53:52<28:26,  1.73s/it]

Epoch[0/10](1850/2835) || training loss 0.7203 || training accuracy 72.50% || lr 0.0006171059592460249


 66%|██████▌   | 1860/2835 [54:09<28:08,  1.73s/it]

Epoch[0/10](1860/2835) || training loss 0.7338 || training accuracy 71.67% || lr 0.0006094625490775732


 66%|██████▌   | 1870/2835 [54:26<27:57,  1.74s/it]

Epoch[0/10](1870/2835) || training loss 0.7017 || training accuracy 73.33% || lr 0.000601792254030578


 66%|██████▋   | 1880/2835 [54:44<27:41,  1.74s/it]

Epoch[0/10](1880/2835) || training loss 0.7038 || training accuracy 73.33% || lr 0.0005940969666355696


 67%|██████▋   | 1890/2835 [55:01<27:25,  1.74s/it]

Epoch[0/10](1890/2835) || training loss 0.692 || training accuracy 73.33% || lr 0.0005863785855895653


 67%|██████▋   | 1900/2835 [55:19<27:10,  1.74s/it]

Epoch[0/10](1900/2835) || training loss 0.7137 || training accuracy 71.67% || lr 0.0005786390152875954


 67%|██████▋   | 1910/2835 [55:36<26:51,  1.74s/it]

Epoch[0/10](1910/2835) || training loss 0.715 || training accuracy 70.00% || lr 0.0005708801653528226


 68%|██████▊   | 1920/2835 [55:53<26:40,  1.75s/it]

Epoch[0/10](1920/2835) || training loss 0.7007 || training accuracy 77.50% || lr 0.00056310395016537


 68%|██████▊   | 1930/2835 [56:11<25:55,  1.72s/it]

Epoch[0/10](1930/2835) || training loss 0.7304 || training accuracy 70.00% || lr 0.0005553122883899772


 68%|██████▊   | 1940/2835 [56:28<25:56,  1.74s/it]

Epoch[0/10](1940/2835) || training loss 0.6873 || training accuracy 74.17% || lr 0.000547507102502598


 69%|██████▉   | 1950/2835 [56:46<25:32,  1.73s/it]

Epoch[0/10](1950/2835) || training loss 0.71 || training accuracy 76.67% || lr 0.0005396903183160586


 69%|██████▉   | 1960/2835 [57:03<25:29,  1.75s/it]

Epoch[0/10](1960/2835) || training loss 0.7522 || training accuracy 68.33% || lr 0.0005318638645048922


 69%|██████▉   | 1970/2835 [57:21<25:09,  1.74s/it]

Epoch[0/10](1970/2835) || training loss 0.7102 || training accuracy 70.00% || lr 0.0005240296721294665


 70%|██████▉   | 1980/2835 [57:38<24:42,  1.73s/it]

Epoch[0/10](1980/2835) || training loss 0.6763 || training accuracy 78.33% || lr 0.0005161896741595252


 70%|███████   | 1990/2835 [57:55<24:21,  1.73s/it]

Epoch[0/10](1990/2835) || training loss 0.7269 || training accuracy 70.83% || lr 0.0005083458049972545


 71%|███████   | 2000/2835 [58:12<23:50,  1.71s/it]

Epoch[0/10](2000/2835) || training loss 0.7004 || training accuracy 71.67% || lr 0.0005005000000000001


 71%|███████   | 2010/2835 [58:30<23:53,  1.74s/it]

Epoch[0/10](2010/2835) || training loss 0.7529 || training accuracy 60.00% || lr 0.0004926541950027458


 71%|███████▏  | 2020/2835 [58:47<23:39,  1.74s/it]

Epoch[0/10](2020/2835) || training loss 0.7286 || training accuracy 70.00% || lr 0.000484810325840475


 72%|███████▏  | 2030/2835 [59:05<23:27,  1.75s/it]

Epoch[0/10](2030/2835) || training loss 0.7179 || training accuracy 61.67% || lr 0.00047697032787053347


 72%|███████▏  | 2040/2835 [59:22<23:06,  1.74s/it]

Epoch[0/10](2040/2835) || training loss 0.737 || training accuracy 60.00% || lr 0.0004691361354951082


 72%|███████▏  | 2050/2835 [59:39<22:34,  1.72s/it]

Epoch[0/10](2050/2835) || training loss 0.7218 || training accuracy 65.00% || lr 0.0004613096816839416


 73%|███████▎  | 2060/2835 [59:57<22:30,  1.74s/it]

Epoch[0/10](2060/2835) || training loss 0.7344 || training accuracy 65.83% || lr 0.0004534928974974022


 73%|███████▎  | 2070/2835 [1:00:14<22:10,  1.74s/it]

Epoch[0/10](2070/2835) || training loss 0.7081 || training accuracy 74.17% || lr 0.000445687711610023


 73%|███████▎  | 2080/2835 [1:00:31<21:45,  1.73s/it]

Epoch[0/10](2080/2835) || training loss 0.7427 || training accuracy 69.17% || lr 0.00043789604983463014


 74%|███████▎  | 2090/2835 [1:00:49<21:40,  1.75s/it]

Epoch[0/10](2090/2835) || training loss 0.7011 || training accuracy 75.00% || lr 0.00043011983464717756


 74%|███████▍  | 2100/2835 [1:01:06<21:20,  1.74s/it]

Epoch[0/10](2100/2835) || training loss 0.7309 || training accuracy 70.83% || lr 0.00042236098471240476


 74%|███████▍  | 2110/2835 [1:01:24<21:02,  1.74s/it]

Epoch[0/10](2110/2835) || training loss 0.6791 || training accuracy 75.83% || lr 0.00041462141441043495


 75%|███████▍  | 2120/2835 [1:01:41<20:48,  1.75s/it]

Epoch[0/10](2120/2835) || training loss 0.7188 || training accuracy 69.17% || lr 0.00040690303336443065


 75%|███████▌  | 2130/2835 [1:01:59<20:30,  1.74s/it]

Epoch[0/10](2130/2835) || training loss 0.7295 || training accuracy 66.67% || lr 0.00039920774596942214


 75%|███████▌  | 2140/2835 [1:02:16<19:51,  1.71s/it]

Epoch[0/10](2140/2835) || training loss 0.655 || training accuracy 77.50% || lr 0.00039153745092242707


 76%|███████▌  | 2150/2835 [1:02:33<19:24,  1.70s/it]

Epoch[0/10](2150/2835) || training loss 0.6977 || training accuracy 75.83% || lr 0.00038389404075397536


 76%|███████▌  | 2160/2835 [1:02:50<19:16,  1.71s/it]

Epoch[0/10](2160/2835) || training loss 0.7051 || training accuracy 70.00% || lr 0.0003762794013611552


 77%|███████▋  | 2170/2835 [1:03:07<19:06,  1.72s/it]

Epoch[0/10](2170/2835) || training loss 0.6938 || training accuracy 76.67% || lr 0.00036869541154229633


 77%|███████▋  | 2180/2835 [1:03:25<18:57,  1.74s/it]

Epoch[0/10](2180/2835) || training loss 0.6927 || training accuracy 75.83% || lr 0.00036114394253340497


 77%|███████▋  | 2190/2835 [1:03:42<18:33,  1.73s/it]

Epoch[0/10](2190/2835) || training loss 0.6784 || training accuracy 76.67% || lr 0.0003536268575464643


 78%|███████▊  | 2200/2835 [1:03:59<17:56,  1.69s/it]

Epoch[0/10](2200/2835) || training loss 0.7411 || training accuracy 71.67% || lr 0.0003461460113097139


 78%|███████▊  | 2210/2835 [1:04:17<18:02,  1.73s/it]

Epoch[0/10](2210/2835) || training loss 0.6945 || training accuracy 75.83% || lr 0.0003387032496100244


 78%|███████▊  | 2220/2835 [1:04:34<17:37,  1.72s/it]

Epoch[0/10](2220/2835) || training loss 0.6771 || training accuracy 72.50% || lr 0.00033130040883747703


 79%|███████▊  | 2230/2835 [1:04:51<17:18,  1.72s/it]

Epoch[0/10](2230/2835) || training loss 0.7119 || training accuracy 70.83% || lr 0.0003239393155322612


 79%|███████▉  | 2240/2835 [1:05:08<17:13,  1.74s/it]

Epoch[0/10](2240/2835) || training loss 0.7067 || training accuracy 78.33% || lr 0.00031662178593400343


 79%|███████▉  | 2250/2835 [1:05:26<16:51,  1.73s/it]

Epoch[0/10](2250/2835) || training loss 0.7219 || training accuracy 71.67% || lr 0.00030934962553363774


 80%|███████▉  | 2260/2835 [1:05:43<16:36,  1.73s/it]

Epoch[0/10](2260/2835) || training loss 0.7341 || training accuracy 70.00% || lr 0.0003021246286279271


 80%|████████  | 2270/2835 [1:06:00<16:25,  1.74s/it]

Epoch[0/10](2270/2835) || training loss 0.718 || training accuracy 67.50% || lr 0.00029494857787674825


 80%|████████  | 2280/2835 [1:06:18<16:08,  1.74s/it]

Epoch[0/10](2280/2835) || training loss 0.6957 || training accuracy 75.83% || lr 0.00028782324386324626


 81%|████████  | 2290/2835 [1:06:35<15:39,  1.72s/it]

Epoch[0/10](2290/2835) || training loss 0.6996 || training accuracy 78.33% || lr 0.0002807503846569704


 81%|████████  | 2300/2835 [1:06:52<15:17,  1.72s/it]

Epoch[0/10](2300/2835) || training loss 0.6738 || training accuracy 75.83% || lr 0.00027373174538009644


 81%|████████▏ | 2310/2835 [1:07:10<14:57,  1.71s/it]

Epoch[0/10](2310/2835) || training loss 0.7248 || training accuracy 72.50% || lr 0.00026676905777684373


 82%|████████▏ | 2320/2835 [1:07:27<14:58,  1.75s/it]

Epoch[0/10](2320/2835) || training loss 0.6968 || training accuracy 73.33% || lr 0.0002598640397861934


 82%|████████▏ | 2330/2835 [1:07:44<14:40,  1.74s/it]

Epoch[0/10](2330/2835) || training loss 0.7082 || training accuracy 75.00% || lr 0.00025301839511801245


 83%|████████▎ | 2340/2835 [1:08:02<14:26,  1.75s/it]

Epoch[0/10](2340/2835) || training loss 0.6862 || training accuracy 77.50% || lr 0.00024623381283268956


 83%|████████▎ | 2350/2835 [1:08:19<13:55,  1.72s/it]

Epoch[0/10](2350/2835) || training loss 0.7021 || training accuracy 73.33% || lr 0.0002395119669243836


 83%|████████▎ | 2360/2835 [1:08:36<13:35,  1.72s/it]

Epoch[0/10](2360/2835) || training loss 0.6829 || training accuracy 74.17% || lr 0.0002328545159079913


 84%|████████▎ | 2370/2835 [1:08:53<13:17,  1.71s/it]

Epoch[0/10](2370/2835) || training loss 0.6761 || training accuracy 78.33% || lr 0.0002262631024099333


 84%|████████▍ | 2380/2835 [1:09:11<13:05,  1.73s/it]

Epoch[0/10](2380/2835) || training loss 0.6967 || training accuracy 73.33% || lr 0.00021973935276286074


 84%|████████▍ | 2390/2835 [1:09:28<12:53,  1.74s/it]

Epoch[0/10](2390/2835) || training loss 0.6851 || training accuracy 79.17% || lr 0.00021328487660438237


 85%|████████▍ | 2400/2835 [1:09:45<12:34,  1.73s/it]

Epoch[0/10](2400/2835) || training loss 0.6679 || training accuracy 76.67% || lr 0.00020690126647990973


 85%|████████▌ | 2410/2835 [1:10:03<12:05,  1.71s/it]

Epoch[0/10](2410/2835) || training loss 0.6954 || training accuracy 80.83% || lr 0.000200590097449721


 85%|████████▌ | 2420/2835 [1:10:20<11:50,  1.71s/it]

Epoch[0/10](2420/2835) || training loss 0.669 || training accuracy 84.17% || lr 0.00019435292670033839


 86%|████████▌ | 2430/2835 [1:10:37<11:36,  1.72s/it]

Epoch[0/10](2430/2835) || training loss 0.6871 || training accuracy 83.33% || lr 0.00018819129316031527


 86%|████████▌ | 2440/2835 [1:10:54<11:17,  1.72s/it]

Epoch[0/10](2440/2835) || training loss 0.6963 || training accuracy 76.67% || lr 0.00018210671712052948


 86%|████████▋ | 2450/2835 [1:11:11<11:06,  1.73s/it]

Epoch[0/10](2450/2835) || training loss 0.7178 || training accuracy 76.67% || lr 0.00017610069985907333


 87%|████████▋ | 2460/2835 [1:11:29<10:45,  1.72s/it]

Epoch[0/10](2460/2835) || training loss 0.6785 || training accuracy 80.00% || lr 0.00017017472327083582


 87%|████████▋ | 2470/2835 [1:11:46<10:30,  1.73s/it]

Epoch[0/10](2470/2835) || training loss 0.6737 || training accuracy 81.67% || lr 0.00016433024950186835


 87%|████████▋ | 2480/2835 [1:12:03<10:17,  1.74s/it]

Epoch[0/10](2480/2835) || training loss 0.6721 || training accuracy 83.33% || lr 0.00015856872058862015


 88%|████████▊ | 2490/2835 [1:12:21<09:59,  1.74s/it]

Epoch[0/10](2490/2835) || training loss 0.6516 || training accuracy 81.67% || lr 0.00015289155810213902


 88%|████████▊ | 2500/2835 [1:12:38<09:44,  1.74s/it]

Epoch[0/10](2500/2835) || training loss 0.7074 || training accuracy 75.83% || lr 0.00014730016279731955


 89%|████████▊ | 2510/2835 [1:12:56<09:27,  1.75s/it]

Epoch[0/10](2510/2835) || training loss 0.6965 || training accuracy 84.17% || lr 0.00014179591426728726


 89%|████████▉ | 2520/2835 [1:13:13<09:07,  1.74s/it]

Epoch[0/10](2520/2835) || training loss 0.6738 || training accuracy 81.67% || lr 0.0001363801706030049


 89%|████████▉ | 2530/2835 [1:13:30<08:53,  1.75s/it]

Epoch[0/10](2530/2835) || training loss 0.6435 || training accuracy 84.17% || lr 0.00013105426805818459


 90%|████████▉ | 2540/2835 [1:13:48<08:33,  1.74s/it]

Epoch[0/10](2540/2835) || training loss 0.689 || training accuracy 80.00% || lr 0.00012581952071958545


 90%|████████▉ | 2550/2835 [1:14:05<08:13,  1.73s/it]

Epoch[0/10](2550/2835) || training loss 0.7006 || training accuracy 76.67% || lr 0.00012067722018278456


 90%|█████████ | 2560/2835 [1:14:22<07:42,  1.68s/it]

Epoch[0/10](2560/2835) || training loss 0.6699 || training accuracy 80.83% || lr 0.00011562863523349333


 91%|█████████ | 2570/2835 [1:14:39<07:32,  1.71s/it]

Epoch[0/10](2570/2835) || training loss 0.6874 || training accuracy 74.17% || lr 0.00011067501153450427


 91%|█████████ | 2580/2835 [1:14:56<07:22,  1.74s/it]

Epoch[0/10](2580/2835) || training loss 0.6672 || training accuracy 79.17% || lr 0.00010581757131834264


 91%|█████████▏| 2590/2835 [1:15:14<07:07,  1.75s/it]

Epoch[0/10](2590/2835) || training loss 0.7055 || training accuracy 79.17% || lr 0.00010105751308569843


 92%|█████████▏| 2600/2835 [1:15:31<06:48,  1.74s/it]

Epoch[0/10](2600/2835) || training loss 0.6976 || training accuracy 76.67% || lr 9.639601130971382e-05


 92%|█████████▏| 2610/2835 [1:15:49<06:27,  1.72s/it]

Epoch[0/10](2610/2835) || training loss 0.7008 || training accuracy 80.83% || lr 9.183421614620087e-05


 92%|█████████▏| 2620/2835 [1:16:06<06:14,  1.74s/it]

Epoch[0/10](2620/2835) || training loss 0.6897 || training accuracy 77.50% || lr 8.737325314985643e-05


 93%|█████████▎| 2630/2835 [1:16:23<05:57,  1.74s/it]

Epoch[0/10](2630/2835) || training loss 0.6752 || training accuracy 81.67% || lr 8.3014222996549e-05


 93%|█████████▎| 2640/2835 [1:16:41<05:38,  1.74s/it]

Epoch[0/10](2640/2835) || training loss 0.6341 || training accuracy 86.67% || lr 7.875820121174348e-05


 93%|█████████▎| 2650/2835 [1:16:58<05:21,  1.74s/it]

Epoch[0/10](2650/2835) || training loss 0.699 || training accuracy 75.83% || lr 7.460623790513096e-05


 94%|█████████▍| 2660/2835 [1:17:15<05:05,  1.74s/it]

Epoch[0/10](2660/2835) || training loss 0.6176 || training accuracy 85.00% || lr 7.055935751153021e-05


 94%|█████████▍| 2670/2835 [1:17:33<04:47,  1.74s/it]

Epoch[0/10](2670/2835) || training loss 0.6324 || training accuracy 83.33% || lr 6.661855853812355e-05


 95%|█████████▍| 2680/2835 [1:17:50<04:29,  1.74s/it]

Epoch[0/10](2680/2835) || training loss 0.6499 || training accuracy 80.00% || lr 6.278481331809015e-05


 95%|█████████▍| 2690/2835 [1:18:08<04:10,  1.73s/it]

Epoch[0/10](2690/2835) || training loss 0.6537 || training accuracy 86.67% || lr 5.9059067770697614e-05


 95%|█████████▌| 2700/2835 [1:18:25<03:57,  1.76s/it]

Epoch[0/10](2700/2835) || training loss 0.6286 || training accuracy 85.00% || lr 5.544224116791029e-05


 96%|█████████▌| 2710/2835 [1:18:42<03:34,  1.71s/it]

Epoch[0/10](2710/2835) || training loss 0.6902 || training accuracy 79.17% || lr 5.193522590757254e-05


 96%|█████████▌| 2720/2835 [1:18:59<03:16,  1.71s/it]

Epoch[0/10](2720/2835) || training loss 0.6862 || training accuracy 80.00% || lr 4.853888729322333e-05


 96%|█████████▋| 2730/2835 [1:19:17<03:01,  1.73s/it]

Epoch[0/10](2730/2835) || training loss 0.6486 || training accuracy 80.83% || lr 4.5254063320595075e-05


 97%|█████████▋| 2740/2835 [1:19:34<02:43,  1.72s/it]

Epoch[0/10](2740/2835) || training loss 0.6961 || training accuracy 70.00% || lr 4.208156447085142e-05


 97%|█████████▋| 2750/2835 [1:19:51<02:24,  1.70s/it]

Epoch[0/10](2750/2835) || training loss 0.6411 || training accuracy 85.00% || lr 3.902217351061228e-05


 97%|█████████▋| 2760/2835 [1:20:08<02:08,  1.72s/it]

Epoch[0/10](2760/2835) || training loss 0.6636 || training accuracy 80.00% || lr 3.60766452988184e-05


 98%|█████████▊| 2770/2835 [1:20:25<01:51,  1.72s/it]

Epoch[0/10](2770/2835) || training loss 0.6662 || training accuracy 80.83% || lr 3.32457066004813e-05


 98%|█████████▊| 2780/2835 [1:20:43<01:35,  1.75s/it]

Epoch[0/10](2780/2835) || training loss 0.6661 || training accuracy 82.50% || lr 3.0530055907364446e-05


 98%|█████████▊| 2790/2835 [1:21:00<01:17,  1.72s/it]

Epoch[0/10](2790/2835) || training loss 0.69 || training accuracy 78.33% || lr 2.7930363265641128e-05


 99%|█████████▉| 2800/2835 [1:21:17<01:02,  1.77s/it]

Epoch[0/10](2800/2835) || training loss 0.6585 || training accuracy 82.50% || lr 2.5447270110570814e-05


 99%|█████████▉| 2810/2835 [1:21:35<00:43,  1.72s/it]

Epoch[0/10](2810/2835) || training loss 0.6799 || training accuracy 80.00% || lr 2.3081389108234106e-05


 99%|█████████▉| 2820/2835 [1:21:52<00:26,  1.74s/it]

Epoch[0/10](2820/2835) || training loss 0.661 || training accuracy 83.33% || lr 2.0833304004366943e-05


100%|█████████▉| 2830/2835 [1:22:10<00:08,  1.78s/it]

Epoch[0/10](2830/2835) || training loss 0.6794 || training accuracy 81.67% || lr 1.8703569480329918e-05


100%|██████████| 2835/2835 [1:22:18<00:00,  1.74s/it]

Calculating validation results...





New best model for val accuracy : 83.57%! saving the best model..


  0%|          | 0/2835 [00:00<?, ?it/s]

[Val] acc : 83.57%, loss: 0.67 || best acc : 83.57%, best loss: 0.67


  0%|          | 10/2835 [00:17<1:21:47,  1.74s/it]

Epoch[1/10](10/2835) || training loss 0.6772 || training accuracy 76.67% || lr 1.573201650246304e-05


  1%|          | 20/2835 [00:34<1:21:58,  1.75s/it]

Epoch[1/10](20/2835) || training loss 0.6633 || training accuracy 81.67% || lr 1.3900393208560256e-05


  1%|          | 30/2835 [00:52<1:21:33,  1.74s/it]

Epoch[1/10](30/2835) || training loss 0.6797 || training accuracy 81.67% || lr 1.21888310864871e-05


  1%|▏         | 40/2835 [01:09<1:21:16,  1.74s/it]

Epoch[1/10](40/2835) || training loss 0.67 || training accuracy 81.67% || lr 1.0597752438586402e-05


  2%|▏         | 50/2835 [01:27<1:20:54,  1.74s/it]

Epoch[1/10](50/2835) || training loss 0.6902 || training accuracy 78.33% || lr 9.127549839706854e-06


  2%|▏         | 60/2835 [01:44<1:19:33,  1.72s/it]

Epoch[1/10](60/2835) || training loss 0.6625 || training accuracy 84.17% || lr 7.778586040340532e-06


  2%|▏         | 70/2835 [02:01<1:20:19,  1.74s/it]

Epoch[1/10](70/2835) || training loss 0.6801 || training accuracy 80.83% || lr 6.551193877119995e-06


  3%|▎         | 80/2835 [02:19<1:19:57,  1.74s/it]

Epoch[1/10](80/2835) || training loss 0.6539 || training accuracy 80.00% || lr 5.445676190695625e-06


  3%|▎         | 90/2835 [02:36<1:19:04,  1.73s/it]

Epoch[1/10](90/2835) || training loss 0.6662 || training accuracy 80.00% || lr 4.462305751014317e-06


  4%|▎         | 100/2835 [02:53<1:18:09,  1.71s/it]

Epoch[1/10](100/2835) || training loss 0.7045 || training accuracy 74.17% || lr 3.601325190017624e-06


  4%|▍         | 110/2835 [03:10<1:17:53,  1.71s/it]

Epoch[1/10](110/2835) || training loss 0.6737 || training accuracy 74.17% || lr 2.8629469417758424e-06


  4%|▍         | 120/2835 [03:27<1:17:40,  1.72s/it]

Epoch[1/10](120/2835) || training loss 0.6553 || training accuracy 83.33% || lr 2.2473531900731134e-06


  5%|▍         | 130/2835 [03:45<1:17:41,  1.72s/it]

Epoch[1/10](130/2835) || training loss 0.6928 || training accuracy 76.67% || lr 1.7546958234563027e-06


  5%|▍         | 140/2835 [04:02<1:18:29,  1.75s/it]

Epoch[1/10](140/2835) || training loss 0.6515 || training accuracy 81.67% || lr 1.385096397758911e-06


  5%|▌         | 150/2835 [04:19<1:17:48,  1.74s/it]

Epoch[1/10](150/2835) || training loss 0.6801 || training accuracy 80.83% || lr 1.1386461061087215e-06


  6%|▌         | 160/2835 [04:37<1:17:30,  1.74s/it]

Epoch[1/10](160/2835) || training loss 0.6938 || training accuracy 80.00% || lr 1.0154057564275631e-06


  6%|▌         | 170/2835 [04:54<1:16:14,  1.72s/it]

Epoch[1/10](170/2835) || training loss 0.6616 || training accuracy 81.67% || lr 3.4949999999999996e-06


  6%|▋         | 180/2835 [05:11<1:16:30,  1.73s/it]

Epoch[1/10](180/2835) || training loss 0.6946 || training accuracy 75.83% || lr 8.485e-06


  7%|▋         | 190/2835 [05:29<1:16:44,  1.74s/it]

Epoch[1/10](190/2835) || training loss 0.7036 || training accuracy 78.33% || lr 1.3475e-05


  7%|▋         | 200/2835 [05:46<1:15:32,  1.72s/it]

Epoch[1/10](200/2835) || training loss 0.6539 || training accuracy 81.67% || lr 1.8464999999999997e-05


  7%|▋         | 210/2835 [06:03<1:15:45,  1.73s/it]

Epoch[1/10](210/2835) || training loss 0.7114 || training accuracy 78.33% || lr 2.3454999999999998e-05


  8%|▊         | 220/2835 [06:21<1:15:51,  1.74s/it]

Epoch[1/10](220/2835) || training loss 0.6685 || training accuracy 77.50% || lr 2.8445e-05


  8%|▊         | 230/2835 [06:38<1:15:31,  1.74s/it]

Epoch[1/10](230/2835) || training loss 0.6693 || training accuracy 83.33% || lr 3.3434999999999994e-05


  8%|▊         | 240/2835 [06:56<1:16:00,  1.76s/it]

Epoch[1/10](240/2835) || training loss 0.6731 || training accuracy 84.17% || lr 3.8425e-05


  9%|▉         | 250/2835 [07:13<1:14:28,  1.73s/it]

Epoch[1/10](250/2835) || training loss 0.6218 || training accuracy 82.50% || lr 4.3415e-05


  9%|▉         | 260/2835 [07:30<1:14:47,  1.74s/it]

Epoch[1/10](260/2835) || training loss 0.6469 || training accuracy 85.83% || lr 4.8404999999999995e-05


 10%|▉         | 270/2835 [07:48<1:14:20,  1.74s/it]

Epoch[1/10](270/2835) || training loss 0.6642 || training accuracy 83.33% || lr 5.339499999999999e-05


 10%|▉         | 280/2835 [08:05<1:14:12,  1.74s/it]

Epoch[1/10](280/2835) || training loss 0.69 || training accuracy 79.17% || lr 5.8385e-05


 10%|█         | 290/2835 [08:22<1:13:48,  1.74s/it]

Epoch[1/10](290/2835) || training loss 0.7022 || training accuracy 80.83% || lr 6.3375e-05


 11%|█         | 300/2835 [08:40<1:13:45,  1.75s/it]

Epoch[1/10](300/2835) || training loss 0.6679 || training accuracy 82.50% || lr 6.836499999999999e-05


 11%|█         | 310/2835 [08:57<1:12:51,  1.73s/it]

Epoch[1/10](310/2835) || training loss 0.651 || training accuracy 84.17% || lr 7.3355e-05


 11%|█▏        | 320/2835 [09:14<1:12:47,  1.74s/it]

Epoch[1/10](320/2835) || training loss 0.7008 || training accuracy 81.67% || lr 7.8345e-05


 12%|█▏        | 330/2835 [09:32<1:12:41,  1.74s/it]

Epoch[1/10](330/2835) || training loss 0.6586 || training accuracy 79.17% || lr 8.333499999999999e-05


 12%|█▏        | 340/2835 [09:49<1:11:47,  1.73s/it]

Epoch[1/10](340/2835) || training loss 0.6665 || training accuracy 79.17% || lr 8.832499999999999e-05


 12%|█▏        | 350/2835 [10:06<1:12:01,  1.74s/it]

Epoch[1/10](350/2835) || training loss 0.6698 || training accuracy 78.33% || lr 9.3315e-05


 13%|█▎        | 360/2835 [10:24<1:11:56,  1.74s/it]

Epoch[1/10](360/2835) || training loss 0.6894 || training accuracy 79.17% || lr 9.8305e-05


 13%|█▎        | 370/2835 [10:41<1:11:40,  1.74s/it]

Epoch[1/10](370/2835) || training loss 0.639 || training accuracy 84.17% || lr 0.00010329499999999999


 13%|█▎        | 380/2835 [10:59<1:10:48,  1.73s/it]

Epoch[1/10](380/2835) || training loss 0.6563 || training accuracy 83.33% || lr 0.00010828499999999998


 14%|█▍        | 390/2835 [11:16<1:10:12,  1.72s/it]

Epoch[1/10](390/2835) || training loss 0.6791 || training accuracy 79.17% || lr 0.000113275


 14%|█▍        | 400/2835 [11:33<1:10:32,  1.74s/it]

Epoch[1/10](400/2835) || training loss 0.7087 || training accuracy 78.33% || lr 0.000118265


 14%|█▍        | 410/2835 [11:51<1:10:13,  1.74s/it]

Epoch[1/10](410/2835) || training loss 0.6909 || training accuracy 77.50% || lr 0.000123255


 15%|█▍        | 420/2835 [12:08<1:10:04,  1.74s/it]

Epoch[1/10](420/2835) || training loss 0.6641 || training accuracy 81.67% || lr 0.000128245


 15%|█▌        | 430/2835 [12:25<1:09:54,  1.74s/it]

Epoch[1/10](430/2835) || training loss 0.7048 || training accuracy 79.17% || lr 0.000133235


 16%|█▌        | 440/2835 [12:43<1:09:30,  1.74s/it]

Epoch[1/10](440/2835) || training loss 0.696 || training accuracy 79.17% || lr 0.00013822499999999999


 16%|█▌        | 450/2835 [13:00<1:09:10,  1.74s/it]

Epoch[1/10](450/2835) || training loss 0.6697 || training accuracy 80.00% || lr 0.000143215


 16%|█▌        | 460/2835 [13:18<1:08:58,  1.74s/it]

Epoch[1/10](460/2835) || training loss 0.6629 || training accuracy 83.33% || lr 0.000148205


 17%|█▋        | 470/2835 [13:35<1:08:18,  1.73s/it]

Epoch[1/10](470/2835) || training loss 0.7126 || training accuracy 81.67% || lr 0.00015319499999999999


 17%|█▋        | 480/2835 [13:53<1:08:29,  1.75s/it]

Epoch[1/10](480/2835) || training loss 0.6253 || training accuracy 82.50% || lr 0.00015818499999999998


 17%|█▋        | 490/2835 [14:10<1:08:27,  1.75s/it]

Epoch[1/10](490/2835) || training loss 0.6293 || training accuracy 85.00% || lr 0.000163175


 18%|█▊        | 500/2835 [14:27<1:07:08,  1.73s/it]

Epoch[1/10](500/2835) || training loss 0.6505 || training accuracy 85.83% || lr 0.00016816500000000001


 18%|█▊        | 510/2835 [14:45<1:06:56,  1.73s/it]

Epoch[1/10](510/2835) || training loss 0.6771 || training accuracy 83.33% || lr 0.000173155


 18%|█▊        | 520/2835 [15:02<1:06:58,  1.74s/it]

Epoch[1/10](520/2835) || training loss 0.6605 || training accuracy 82.50% || lr 0.000178145


 19%|█▊        | 530/2835 [15:19<1:06:39,  1.74s/it]

Epoch[1/10](530/2835) || training loss 0.7155 || training accuracy 73.33% || lr 0.000183135


 19%|█▉        | 540/2835 [15:37<1:08:30,  1.79s/it]

Epoch[1/10](540/2835) || training loss 0.686 || training accuracy 78.33% || lr 0.00018812499999999998


 19%|█▉        | 550/2835 [15:54<1:05:35,  1.72s/it]

Epoch[1/10](550/2835) || training loss 0.6862 || training accuracy 79.17% || lr 0.000193115


 20%|█▉        | 560/2835 [16:11<1:06:02,  1.74s/it]

Epoch[1/10](560/2835) || training loss 0.6683 || training accuracy 76.67% || lr 0.000198105


 20%|██        | 570/2835 [16:29<1:05:57,  1.75s/it]

Epoch[1/10](570/2835) || training loss 0.6719 || training accuracy 83.33% || lr 0.000203095


 20%|██        | 580/2835 [16:46<1:05:05,  1.73s/it]

Epoch[1/10](580/2835) || training loss 0.6633 || training accuracy 80.83% || lr 0.000208085


 21%|██        | 590/2835 [17:04<1:05:07,  1.74s/it]

Epoch[1/10](590/2835) || training loss 0.7022 || training accuracy 85.83% || lr 0.000213075


 21%|██        | 600/2835 [17:21<1:04:59,  1.74s/it]

Epoch[1/10](600/2835) || training loss 0.6973 || training accuracy 80.83% || lr 0.00021806499999999998


 22%|██▏       | 610/2835 [17:38<1:04:26,  1.74s/it]

Epoch[1/10](610/2835) || training loss 0.676 || training accuracy 83.33% || lr 0.000223055


 22%|██▏       | 620/2835 [17:56<1:03:21,  1.72s/it]

Epoch[1/10](620/2835) || training loss 0.6921 || training accuracy 85.83% || lr 0.000228045


 22%|██▏       | 630/2835 [18:13<1:03:03,  1.72s/it]

Epoch[1/10](630/2835) || training loss 0.6813 || training accuracy 79.17% || lr 0.00023303499999999998


 23%|██▎       | 640/2835 [18:30<1:03:22,  1.73s/it]

Epoch[1/10](640/2835) || training loss 0.69 || training accuracy 75.83% || lr 0.00023802499999999997


 23%|██▎       | 650/2835 [18:47<1:03:19,  1.74s/it]

Epoch[1/10](650/2835) || training loss 0.6945 || training accuracy 78.33% || lr 0.000243015


 23%|██▎       | 660/2835 [19:05<1:03:08,  1.74s/it]

Epoch[1/10](660/2835) || training loss 0.66 || training accuracy 83.33% || lr 0.00024800500000000004


 24%|██▎       | 670/2835 [19:22<1:02:52,  1.74s/it]

Epoch[1/10](670/2835) || training loss 0.7159 || training accuracy 75.83% || lr 0.000252995


 24%|██▍       | 680/2835 [19:40<1:02:21,  1.74s/it]

Epoch[1/10](680/2835) || training loss 0.6542 || training accuracy 79.17% || lr 0.000257985


 24%|██▍       | 690/2835 [19:57<1:01:24,  1.72s/it]

Epoch[1/10](690/2835) || training loss 0.6212 || training accuracy 79.17% || lr 0.00026297500000000004


 25%|██▍       | 700/2835 [20:14<1:01:03,  1.72s/it]

Epoch[1/10](700/2835) || training loss 0.6846 || training accuracy 79.17% || lr 0.00026796500000000005


 25%|██▌       | 710/2835 [20:31<1:00:49,  1.72s/it]

Epoch[1/10](710/2835) || training loss 0.6397 || training accuracy 81.67% || lr 0.000272955


 25%|██▌       | 720/2835 [20:49<1:01:29,  1.74s/it]

Epoch[1/10](720/2835) || training loss 0.6976 || training accuracy 75.00% || lr 0.00027794500000000004


 26%|██▌       | 730/2835 [21:06<1:01:08,  1.74s/it]

Epoch[1/10](730/2835) || training loss 0.6896 || training accuracy 81.67% || lr 0.000282935


 26%|██▌       | 740/2835 [21:23<58:58,  1.69s/it]  

Epoch[1/10](740/2835) || training loss 0.6669 || training accuracy 83.33% || lr 0.000287925


 26%|██▋       | 750/2835 [21:41<1:00:43,  1.75s/it]

Epoch[1/10](750/2835) || training loss 0.7031 || training accuracy 74.17% || lr 0.000292915


 27%|██▋       | 760/2835 [21:58<1:00:15,  1.74s/it]

Epoch[1/10](760/2835) || training loss 0.6833 || training accuracy 82.50% || lr 0.000297905


 27%|██▋       | 770/2835 [22:16<59:58,  1.74s/it]  

Epoch[1/10](770/2835) || training loss 0.6388 || training accuracy 82.50% || lr 0.000302895


 28%|██▊       | 780/2835 [22:33<59:37,  1.74s/it]

Epoch[1/10](780/2835) || training loss 0.6698 || training accuracy 78.33% || lr 0.00030788500000000004


 28%|██▊       | 790/2835 [22:51<59:03,  1.73s/it]  

Epoch[1/10](790/2835) || training loss 0.7075 || training accuracy 73.33% || lr 0.00031287500000000006


 28%|██▊       | 800/2835 [23:08<59:03,  1.74s/it]

Epoch[1/10](800/2835) || training loss 0.7047 || training accuracy 80.00% || lr 0.000317865


 29%|██▊       | 810/2835 [23:25<58:10,  1.72s/it]

Epoch[1/10](810/2835) || training loss 0.6771 || training accuracy 78.33% || lr 0.00032285500000000004


 29%|██▉       | 820/2835 [23:42<57:38,  1.72s/it]

Epoch[1/10](820/2835) || training loss 0.689 || training accuracy 76.67% || lr 0.000327845


 29%|██▉       | 830/2835 [24:00<58:39,  1.76s/it]

Epoch[1/10](830/2835) || training loss 0.6597 || training accuracy 84.17% || lr 0.000332835


 30%|██▉       | 840/2835 [24:17<57:12,  1.72s/it]

Epoch[1/10](840/2835) || training loss 0.6851 || training accuracy 80.00% || lr 0.000337825


 30%|██▉       | 850/2835 [24:34<57:43,  1.75s/it]

Epoch[1/10](850/2835) || training loss 0.6806 || training accuracy 78.33% || lr 0.000342815


 30%|███       | 860/2835 [24:52<57:06,  1.74s/it]

Epoch[1/10](860/2835) || training loss 0.6672 || training accuracy 81.67% || lr 0.000347805


 31%|███       | 870/2835 [25:09<56:51,  1.74s/it]

Epoch[1/10](870/2835) || training loss 0.6899 || training accuracy 73.33% || lr 0.000352795


 31%|███       | 880/2835 [25:27<57:08,  1.75s/it]

Epoch[1/10](880/2835) || training loss 0.6867 || training accuracy 79.17% || lr 0.000357785


 31%|███▏      | 890/2835 [25:44<56:15,  1.74s/it]

Epoch[1/10](890/2835) || training loss 0.6252 || training accuracy 85.00% || lr 0.000362775


 32%|███▏      | 900/2835 [26:01<56:07,  1.74s/it]

Epoch[1/10](900/2835) || training loss 0.6634 || training accuracy 83.33% || lr 0.00036776500000000004


 32%|███▏      | 910/2835 [26:19<56:06,  1.75s/it]

Epoch[1/10](910/2835) || training loss 0.6837 || training accuracy 83.33% || lr 0.000372755


 32%|███▏      | 920/2835 [26:36<54:57,  1.72s/it]

Epoch[1/10](920/2835) || training loss 0.6849 || training accuracy 74.17% || lr 0.000377745


 33%|███▎      | 930/2835 [26:53<55:19,  1.74s/it]

Epoch[1/10](930/2835) || training loss 0.7084 || training accuracy 76.67% || lr 0.00038273500000000004


 33%|███▎      | 940/2835 [27:11<56:44,  1.80s/it]

Epoch[1/10](940/2835) || training loss 0.6697 || training accuracy 77.50% || lr 0.000387725


 34%|███▎      | 950/2835 [27:28<54:36,  1.74s/it]

Epoch[1/10](950/2835) || training loss 0.671 || training accuracy 85.00% || lr 0.000392715


 34%|███▍      | 960/2835 [27:46<54:25,  1.74s/it]

Epoch[1/10](960/2835) || training loss 0.7094 || training accuracy 79.17% || lr 0.000397705


 34%|███▍      | 970/2835 [28:03<53:43,  1.73s/it]

Epoch[1/10](970/2835) || training loss 0.7066 || training accuracy 74.17% || lr 0.000402695


 35%|███▍      | 980/2835 [28:20<53:20,  1.73s/it]

Epoch[1/10](980/2835) || training loss 0.7037 || training accuracy 81.67% || lr 0.00040768499999999997


 35%|███▍      | 990/2835 [28:38<53:26,  1.74s/it]

Epoch[1/10](990/2835) || training loss 0.6773 || training accuracy 79.17% || lr 0.00041267500000000004


 35%|███▌      | 1000/2835 [28:55<53:27,  1.75s/it]

Epoch[1/10](1000/2835) || training loss 0.6925 || training accuracy 74.17% || lr 0.000417665


 36%|███▌      | 1010/2835 [29:13<53:08,  1.75s/it]

Epoch[1/10](1010/2835) || training loss 0.6836 || training accuracy 80.00% || lr 0.000422655


 36%|███▌      | 1020/2835 [29:30<52:43,  1.74s/it]

Epoch[1/10](1020/2835) || training loss 0.7054 || training accuracy 79.17% || lr 0.00042764500000000004


 36%|███▋      | 1030/2835 [29:47<52:33,  1.75s/it]

Epoch[1/10](1030/2835) || training loss 0.6719 || training accuracy 80.83% || lr 0.000432635


 37%|███▋      | 1040/2835 [30:05<51:48,  1.73s/it]

Epoch[1/10](1040/2835) || training loss 0.6936 || training accuracy 76.67% || lr 0.000437625


 37%|███▋      | 1050/2835 [30:22<51:55,  1.75s/it]

Epoch[1/10](1050/2835) || training loss 0.6521 || training accuracy 83.33% || lr 0.000442615


 37%|███▋      | 1060/2835 [30:40<51:17,  1.73s/it]

Epoch[1/10](1060/2835) || training loss 0.6777 || training accuracy 79.17% || lr 0.000447605


 38%|███▊      | 1070/2835 [30:57<51:12,  1.74s/it]

Epoch[1/10](1070/2835) || training loss 0.6852 || training accuracy 85.00% || lr 0.00045259499999999997


 38%|███▊      | 1080/2835 [31:14<50:57,  1.74s/it]

Epoch[1/10](1080/2835) || training loss 0.7246 || training accuracy 78.33% || lr 0.000457585


 38%|███▊      | 1090/2835 [31:32<50:44,  1.74s/it]

Epoch[1/10](1090/2835) || training loss 0.6903 || training accuracy 74.17% || lr 0.00046257500000000006


 39%|███▉      | 1100/2835 [31:49<50:23,  1.74s/it]

Epoch[1/10](1100/2835) || training loss 0.6881 || training accuracy 77.50% || lr 0.000467565


 39%|███▉      | 1110/2835 [32:07<50:00,  1.74s/it]

Epoch[1/10](1110/2835) || training loss 0.6841 || training accuracy 85.00% || lr 0.00047255500000000005


 40%|███▉      | 1120/2835 [32:24<49:31,  1.73s/it]

Epoch[1/10](1120/2835) || training loss 0.6652 || training accuracy 82.50% || lr 0.000477545


 40%|███▉      | 1130/2835 [32:41<49:34,  1.74s/it]

Epoch[1/10](1130/2835) || training loss 0.6799 || training accuracy 75.83% || lr 0.00048253500000000003


 40%|████      | 1140/2835 [32:59<49:21,  1.75s/it]

Epoch[1/10](1140/2835) || training loss 0.7153 || training accuracy 75.00% || lr 0.000487525


 41%|████      | 1150/2835 [33:16<48:57,  1.74s/it]

Epoch[1/10](1150/2835) || training loss 0.6717 || training accuracy 80.83% || lr 0.000492515


 41%|████      | 1160/2835 [33:34<48:40,  1.74s/it]

Epoch[1/10](1160/2835) || training loss 0.6648 || training accuracy 80.00% || lr 0.000497505


 41%|████▏     | 1170/2835 [33:51<48:14,  1.74s/it]

Epoch[1/10](1170/2835) || training loss 0.6985 || training accuracy 77.50% || lr 0.000499992304832375


 42%|████▏     | 1180/2835 [34:09<53:29,  1.94s/it]

Epoch[1/10](1180/2835) || training loss 0.6652 || training accuracy 78.33% || lr 0.0004999307463393911


 42%|████▏     | 1190/2835 [34:30<57:26,  2.10s/it]

Epoch[1/10](1190/2835) || training loss 0.6687 || training accuracy 80.00% || lr 0.0004998076445420604


 42%|████▏     | 1200/2835 [34:51<57:16,  2.10s/it]

Epoch[1/10](1200/2835) || training loss 0.6888 || training accuracy 80.00% || lr 0.0004996230298139093


 43%|████▎     | 1210/2835 [35:12<56:51,  2.10s/it]

Epoch[1/10](1210/2835) || training loss 0.6384 || training accuracy 85.00% || lr 0.0004993769477058594


 43%|████▎     | 1220/2835 [35:34<56:44,  2.11s/it]

Epoch[1/10](1220/2835) || training loss 0.6657 || training accuracy 76.67% || lr 0.0004990694589349888


 43%|████▎     | 1230/2835 [35:55<56:24,  2.11s/it]

Epoch[1/10](1230/2835) || training loss 0.6887 || training accuracy 71.67% || lr 0.0004987006393695507


 44%|████▎     | 1240/2835 [36:11<45:10,  1.70s/it]

Epoch[1/10](1240/2835) || training loss 0.6856 || training accuracy 74.17% || lr 0.0004982705800102541


 44%|████▍     | 1250/2835 [36:28<45:12,  1.71s/it]

Epoch[1/10](1250/2835) || training loss 0.64 || training accuracy 80.00% || lr 0.0004977793869678107


 44%|████▍     | 1260/2835 [36:45<45:01,  1.72s/it]

Epoch[1/10](1260/2835) || training loss 0.6814 || training accuracy 80.83% || lr 0.0004972271814367539


 45%|████▍     | 1270/2835 [37:03<45:00,  1.73s/it]

Epoch[1/10](1270/2835) || training loss 0.6541 || training accuracy 80.00% || lr 0.0004966140996655356


 45%|████▌     | 1280/2835 [37:20<44:41,  1.72s/it]

Epoch[1/10](1280/2835) || training loss 0.6978 || training accuracy 75.00% || lr 0.0004959402929229092


 46%|████▌     | 1290/2835 [37:37<44:44,  1.74s/it]

Epoch[1/10](1290/2835) || training loss 0.6671 || training accuracy 84.17% || lr 0.000495205927460606


 46%|████▌     | 1300/2835 [37:55<44:04,  1.72s/it]

Epoch[1/10](1300/2835) || training loss 0.7005 || training accuracy 75.83% || lr 0.0004944111844723153


 46%|████▌     | 1310/2835 [38:12<43:31,  1.71s/it]

Epoch[1/10](1310/2835) || training loss 0.7013 || training accuracy 83.33% || lr 0.0004935562600489775


 47%|████▋     | 1320/2835 [38:29<43:28,  1.72s/it]

Epoch[1/10](1320/2835) || training loss 0.6582 || training accuracy 85.83% || lr 0.0004926413651304013


 47%|████▋     | 1330/2835 [38:46<43:32,  1.74s/it]

Epoch[1/10](1330/2835) || training loss 0.699 || training accuracy 77.50% || lr 0.0004916667254532184


 47%|████▋     | 1340/2835 [39:03<43:04,  1.73s/it]

Epoch[1/10](1340/2835) || training loss 0.7216 || training accuracy 66.67% || lr 0.000490632581495185


 48%|████▊     | 1350/2835 [39:21<42:44,  1.73s/it]

Epoch[1/10](1350/2835) || training loss 0.6984 || training accuracy 74.17% || lr 0.0004895391884158487


 48%|████▊     | 1360/2835 [39:38<42:05,  1.71s/it]

Epoch[1/10](1360/2835) || training loss 0.674 || training accuracy 78.33% || lr 0.0004883868159935908


 48%|████▊     | 1370/2835 [39:55<42:07,  1.73s/it]

Epoch[1/10](1370/2835) || training loss 0.6547 || training accuracy 81.67% || lr 0.0004871757485590634


 49%|████▊     | 1380/2835 [40:12<42:13,  1.74s/it]

Epoch[1/10](1380/2835) || training loss 0.6922 || training accuracy 79.17% || lr 0.00048590628492503434


 49%|████▉     | 1390/2835 [40:30<41:59,  1.74s/it]

Epoch[1/10](1390/2835) || training loss 0.6927 || training accuracy 86.67% || lr 0.0004845787383126598


 49%|████▉     | 1400/2835 [40:47<41:42,  1.74s/it]

Epoch[1/10](1400/2835) || training loss 0.644 || training accuracy 84.17% || lr 0.00048319343627420197


 50%|████▉     | 1410/2835 [41:05<41:47,  1.76s/it]

Epoch[1/10](1410/2835) || training loss 0.6949 || training accuracy 76.67% || lr 0.0004817507206122101


 50%|█████     | 1420/2835 [41:22<40:24,  1.71s/it]

Epoch[1/10](1420/2835) || training loss 0.6865 || training accuracy 77.50% || lr 0.0004802509472951861


 50%|█████     | 1430/2835 [41:39<40:06,  1.71s/it]

Epoch[1/10](1430/2835) || training loss 0.6761 || training accuracy 82.50% || lr 0.0004786944863697541


 51%|█████     | 1440/2835 [41:56<40:13,  1.73s/it]

Epoch[1/10](1440/2835) || training loss 0.6678 || training accuracy 80.83% || lr 0.0004770817218693578


 51%|█████     | 1450/2835 [42:14<39:55,  1.73s/it]

Epoch[1/10](1450/2835) || training loss 0.6915 || training accuracy 78.33% || lr 0.0004754130517195053


 51%|█████▏    | 1460/2835 [42:31<40:00,  1.75s/it]

Epoch[1/10](1460/2835) || training loss 0.6997 || training accuracy 76.67% || lr 0.00047368888763958726


 52%|█████▏    | 1470/2835 [42:49<39:08,  1.72s/it]

Epoch[1/10](1470/2835) || training loss 0.7029 || training accuracy 72.50% || lr 0.00047190965504129124


 52%|█████▏    | 1480/2835 [43:06<38:54,  1.72s/it]

Epoch[1/10](1480/2835) || training loss 0.7243 || training accuracy 73.33% || lr 0.0004700757929236375


 53%|█████▎    | 1490/2835 [43:23<38:38,  1.72s/it]

Epoch[1/10](1490/2835) || training loss 0.7005 || training accuracy 76.67% || lr 0.0004681877537646629


 53%|█████▎    | 1500/2835 [43:40<38:11,  1.72s/it]

Epoch[1/10](1500/2835) || training loss 0.6698 || training accuracy 80.83% || lr 0.0004662460034097787


 53%|█████▎    | 1510/2835 [43:58<38:27,  1.74s/it]

Epoch[1/10](1510/2835) || training loss 0.7292 || training accuracy 71.67% || lr 0.0004642510209568299


 54%|█████▎    | 1520/2835 [44:15<37:37,  1.72s/it]

Epoch[1/10](1520/2835) || training loss 0.676 || training accuracy 78.33% || lr 0.0004622032986378852


 54%|█████▍    | 1530/2835 [44:32<37:17,  1.71s/it]

Epoch[1/10](1530/2835) || training loss 0.7107 || training accuracy 80.83% || lr 0.000460103341697786


 54%|█████▍    | 1540/2835 [44:49<36:58,  1.71s/it]

Epoch[1/10](1540/2835) || training loss 0.6649 || training accuracy 85.83% || lr 0.00045795166826948506


 55%|█████▍    | 1550/2835 [45:06<36:14,  1.69s/it]

Epoch[1/10](1550/2835) || training loss 0.6205 || training accuracy 88.33% || lr 0.0004557488092462045


 55%|█████▌    | 1560/2835 [45:23<36:31,  1.72s/it]

Epoch[1/10](1560/2835) || training loss 0.7053 || training accuracy 73.33% || lr 0.0004534953081504464


 55%|█████▌    | 1570/2835 [45:41<36:35,  1.74s/it]

Epoch[1/10](1570/2835) || training loss 0.674 || training accuracy 79.17% || lr 0.0004511917209998861


 56%|█████▌    | 1580/2835 [45:58<36:25,  1.74s/it]

Epoch[1/10](1580/2835) || training loss 0.6482 || training accuracy 84.17% || lr 0.00044883861617018374


 56%|█████▌    | 1590/2835 [46:15<35:54,  1.73s/it]

Epoch[1/10](1590/2835) || training loss 0.6831 || training accuracy 77.50% || lr 0.00044643657425474584


 56%|█████▋    | 1600/2835 [46:33<35:50,  1.74s/it]

Epoch[1/10](1600/2835) || training loss 0.6836 || training accuracy 83.33% || lr 0.0004439861879214725


 57%|█████▋    | 1610/2835 [46:50<35:36,  1.74s/it]

Epoch[1/10](1610/2835) || training loss 0.7031 || training accuracy 80.00% || lr 0.0004414880617665255


 57%|█████▋    | 1620/2835 [47:08<35:19,  1.74s/it]

Epoch[1/10](1620/2835) || training loss 0.6519 || training accuracy 82.50% || lr 0.0004389428121651533


 57%|█████▋    | 1630/2835 [47:25<34:24,  1.71s/it]

Epoch[1/10](1630/2835) || training loss 0.6921 || training accuracy 80.00% || lr 0.0004363510671196097


 58%|█████▊    | 1640/2835 [47:42<34:06,  1.71s/it]

Epoch[1/10](1640/2835) || training loss 0.6851 || training accuracy 77.50% || lr 0.00043371346610420353


 58%|█████▊    | 1650/2835 [47:59<33:54,  1.72s/it]

Epoch[1/10](1650/2835) || training loss 0.6678 || training accuracy 80.83% || lr 0.0004310306599075181


 59%|█████▊    | 1660/2835 [48:16<34:07,  1.74s/it]

Epoch[1/10](1660/2835) || training loss 0.6365 || training accuracy 86.67% || lr 0.00042830331047183877


 59%|█████▉    | 1670/2835 [48:34<33:52,  1.74s/it]

Epoch[1/10](1670/2835) || training loss 0.6509 || training accuracy 80.00% || lr 0.00042553209072982855


 59%|█████▉    | 1680/2835 [48:51<33:34,  1.74s/it]

Epoch[1/10](1680/2835) || training loss 0.6844 || training accuracy 75.83% || lr 0.0004227176844384921


 60%|█████▉    | 1690/2835 [49:09<33:24,  1.75s/it]

Epoch[1/10](1690/2835) || training loss 0.6901 || training accuracy 77.50% || lr 0.00041986078601046904


 60%|█████▉    | 1700/2835 [49:26<32:49,  1.74s/it]

Epoch[1/10](1700/2835) || training loss 0.7027 || training accuracy 74.17% || lr 0.00041696210034269764


 60%|██████    | 1710/2835 [49:43<32:36,  1.74s/it]

Epoch[1/10](1710/2835) || training loss 0.6827 || training accuracy 76.67% || lr 0.00041402234264249256


 61%|██████    | 1720/2835 [50:01<31:53,  1.72s/it]

Epoch[1/10](1720/2835) || training loss 0.6354 || training accuracy 87.50% || lr 0.00041104223825107786


 61%|██████    | 1730/2835 [50:18<31:37,  1.72s/it]

Epoch[1/10](1730/2835) || training loss 0.7194 || training accuracy 70.83% || lr 0.0004080225224646197


 61%|██████▏   | 1740/2835 [50:35<31:35,  1.73s/it]

Epoch[1/10](1740/2835) || training loss 0.6935 || training accuracy 77.50% || lr 0.0004049639403528036


 62%|██████▏   | 1750/2835 [50:52<31:31,  1.74s/it]

Epoch[1/10](1750/2835) || training loss 0.6621 || training accuracy 80.00% || lr 0.0004018672465749989


 62%|██████▏   | 1760/2835 [51:10<31:11,  1.74s/it]

Epoch[1/10](1760/2835) || training loss 0.6649 || training accuracy 76.67% || lr 0.00039873320519405865


 62%|██████▏   | 1770/2835 [51:27<30:22,  1.71s/it]

Epoch[1/10](1770/2835) || training loss 0.7009 || training accuracy 77.50% || lr 0.00039556258948779773


 63%|██████▎   | 1780/2835 [51:44<30:32,  1.74s/it]

Epoch[1/10](1780/2835) || training loss 0.6936 || training accuracy 75.83% || lr 0.0003923561817581989


 63%|██████▎   | 1790/2835 [52:02<30:16,  1.74s/it]

Epoch[1/10](1790/2835) || training loss 0.669 || training accuracy 82.50% || lr 0.00038911477313839074


 63%|██████▎   | 1800/2835 [52:19<29:45,  1.73s/it]

Epoch[1/10](1800/2835) || training loss 0.7135 || training accuracy 72.50% || lr 0.00038583916339744815


 64%|██████▍   | 1810/2835 [52:36<29:21,  1.72s/it]

Epoch[1/10](1810/2835) || training loss 0.695 || training accuracy 84.17% || lr 0.0003825301607430606


 64%|██████▍   | 1820/2835 [52:54<28:53,  1.71s/it]

Epoch[1/10](1820/2835) || training loss 0.6662 || training accuracy 78.33% || lr 0.00037918858162211937


 65%|██████▍   | 1830/2835 [53:11<28:59,  1.73s/it]

Epoch[1/10](1830/2835) || training loss 0.6733 || training accuracy 79.17% || lr 0.00037581525051927144


 65%|██████▍   | 1840/2835 [53:28<28:53,  1.74s/it]

Epoch[1/10](1840/2835) || training loss 0.6997 || training accuracy 70.83% || lr 0.00037241099975349033


 65%|██████▌   | 1850/2835 [53:46<28:28,  1.73s/it]

Epoch[1/10](1850/2835) || training loss 0.7452 || training accuracy 69.17% || lr 0.0003689766692727134


 66%|██████▌   | 1860/2835 [54:03<28:23,  1.75s/it]

Epoch[1/10](1860/2835) || training loss 0.7037 || training accuracy 80.83% || lr 0.0003655131064465979


 66%|██████▌   | 1870/2835 [54:21<28:01,  1.74s/it]

Epoch[1/10](1870/2835) || training loss 0.6936 || training accuracy 81.67% || lr 0.00036202116585744477


 66%|██████▋   | 1880/2835 [54:38<27:39,  1.74s/it]

Epoch[1/10](1880/2835) || training loss 0.7053 || training accuracy 75.83% || lr 0.0003585017090893433


 67%|██████▋   | 1890/2835 [54:55<27:31,  1.75s/it]

Epoch[1/10](1890/2835) || training loss 0.7023 || training accuracy 76.67% || lr 0.0003549556045155883


 67%|██████▋   | 1900/2835 [55:13<27:42,  1.78s/it]

Epoch[1/10](1900/2835) || training loss 0.6958 || training accuracy 75.00% || lr 0.0003513837270844219


 67%|██████▋   | 1910/2835 [55:31<27:29,  1.78s/it]

Epoch[1/10](1910/2835) || training loss 0.6683 || training accuracy 79.17% || lr 0.000347786958103153


 68%|██████▊   | 1920/2835 [55:48<26:03,  1.71s/it]

Epoch[1/10](1920/2835) || training loss 0.653 || training accuracy 80.83% || lr 0.00034416618502070814


 68%|██████▊   | 1930/2835 [56:05<26:10,  1.74s/it]

Epoch[1/10](1930/2835) || training loss 0.708 || training accuracy 73.33% || lr 0.0003405223012086667


 68%|██████▊   | 1940/2835 [56:22<25:51,  1.73s/it]

Epoch[1/10](1940/2835) || training loss 0.6758 || training accuracy 81.67% || lr 0.0003368562057408345


 69%|██████▉   | 1950/2835 [56:40<25:29,  1.73s/it]

Epoch[1/10](1950/2835) || training loss 0.6401 || training accuracy 83.33% || lr 0.00033316880317141144


 69%|██████▉   | 1960/2835 [56:57<25:21,  1.74s/it]

Epoch[1/10](1960/2835) || training loss 0.702 || training accuracy 80.00% || lr 0.0003294610033118058


 69%|██████▉   | 1970/2835 [57:14<25:08,  1.74s/it]

Epoch[1/10](1970/2835) || training loss 0.6733 || training accuracy 86.67% || lr 0.00032573372100615173


 70%|██████▉   | 1980/2835 [57:32<24:32,  1.72s/it]

Epoch[1/10](1980/2835) || training loss 0.6825 || training accuracy 83.33% || lr 0.00032198787590558567


 70%|███████   | 1990/2835 [57:49<24:31,  1.74s/it]

Epoch[1/10](1990/2835) || training loss 0.6552 || training accuracy 81.67% || lr 0.000318224392241336


 71%|███████   | 2000/2835 [58:07<24:16,  1.74s/it]

Epoch[1/10](2000/2835) || training loss 0.682 || training accuracy 81.67% || lr 0.0003144441985966827


 71%|███████   | 2010/2835 [58:24<23:58,  1.74s/it]

Epoch[1/10](2010/2835) || training loss 0.6783 || training accuracy 84.17% || lr 0.00031064822767784317


 71%|███████▏  | 2020/2835 [58:41<23:43,  1.75s/it]

Epoch[1/10](2020/2835) || training loss 0.7134 || training accuracy 73.33% || lr 0.00030683741608384144


 72%|███████▏  | 2030/2835 [58:59<23:23,  1.74s/it]

Epoch[1/10](2030/2835) || training loss 0.6797 || training accuracy 76.67% || lr 0.0003030127040754157


 72%|███████▏  | 2040/2835 [59:16<23:02,  1.74s/it]

Epoch[1/10](2040/2835) || training loss 0.6659 || training accuracy 79.17% || lr 0.0002991750353430241


 72%|███████▏  | 2050/2835 [59:34<22:34,  1.73s/it]

Epoch[1/10](2050/2835) || training loss 0.6499 || training accuracy 80.83% || lr 0.0002953253567740016


 73%|███████▎  | 2060/2835 [59:51<22:14,  1.72s/it]

Epoch[1/10](2060/2835) || training loss 0.6594 || training accuracy 82.50% || lr 0.0002914646182189313


 73%|███████▎  | 2070/2835 [1:00:08<21:58,  1.72s/it]

Epoch[1/10](2070/2835) || training loss 0.6626 || training accuracy 77.50% || lr 0.00028759377225728237


 73%|███████▎  | 2080/2835 [1:00:25<21:48,  1.73s/it]

Epoch[1/10](2080/2835) || training loss 0.6846 || training accuracy 83.33% || lr 0.0002837137739623749


 74%|███████▎  | 2090/2835 [1:00:43<21:36,  1.74s/it]

Epoch[1/10](2090/2835) || training loss 0.7019 || training accuracy 76.67% || lr 0.00027982558066573054


 74%|███████▍  | 2100/2835 [1:01:00<21:21,  1.74s/it]

Epoch[1/10](2100/2835) || training loss 0.6704 || training accuracy 77.50% || lr 0.000275930151720865


 74%|███████▍  | 2110/2835 [1:01:18<21:08,  1.75s/it]

Epoch[1/10](2110/2835) || training loss 0.6263 || training accuracy 88.33% || lr 0.00027202844826658195


 75%|███████▍  | 2120/2835 [1:01:35<20:26,  1.72s/it]

Epoch[1/10](2120/2835) || training loss 0.6836 || training accuracy 82.50% || lr 0.00026812143298982614


 75%|███████▌  | 2130/2835 [1:01:52<20:16,  1.72s/it]

Epoch[1/10](2130/2835) || training loss 0.6765 || training accuracy 79.17% || lr 0.00026421006988815523


 75%|███████▌  | 2140/2835 [1:02:10<20:09,  1.74s/it]

Epoch[1/10](2140/2835) || training loss 0.6586 || training accuracy 80.00% || lr 0.00026029532403188765


 76%|███████▌  | 2150/2835 [1:02:27<19:55,  1.75s/it]

Epoch[1/10](2150/2835) || training loss 0.6886 || training accuracy 77.50% || lr 0.0002563781613259858


 76%|███████▌  | 2160/2835 [1:02:44<19:24,  1.72s/it]

Epoch[1/10](2160/2835) || training loss 0.6665 || training accuracy 81.67% || lr 0.0002524595482717335


 77%|███████▋  | 2170/2835 [1:03:01<19:06,  1.72s/it]

Epoch[1/10](2170/2835) || training loss 0.6926 || training accuracy 77.50% || lr 0.00024854045172826656


 77%|███████▋  | 2180/2835 [1:03:19<18:57,  1.74s/it]

Epoch[1/10](2180/2835) || training loss 0.6756 || training accuracy 80.83% || lr 0.00024462183867401423


 77%|███████▋  | 2190/2835 [1:03:36<18:30,  1.72s/it]

Epoch[1/10](2190/2835) || training loss 0.6707 || training accuracy 81.67% || lr 0.00024070467596811238


 78%|███████▊  | 2200/2835 [1:03:53<18:43,  1.77s/it]

Epoch[1/10](2200/2835) || training loss 0.65 || training accuracy 81.67% || lr 0.00023678993011184483


 78%|███████▊  | 2210/2835 [1:04:11<18:01,  1.73s/it]

Epoch[1/10](2210/2835) || training loss 0.6729 || training accuracy 79.17% || lr 0.0002328785670101739


 78%|███████▊  | 2220/2835 [1:04:28<17:49,  1.74s/it]

Epoch[1/10](2220/2835) || training loss 0.6355 || training accuracy 81.67% || lr 0.00022897155173341814


 79%|███████▊  | 2230/2835 [1:04:45<17:34,  1.74s/it]

Epoch[1/10](2230/2835) || training loss 0.6783 || training accuracy 79.17% || lr 0.00022506984827913506


 79%|███████▉  | 2240/2835 [1:05:03<17:01,  1.72s/it]

Epoch[1/10](2240/2835) || training loss 0.6905 || training accuracy 78.33% || lr 0.00022117441933426952


 79%|███████▉  | 2250/2835 [1:05:20<16:49,  1.73s/it]

Epoch[1/10](2250/2835) || training loss 0.6817 || training accuracy 77.50% || lr 0.00021728622603762516


 80%|███████▉  | 2260/2835 [1:05:37<16:40,  1.74s/it]

Epoch[1/10](2260/2835) || training loss 0.631 || training accuracy 88.33% || lr 0.00021340622774271777


 80%|████████  | 2270/2835 [1:05:55<16:24,  1.74s/it]

Epoch[1/10](2270/2835) || training loss 0.6613 || training accuracy 85.00% || lr 0.0002095353817810687


 80%|████████  | 2280/2835 [1:06:12<16:07,  1.74s/it]

Epoch[1/10](2280/2835) || training loss 0.6823 || training accuracy 84.17% || lr 0.00020567464322599843


 81%|████████  | 2290/2835 [1:06:29<15:47,  1.74s/it]

Epoch[1/10](2290/2835) || training loss 0.6478 || training accuracy 84.17% || lr 0.00020182496465697601


 81%|████████  | 2300/2835 [1:06:47<15:24,  1.73s/it]

Epoch[1/10](2300/2835) || training loss 0.6424 || training accuracy 83.33% || lr 0.00019798729592458426


 81%|████████▏ | 2310/2835 [1:07:04<15:22,  1.76s/it]

Epoch[1/10](2310/2835) || training loss 0.6742 || training accuracy 80.83% || lr 0.00019416258391615865


 82%|████████▏ | 2320/2835 [1:07:22<15:02,  1.75s/it]

Epoch[1/10](2320/2835) || training loss 0.6809 || training accuracy 81.67% || lr 0.00019035177232215686


 82%|████████▏ | 2330/2835 [1:07:39<14:25,  1.71s/it]

Epoch[1/10](2330/2835) || training loss 0.6515 || training accuracy 80.83% || lr 0.0001865558014033173


 83%|████████▎ | 2340/2835 [1:07:56<14:09,  1.72s/it]

Epoch[1/10](2340/2835) || training loss 0.6898 || training accuracy 80.83% || lr 0.000182775607758664


 83%|████████▎ | 2350/2835 [1:08:13<13:56,  1.73s/it]

Epoch[1/10](2350/2835) || training loss 0.7029 || training accuracy 76.67% || lr 0.00017901212409441434


 83%|████████▎ | 2360/2835 [1:08:30<13:33,  1.71s/it]

Epoch[1/10](2360/2835) || training loss 0.646 || training accuracy 82.50% || lr 0.00017526627899384833


 84%|████████▎ | 2370/2835 [1:08:47<13:20,  1.72s/it]

Epoch[1/10](2370/2835) || training loss 0.6786 || training accuracy 84.17% || lr 0.00017153899668819427


 84%|████████▍ | 2380/2835 [1:09:05<13:28,  1.78s/it]

Epoch[1/10](2380/2835) || training loss 0.6871 || training accuracy 81.67% || lr 0.00016783119682858857


 84%|████████▍ | 2390/2835 [1:09:22<12:51,  1.73s/it]

Epoch[1/10](2390/2835) || training loss 0.6651 || training accuracy 85.00% || lr 0.0001641437942591655


 85%|████████▍ | 2400/2835 [1:09:39<12:36,  1.74s/it]

Epoch[1/10](2400/2835) || training loss 0.6471 || training accuracy 83.33% || lr 0.00016047769879133335


 85%|████████▌ | 2410/2835 [1:09:57<12:18,  1.74s/it]

Epoch[1/10](2410/2835) || training loss 0.6322 || training accuracy 84.17% || lr 0.00015683381497929192


 85%|████████▌ | 2420/2835 [1:10:14<11:58,  1.73s/it]

Epoch[1/10](2420/2835) || training loss 0.6303 || training accuracy 85.83% || lr 0.00015321304189684707


 86%|████████▌ | 2430/2835 [1:10:31<11:45,  1.74s/it]

Epoch[1/10](2430/2835) || training loss 0.6565 || training accuracy 84.17% || lr 0.00014961627291557812


 86%|████████▌ | 2440/2835 [1:10:49<11:19,  1.72s/it]

Epoch[1/10](2440/2835) || training loss 0.619 || training accuracy 85.83% || lr 0.00014604439548441164


 86%|████████▋ | 2450/2835 [1:11:06<10:51,  1.69s/it]

Epoch[1/10](2450/2835) || training loss 0.6103 || training accuracy 84.17% || lr 0.00014249829091065672


 87%|████████▋ | 2460/2835 [1:11:23<10:51,  1.74s/it]

Epoch[1/10](2460/2835) || training loss 0.6892 || training accuracy 80.83% || lr 0.0001389788341425553


 87%|████████▋ | 2470/2835 [1:11:41<10:36,  1.74s/it]

Epoch[1/10](2470/2835) || training loss 0.6833 || training accuracy 79.17% || lr 0.00013548689355340218


 87%|████████▋ | 2480/2835 [1:11:58<10:16,  1.74s/it]

Epoch[1/10](2480/2835) || training loss 0.7074 || training accuracy 81.67% || lr 0.0001320233307272867


 88%|████████▊ | 2490/2835 [1:12:15<09:56,  1.73s/it]

Epoch[1/10](2490/2835) || training loss 0.6476 || training accuracy 86.67% || lr 0.0001285890002465097


 88%|████████▊ | 2500/2835 [1:12:33<09:38,  1.73s/it]

Epoch[1/10](2500/2835) || training loss 0.6488 || training accuracy 85.83% || lr 0.00012518474948072854


 89%|████████▊ | 2510/2835 [1:12:50<09:23,  1.73s/it]

Epoch[1/10](2510/2835) || training loss 0.677 || training accuracy 86.67% || lr 0.00012181141837788067


 89%|████████▉ | 2520/2835 [1:13:07<09:08,  1.74s/it]

Epoch[1/10](2520/2835) || training loss 0.6359 || training accuracy 85.83% || lr 0.00011846983925693948


 89%|████████▉ | 2530/2835 [1:13:25<08:51,  1.74s/it]

Epoch[1/10](2530/2835) || training loss 0.6876 || training accuracy 76.67% || lr 0.00011516083660255191


 90%|████████▉ | 2540/2835 [1:13:42<08:23,  1.71s/it]

Epoch[1/10](2540/2835) || training loss 0.6347 || training accuracy 85.00% || lr 0.00011188522686160931


 90%|████████▉ | 2550/2835 [1:13:59<08:25,  1.78s/it]

Epoch[1/10](2550/2835) || training loss 0.6434 || training accuracy 86.67% || lr 0.00010864381824180112


 90%|█████████ | 2560/2835 [1:14:17<07:55,  1.73s/it]

Epoch[1/10](2560/2835) || training loss 0.6601 || training accuracy 81.67% || lr 0.00010543741051220226


 91%|█████████ | 2570/2835 [1:14:34<07:36,  1.72s/it]

Epoch[1/10](2570/2835) || training loss 0.6846 || training accuracy 82.50% || lr 0.00010226679480594132


 91%|█████████ | 2580/2835 [1:14:51<07:20,  1.73s/it]

Epoch[1/10](2580/2835) || training loss 0.6051 || training accuracy 90.00% || lr 9.913275342500111e-05


 91%|█████████▏| 2590/2835 [1:15:08<06:58,  1.71s/it]

Epoch[1/10](2590/2835) || training loss 0.6698 || training accuracy 85.83% || lr 9.603605964719647e-05


 92%|█████████▏| 2600/2835 [1:15:26<06:43,  1.72s/it]

Epoch[1/10](2600/2835) || training loss 0.6879 || training accuracy 86.67% || lr 9.297747753538026e-05


 92%|█████████▏| 2610/2835 [1:15:43<06:26,  1.72s/it]

Epoch[1/10](2610/2835) || training loss 0.6659 || training accuracy 77.50% || lr 8.995776174892215e-05


 92%|█████████▏| 2620/2835 [1:16:00<06:14,  1.74s/it]

Epoch[1/10](2620/2835) || training loss 0.6848 || training accuracy 77.50% || lr 8.697765735750743e-05


 93%|█████████▎| 2630/2835 [1:16:18<05:57,  1.74s/it]

Epoch[1/10](2630/2835) || training loss 0.6271 || training accuracy 88.33% || lr 8.403789965730233e-05


 93%|█████████▎| 2640/2835 [1:16:35<05:35,  1.72s/it]

Epoch[1/10](2640/2835) || training loss 0.6479 || training accuracy 91.67% || lr 8.113921398953107e-05


 93%|█████████▎| 2650/2835 [1:16:52<05:19,  1.73s/it]

Epoch[1/10](2650/2835) || training loss 0.7075 || training accuracy 78.33% || lr 7.828231556150792e-05


 94%|█████████▍| 2660/2835 [1:17:10<05:04,  1.74s/it]

Epoch[1/10](2660/2835) || training loss 0.6584 || training accuracy 81.67% || lr 7.54679092701715e-05


 94%|█████████▍| 2670/2835 [1:17:27<04:47,  1.74s/it]

Epoch[1/10](2670/2835) || training loss 0.6593 || training accuracy 84.17% || lr 7.269668952816126e-05


 95%|█████████▍| 2680/2835 [1:17:44<04:24,  1.71s/it]

Epoch[1/10](2680/2835) || training loss 0.6862 || training accuracy 86.67% || lr 6.996934009248185e-05


 95%|█████████▍| 2690/2835 [1:18:01<04:04,  1.68s/it]

Epoch[1/10](2690/2835) || training loss 0.616 || training accuracy 88.33% || lr 6.728653389579642e-05


 95%|█████████▌| 2700/2835 [1:18:18<03:50,  1.71s/it]

Epoch[1/10](2700/2835) || training loss 0.6571 || training accuracy 81.67% || lr 6.464893288039036e-05


 96%|█████████▌| 2710/2835 [1:18:35<03:34,  1.71s/it]

Epoch[1/10](2710/2835) || training loss 0.6559 || training accuracy 86.67% || lr 6.205718783484671e-05


 96%|█████████▌| 2720/2835 [1:18:53<03:18,  1.73s/it]

Epoch[1/10](2720/2835) || training loss 0.6449 || training accuracy 83.33% || lr 5.9511938233474534e-05


 96%|█████████▋| 2730/2835 [1:19:10<03:03,  1.74s/it]

Epoch[1/10](2730/2835) || training loss 0.6829 || training accuracy 85.00% || lr 5.701381207852752e-05


 97%|█████████▋| 2740/2835 [1:19:28<02:45,  1.74s/it]

Epoch[1/10](2740/2835) || training loss 0.6762 || training accuracy 85.00% || lr 5.456342574525411e-05


 97%|█████████▋| 2750/2835 [1:19:45<02:28,  1.75s/it]

Epoch[1/10](2750/2835) || training loss 0.6628 || training accuracy 87.50% || lr 5.21613838298163e-05


 97%|█████████▋| 2760/2835 [1:20:02<02:08,  1.72s/it]

Epoch[1/10](2760/2835) || training loss 0.6468 || training accuracy 89.17% || lr 4.9808279000113914e-05


 98%|█████████▊| 2770/2835 [1:20:20<01:52,  1.73s/it]

Epoch[1/10](2770/2835) || training loss 0.6878 || training accuracy 85.83% || lr 4.750469184955366e-05


 98%|█████████▊| 2780/2835 [1:20:37<01:37,  1.78s/it]

Epoch[1/10](2780/2835) || training loss 0.6697 || training accuracy 80.83% || lr 4.52511907537955e-05


 98%|█████████▊| 2790/2835 [1:20:54<01:18,  1.74s/it]

Epoch[1/10](2790/2835) || training loss 0.6498 || training accuracy 86.67% || lr 4.304833173051493e-05


 99%|█████████▉| 2800/2835 [1:21:12<01:00,  1.74s/it]

Epoch[1/10](2800/2835) || training loss 0.6848 || training accuracy 88.33% || lr 4.0896658302213986e-05


 99%|█████████▉| 2810/2835 [1:21:29<00:43,  1.74s/it]

Epoch[1/10](2810/2835) || training loss 0.6397 || training accuracy 86.67% || lr 3.879670136211491e-05


 99%|█████████▉| 2820/2835 [1:21:47<00:26,  1.74s/it]

Epoch[1/10](2820/2835) || training loss 0.6417 || training accuracy 86.67% || lr 3.674897904317014e-05


100%|█████████▉| 2830/2835 [1:22:04<00:08,  1.71s/it]

Epoch[1/10](2830/2835) || training loss 0.6515 || training accuracy 85.00% || lr 3.475399659022133e-05


100%|██████████| 2835/2835 [1:22:12<00:00,  1.74s/it]

Calculating validation results...





New best model for val accuracy : 88.28%! saving the best model..


  0%|          | 0/2835 [00:00<?, ?it/s]

[Val] acc : 88.28%, loss: 0.65 || best acc : 88.28%, best loss: 0.65


  0%|          | 10/2835 [00:17<1:20:59,  1.72s/it]

Epoch[2/10](10/2835) || training loss 0.6718 || training accuracy 82.50% || lr 3.186148332905603e-05


  1%|          | 20/2835 [00:34<1:21:15,  1.73s/it]

Epoch[2/10](20/2835) || training loss 0.6307 || training accuracy 86.67% || lr 3.000047529287098e-05


  1%|          | 30/2835 [00:51<1:21:06,  1.74s/it]

Epoch[2/10](30/2835) || training loss 0.6819 || training accuracy 85.00% || lr 2.8193872215002235e-05


  1%|▏         | 40/2835 [01:09<1:21:04,  1.74s/it]

Epoch[2/10](40/2835) || training loss 0.6673 || training accuracy 80.00% || lr 2.6442119847726424e-05


  2%|▏         | 50/2835 [01:26<1:19:58,  1.72s/it]

Epoch[2/10](50/2835) || training loss 0.6773 || training accuracy 82.50% || lr 2.4745650409728172e-05


  2%|▏         | 60/2835 [01:43<1:20:20,  1.74s/it]

Epoch[2/10](60/2835) || training loss 0.6719 || training accuracy 87.50% || lr 2.31048824794564e-05


  2%|▏         | 70/2835 [02:01<1:20:10,  1.74s/it]

Epoch[2/10](70/2835) || training loss 0.6639 || training accuracy 87.50% || lr 2.1520220891846705e-05


  3%|▎         | 80/2835 [02:18<1:19:47,  1.74s/it]

Epoch[2/10](80/2835) || training loss 0.6719 || training accuracy 87.50% || lr 1.9992056638433958e-05


  3%|▎         | 90/2835 [02:36<1:19:01,  1.73s/it]

Epoch[2/10](90/2835) || training loss 0.6219 || training accuracy 91.67% || lr 1.852076677088126e-05


  4%|▎         | 100/2835 [02:53<1:17:57,  1.71s/it]

Epoch[2/10](100/2835) || training loss 0.6552 || training accuracy 86.67% || lr 1.7106714307948117e-05


  4%|▍         | 110/2835 [03:10<1:16:55,  1.69s/it]

Epoch[2/10](110/2835) || training loss 0.639 || training accuracy 86.67% || lr 1.5750248145920777e-05


  4%|▍         | 120/2835 [03:27<1:16:31,  1.69s/it]

Epoch[2/10](120/2835) || training loss 0.6777 || training accuracy 81.67% || lr 1.4451702972527448e-05


  5%|▍         | 130/2835 [03:44<1:16:23,  1.69s/it]

Epoch[2/10](130/2835) || training loss 0.632 || training accuracy 90.83% || lr 1.3211399184359193e-05


  5%|▍         | 140/2835 [04:01<1:16:59,  1.71s/it]

Epoch[2/10](140/2835) || training loss 0.6454 || training accuracy 88.33% || lr 1.2029642807816635e-05


  5%|▌         | 150/2835 [04:18<1:17:56,  1.74s/it]

Epoch[2/10](150/2835) || training loss 0.6871 || training accuracy 80.83% || lr 1.0906725423602705e-05


  6%|▌         | 160/2835 [04:35<1:17:32,  1.74s/it]

Epoch[2/10](160/2835) || training loss 0.6222 || training accuracy 90.00% || lr 9.842924094779408e-06


  6%|▌         | 170/2835 [04:53<1:17:07,  1.74s/it]

Epoch[2/10](170/2835) || training loss 0.6343 || training accuracy 86.67% || lr 8.838501298406547e-06


  6%|▋         | 180/2835 [05:10<1:16:27,  1.73s/it]

Epoch[2/10](180/2835) || training loss 0.6525 || training accuracy 82.50% || lr 7.893704860779699e-06


  7%|▋         | 190/2835 [05:27<1:14:00,  1.68s/it]

Epoch[2/10](190/2835) || training loss 0.6822 || training accuracy 81.67% || lr 7.008767896282542e-06


  7%|▋         | 200/2835 [05:44<1:14:13,  1.69s/it]

Epoch[2/10](200/2835) || training loss 0.6502 || training accuracy 91.67% || lr 6.183908749869417e-06


  7%|▋         | 210/2835 [06:01<1:15:08,  1.72s/it]

Epoch[2/10](210/2835) || training loss 0.664 || training accuracy 85.00% || lr 5.419330943192164e-06


  8%|▊         | 220/2835 [06:18<1:14:40,  1.71s/it]

Epoch[2/10](220/2835) || training loss 0.6678 || training accuracy 85.83% || lr 4.71522312438392e-06


  8%|▊         | 230/2835 [06:35<1:14:19,  1.71s/it]

Epoch[2/10](230/2835) || training loss 0.6569 || training accuracy 87.50% || lr 4.071759021513154e-06


  8%|▊         | 240/2835 [06:52<1:13:43,  1.70s/it]

Epoch[2/10](240/2835) || training loss 0.6118 || training accuracy 92.50% || lr 3.4890973997188944e-06


  9%|▉         | 250/2835 [07:09<1:13:23,  1.70s/it]

Epoch[2/10](250/2835) || training loss 0.652 || training accuracy 91.67% || lr 2.967382022037798e-06


  9%|▉         | 260/2835 [07:27<1:13:43,  1.72s/it]

Epoch[2/10](260/2835) || training loss 0.6474 || training accuracy 87.50% || lr 2.506741613932663e-06


 10%|▉         | 270/2835 [07:44<1:14:30,  1.74s/it]

Epoch[2/10](270/2835) || training loss 0.6618 || training accuracy 84.17% || lr 2.107289831531541e-06


 10%|▉         | 280/2835 [08:01<1:13:48,  1.73s/it]

Epoch[2/10](280/2835) || training loss 0.6496 || training accuracy 90.83% || lr 1.769125233584573e-06


 10%|█         | 290/2835 [08:19<1:13:26,  1.73s/it]

Epoch[2/10](290/2835) || training loss 0.6508 || training accuracy 87.50% || lr 1.492331257146246e-06


 11%|█         | 300/2835 [08:36<1:13:52,  1.75s/it]

Epoch[2/10](300/2835) || training loss 0.6653 || training accuracy 90.83% || lr 1.276976196988509e-06


 11%|█         | 310/2835 [08:54<1:13:21,  1.74s/it]

Epoch[2/10](310/2835) || training loss 0.6887 || training accuracy 79.17% || lr 1.1231131887499658e-06


 11%|█▏        | 320/2835 [09:11<1:12:59,  1.74s/it]

Epoch[2/10](320/2835) || training loss 0.6222 || training accuracy 89.17% || lr 1.0307801958256833e-06


 12%|█▏        | 330/2835 [09:28<1:11:58,  1.72s/it]

Epoch[2/10](330/2835) || training loss 0.6764 || training accuracy 84.17% || lr 1e-06


 12%|█▏        | 340/2835 [09:46<1:11:22,  1.72s/it]

Epoch[2/10](340/2835) || training loss 0.6224 || training accuracy 88.33% || lr 3.4899999999999996e-06


 12%|█▏        | 350/2835 [10:03<1:11:27,  1.73s/it]

Epoch[2/10](350/2835) || training loss 0.6441 || training accuracy 89.17% || lr 5.9799999999999995e-06


 13%|█▎        | 360/2835 [10:20<1:11:36,  1.74s/it]

Epoch[2/10](360/2835) || training loss 0.658 || training accuracy 83.33% || lr 8.47e-06


 13%|█▎        | 370/2835 [10:38<1:11:35,  1.74s/it]

Epoch[2/10](370/2835) || training loss 0.6395 || training accuracy 84.17% || lr 1.096e-05


 13%|█▎        | 380/2835 [10:55<1:13:21,  1.79s/it]

Epoch[2/10](380/2835) || training loss 0.6429 || training accuracy 88.33% || lr 1.345e-05


 14%|█▍        | 390/2835 [11:12<1:10:35,  1.73s/it]

Epoch[2/10](390/2835) || training loss 0.6523 || training accuracy 87.50% || lr 1.594e-05


 14%|█▍        | 400/2835 [11:30<1:10:54,  1.75s/it]

Epoch[2/10](400/2835) || training loss 0.6679 || training accuracy 88.33% || lr 1.8429999999999998e-05


 14%|█▍        | 410/2835 [11:47<1:10:01,  1.73s/it]

Epoch[2/10](410/2835) || training loss 0.6034 || training accuracy 88.33% || lr 2.092e-05


 15%|█▍        | 420/2835 [12:05<1:09:48,  1.73s/it]

Epoch[2/10](420/2835) || training loss 0.6527 || training accuracy 86.67% || lr 2.341e-05


 15%|█▌        | 430/2835 [12:22<1:10:02,  1.75s/it]

Epoch[2/10](430/2835) || training loss 0.6736 || training accuracy 80.83% || lr 2.59e-05


 16%|█▌        | 440/2835 [12:39<1:09:36,  1.74s/it]

Epoch[2/10](440/2835) || training loss 0.6324 || training accuracy 91.67% || lr 2.8389999999999998e-05


 16%|█▌        | 450/2835 [12:57<1:09:08,  1.74s/it]

Epoch[2/10](450/2835) || training loss 0.6683 || training accuracy 83.33% || lr 3.0879999999999996e-05


 16%|█▌        | 460/2835 [13:14<1:08:22,  1.73s/it]

Epoch[2/10](460/2835) || training loss 0.6582 || training accuracy 82.50% || lr 3.3369999999999994e-05


 17%|█▋        | 470/2835 [13:32<1:08:22,  1.73s/it]

Epoch[2/10](470/2835) || training loss 0.6368 || training accuracy 90.00% || lr 3.585999999999999e-05


 17%|█▋        | 480/2835 [13:49<1:10:18,  1.79s/it]

Epoch[2/10](480/2835) || training loss 0.6422 || training accuracy 87.50% || lr 3.834999999999999e-05


 17%|█▋        | 490/2835 [14:06<1:07:45,  1.73s/it]

Epoch[2/10](490/2835) || training loss 0.6559 || training accuracy 83.33% || lr 4.0839999999999995e-05


 18%|█▊        | 500/2835 [14:24<1:07:46,  1.74s/it]

Epoch[2/10](500/2835) || training loss 0.6293 || training accuracy 85.00% || lr 4.3329999999999993e-05


 18%|█▊        | 510/2835 [14:41<1:08:55,  1.78s/it]

Epoch[2/10](510/2835) || training loss 0.6582 || training accuracy 86.67% || lr 4.582e-05


 18%|█▊        | 520/2835 [14:58<1:05:29,  1.70s/it]

Epoch[2/10](520/2835) || training loss 0.7003 || training accuracy 82.50% || lr 4.8309999999999997e-05


 19%|█▊        | 530/2835 [15:16<1:08:44,  1.79s/it]

Epoch[2/10](530/2835) || training loss 0.6434 || training accuracy 85.00% || lr 5.0799999999999995e-05


 19%|█▉        | 540/2835 [15:33<1:04:40,  1.69s/it]

Epoch[2/10](540/2835) || training loss 0.6594 || training accuracy 90.00% || lr 5.328999999999999e-05


 19%|█▉        | 550/2835 [15:50<1:07:21,  1.77s/it]

Epoch[2/10](550/2835) || training loss 0.6232 || training accuracy 88.33% || lr 5.577999999999999e-05


 20%|█▉        | 560/2835 [16:07<1:04:32,  1.70s/it]

Epoch[2/10](560/2835) || training loss 0.6423 || training accuracy 81.67% || lr 5.826999999999999e-05


 20%|██        | 570/2835 [16:24<1:03:32,  1.68s/it]

Epoch[2/10](570/2835) || training loss 0.6442 || training accuracy 85.00% || lr 6.0759999999999994e-05


 20%|██        | 580/2835 [16:41<1:03:56,  1.70s/it]

Epoch[2/10](580/2835) || training loss 0.6479 || training accuracy 86.67% || lr 6.324999999999999e-05


 21%|██        | 590/2835 [16:59<1:04:42,  1.73s/it]

Epoch[2/10](590/2835) || training loss 0.694 || training accuracy 88.33% || lr 6.573999999999999e-05


 21%|██        | 600/2835 [17:16<1:04:56,  1.74s/it]

Epoch[2/10](600/2835) || training loss 0.6212 || training accuracy 94.17% || lr 6.822999999999999e-05


 22%|██▏       | 610/2835 [17:33<1:04:52,  1.75s/it]

Epoch[2/10](610/2835) || training loss 0.6742 || training accuracy 87.50% || lr 7.071999999999999e-05


 22%|██▏       | 620/2835 [17:51<1:04:06,  1.74s/it]

Epoch[2/10](620/2835) || training loss 0.6883 || training accuracy 84.17% || lr 7.321e-05


 22%|██▏       | 630/2835 [18:08<1:03:55,  1.74s/it]

Epoch[2/10](630/2835) || training loss 0.6835 || training accuracy 80.00% || lr 7.569999999999998e-05


 23%|██▎       | 640/2835 [18:26<1:03:49,  1.74s/it]

Epoch[2/10](640/2835) || training loss 0.628 || training accuracy 88.33% || lr 7.819e-05


 23%|██▎       | 650/2835 [18:43<1:04:28,  1.77s/it]

Epoch[2/10](650/2835) || training loss 0.6537 || training accuracy 89.17% || lr 8.068e-05


 23%|██▎       | 660/2835 [19:01<1:02:56,  1.74s/it]

Epoch[2/10](660/2835) || training loss 0.6337 || training accuracy 84.17% || lr 8.316999999999999e-05


 24%|██▎       | 670/2835 [19:18<1:02:19,  1.73s/it]

Epoch[2/10](670/2835) || training loss 0.6513 || training accuracy 86.67% || lr 8.565999999999999e-05


 24%|██▍       | 680/2835 [19:35<1:02:42,  1.75s/it]

Epoch[2/10](680/2835) || training loss 0.6421 || training accuracy 86.67% || lr 8.814999999999999e-05


 24%|██▍       | 690/2835 [19:52<1:01:56,  1.73s/it]

Epoch[2/10](690/2835) || training loss 0.6627 || training accuracy 85.00% || lr 9.064e-05


 25%|██▍       | 700/2835 [20:10<1:01:43,  1.73s/it]

Epoch[2/10](700/2835) || training loss 0.6663 || training accuracy 83.33% || lr 9.312999999999998e-05


 25%|██▌       | 710/2835 [20:27<1:00:46,  1.72s/it]

Epoch[2/10](710/2835) || training loss 0.6537 || training accuracy 85.00% || lr 9.562e-05


 25%|██▌       | 720/2835 [20:44<1:00:25,  1.71s/it]

Epoch[2/10](720/2835) || training loss 0.6418 || training accuracy 86.67% || lr 9.810999999999998e-05


 26%|██▌       | 730/2835 [21:02<1:00:44,  1.73s/it]

Epoch[2/10](730/2835) || training loss 0.6501 || training accuracy 88.33% || lr 0.00010059999999999999


 26%|██▌       | 740/2835 [21:19<1:00:52,  1.74s/it]

Epoch[2/10](740/2835) || training loss 0.6429 || training accuracy 84.17% || lr 0.00010308999999999998


 26%|██▋       | 750/2835 [21:36<1:00:28,  1.74s/it]

Epoch[2/10](750/2835) || training loss 0.6415 || training accuracy 88.33% || lr 0.00010557999999999999


 27%|██▋       | 760/2835 [21:54<59:48,  1.73s/it]  

Epoch[2/10](760/2835) || training loss 0.6345 || training accuracy 91.67% || lr 0.00010807


 27%|██▋       | 770/2835 [22:11<59:49,  1.74s/it]  

Epoch[2/10](770/2835) || training loss 0.6449 || training accuracy 87.50% || lr 0.00011055999999999999


 28%|██▊       | 780/2835 [22:29<59:42,  1.74s/it]

Epoch[2/10](780/2835) || training loss 0.6782 || training accuracy 90.00% || lr 0.00011305


 28%|██▊       | 790/2835 [22:46<59:17,  1.74s/it]

Epoch[2/10](790/2835) || training loss 0.6184 || training accuracy 87.50% || lr 0.00011553999999999998


 28%|██▊       | 800/2835 [23:03<58:59,  1.74s/it]

Epoch[2/10](800/2835) || training loss 0.6252 || training accuracy 87.50% || lr 0.00011802999999999999


 29%|██▊       | 810/2835 [23:21<58:53,  1.74s/it]

Epoch[2/10](810/2835) || training loss 0.6388 || training accuracy 85.83% || lr 0.00012051999999999999


 29%|██▉       | 820/2835 [23:38<58:36,  1.75s/it]

Epoch[2/10](820/2835) || training loss 0.6809 || training accuracy 80.00% || lr 0.00012301


 29%|██▉       | 830/2835 [23:56<57:26,  1.72s/it]

Epoch[2/10](830/2835) || training loss 0.6375 || training accuracy 87.50% || lr 0.0001255


 30%|██▉       | 840/2835 [24:13<57:42,  1.74s/it]

Epoch[2/10](840/2835) || training loss 0.6409 || training accuracy 85.00% || lr 0.00012799


 30%|██▉       | 850/2835 [24:30<57:26,  1.74s/it]

Epoch[2/10](850/2835) || training loss 0.6305 || training accuracy 85.00% || lr 0.00013047999999999998


 30%|███       | 860/2835 [24:48<56:44,  1.72s/it]

Epoch[2/10](860/2835) || training loss 0.6389 || training accuracy 86.67% || lr 0.00013296999999999997


 31%|███       | 870/2835 [25:05<56:58,  1.74s/it]

Epoch[2/10](870/2835) || training loss 0.6199 || training accuracy 86.67% || lr 0.00013545999999999998


 31%|███       | 880/2835 [25:22<56:03,  1.72s/it]

Epoch[2/10](880/2835) || training loss 0.7098 || training accuracy 84.17% || lr 0.00013795


 31%|███▏      | 890/2835 [25:40<56:41,  1.75s/it]

Epoch[2/10](890/2835) || training loss 0.6387 || training accuracy 84.17% || lr 0.00014043999999999998


 32%|███▏      | 900/2835 [25:57<55:49,  1.73s/it]

Epoch[2/10](900/2835) || training loss 0.6666 || training accuracy 86.67% || lr 0.00014293


 32%|███▏      | 910/2835 [26:14<55:42,  1.74s/it]

Epoch[2/10](910/2835) || training loss 0.6623 || training accuracy 90.00% || lr 0.00014542


 32%|███▏      | 920/2835 [26:32<55:33,  1.74s/it]

Epoch[2/10](920/2835) || training loss 0.6399 || training accuracy 85.83% || lr 0.00014790999999999998


 33%|███▎      | 930/2835 [26:49<54:44,  1.72s/it]

Epoch[2/10](930/2835) || training loss 0.6954 || training accuracy 80.00% || lr 0.00015039999999999997


 33%|███▎      | 940/2835 [27:06<54:13,  1.72s/it]

Epoch[2/10](940/2835) || training loss 0.6421 || training accuracy 86.67% || lr 0.00015288999999999998


 34%|███▎      | 950/2835 [27:23<54:19,  1.73s/it]

Epoch[2/10](950/2835) || training loss 0.5907 || training accuracy 87.50% || lr 0.00015538


 34%|███▍      | 960/2835 [27:41<54:13,  1.74s/it]

Epoch[2/10](960/2835) || training loss 0.6943 || training accuracy 82.50% || lr 0.00015786999999999998


 34%|███▍      | 970/2835 [27:58<53:30,  1.72s/it]

Epoch[2/10](970/2835) || training loss 0.5955 || training accuracy 89.17% || lr 0.00016036


 35%|███▍      | 980/2835 [28:15<52:52,  1.71s/it]

Epoch[2/10](980/2835) || training loss 0.6906 || training accuracy 80.83% || lr 0.00016285


 35%|███▍      | 990/2835 [28:32<52:55,  1.72s/it]

Epoch[2/10](990/2835) || training loss 0.6011 || training accuracy 92.50% || lr 0.00016533999999999999


 35%|███▌      | 1000/2835 [28:50<52:37,  1.72s/it]

Epoch[2/10](1000/2835) || training loss 0.672 || training accuracy 83.33% || lr 0.00016782999999999997


 36%|███▌      | 1010/2835 [29:07<52:34,  1.73s/it]

Epoch[2/10](1010/2835) || training loss 0.6241 || training accuracy 86.67% || lr 0.00017031999999999998


 36%|███▌      | 1020/2835 [29:24<52:49,  1.75s/it]

Epoch[2/10](1020/2835) || training loss 0.6883 || training accuracy 87.50% || lr 0.00017281


 36%|███▋      | 1030/2835 [29:42<52:24,  1.74s/it]

Epoch[2/10](1030/2835) || training loss 0.638 || training accuracy 86.67% || lr 0.00017529999999999998


 37%|███▋      | 1040/2835 [29:59<52:03,  1.74s/it]

Epoch[2/10](1040/2835) || training loss 0.646 || training accuracy 85.00% || lr 0.00017778999999999996


 37%|███▋      | 1050/2835 [30:16<51:01,  1.72s/it]

Epoch[2/10](1050/2835) || training loss 0.6125 || training accuracy 87.50% || lr 0.00018028


 37%|███▋      | 1060/2835 [30:34<50:59,  1.72s/it]

Epoch[2/10](1060/2835) || training loss 0.6433 || training accuracy 88.33% || lr 0.00018277


 38%|███▊      | 1070/2835 [30:51<50:30,  1.72s/it]

Epoch[2/10](1070/2835) || training loss 0.6418 || training accuracy 93.33% || lr 0.00018525999999999997


 38%|███▊      | 1080/2835 [31:08<50:25,  1.72s/it]

Epoch[2/10](1080/2835) || training loss 0.6022 || training accuracy 87.50% || lr 0.00018775


 38%|███▊      | 1090/2835 [31:25<50:18,  1.73s/it]

Epoch[2/10](1090/2835) || training loss 0.6575 || training accuracy 87.50% || lr 0.00019024


 39%|███▉      | 1100/2835 [31:43<50:19,  1.74s/it]

Epoch[2/10](1100/2835) || training loss 0.6296 || training accuracy 92.50% || lr 0.00019272999999999998


 39%|███▉      | 1110/2835 [32:00<49:55,  1.74s/it]

Epoch[2/10](1110/2835) || training loss 0.6447 || training accuracy 82.50% || lr 0.00019521999999999996


 40%|███▉      | 1120/2835 [32:17<49:40,  1.74s/it]

Epoch[2/10](1120/2835) || training loss 0.6894 || training accuracy 79.17% || lr 0.00019771


 40%|███▉      | 1130/2835 [32:35<49:32,  1.74s/it]

Epoch[2/10](1130/2835) || training loss 0.6538 || training accuracy 82.50% || lr 0.0002002


 40%|████      | 1140/2835 [32:52<48:13,  1.71s/it]

Epoch[2/10](1140/2835) || training loss 0.6655 || training accuracy 82.50% || lr 0.00020268999999999997


 41%|████      | 1150/2835 [33:09<47:31,  1.69s/it]

Epoch[2/10](1150/2835) || training loss 0.6855 || training accuracy 83.33% || lr 0.00020517999999999996


 41%|████      | 1160/2835 [33:26<47:27,  1.70s/it]

Epoch[2/10](1160/2835) || training loss 0.6418 || training accuracy 87.50% || lr 0.00020767


 41%|████▏     | 1170/2835 [33:43<47:33,  1.71s/it]

Epoch[2/10](1170/2835) || training loss 0.6909 || training accuracy 86.67% || lr 0.00021015999999999998


 42%|████▏     | 1180/2835 [34:01<53:24,  1.94s/it]

Epoch[2/10](1180/2835) || training loss 0.6662 || training accuracy 87.50% || lr 0.00021264999999999996


 42%|████▏     | 1190/2835 [34:22<57:12,  2.09s/it]

Epoch[2/10](1190/2835) || training loss 0.6825 || training accuracy 82.50% || lr 0.00021514


 42%|████▏     | 1200/2835 [34:43<57:22,  2.11s/it]

Epoch[2/10](1200/2835) || training loss 0.6453 || training accuracy 89.17% || lr 0.00021763


 43%|████▎     | 1210/2835 [35:04<56:29,  2.09s/it]

Epoch[2/10](1210/2835) || training loss 0.6448 || training accuracy 88.33% || lr 0.00022011999999999997


 43%|████▎     | 1220/2835 [35:25<56:45,  2.11s/it]

Epoch[2/10](1220/2835) || training loss 0.6531 || training accuracy 90.00% || lr 0.00022260999999999996


 43%|████▎     | 1230/2835 [35:46<56:07,  2.10s/it]

Epoch[2/10](1230/2835) || training loss 0.675 || training accuracy 87.50% || lr 0.0002251


 44%|████▎     | 1240/2835 [36:03<45:32,  1.71s/it]

Epoch[2/10](1240/2835) || training loss 0.6733 || training accuracy 86.67% || lr 0.00022758999999999998


 44%|████▍     | 1250/2835 [36:20<45:14,  1.71s/it]

Epoch[2/10](1250/2835) || training loss 0.656 || training accuracy 86.67% || lr 0.00023007999999999997


 44%|████▍     | 1260/2835 [36:37<45:08,  1.72s/it]

Epoch[2/10](1260/2835) || training loss 0.6376 || training accuracy 86.67% || lr 0.00023256999999999998


 45%|████▍     | 1270/2835 [36:54<44:54,  1.72s/it]

Epoch[2/10](1270/2835) || training loss 0.6619 || training accuracy 90.00% || lr 0.00023506


 45%|████▌     | 1280/2835 [37:12<45:59,  1.77s/it]

Epoch[2/10](1280/2835) || training loss 0.6325 || training accuracy 90.83% || lr 0.00023754999999999997


 46%|████▌     | 1290/2835 [37:29<44:11,  1.72s/it]

Epoch[2/10](1290/2835) || training loss 0.5822 || training accuracy 91.67% || lr 0.00024003999999999999


 46%|████▌     | 1300/2835 [37:46<44:27,  1.74s/it]

Epoch[2/10](1300/2835) || training loss 0.6116 || training accuracy 90.83% || lr 0.00024253


 46%|████▌     | 1310/2835 [38:04<44:26,  1.75s/it]

Epoch[2/10](1310/2835) || training loss 0.6218 || training accuracy 85.83% || lr 0.00024502


 47%|████▋     | 1320/2835 [38:21<42:56,  1.70s/it]

Epoch[2/10](1320/2835) || training loss 0.6738 || training accuracy 80.00% || lr 0.00024751


 47%|████▋     | 1330/2835 [38:39<43:25,  1.73s/it]

Epoch[2/10](1330/2835) || training loss 0.6217 || training accuracy 86.67% || lr 0.00025


 47%|████▋     | 1340/2835 [38:56<43:19,  1.74s/it]

Epoch[2/10](1340/2835) || training loss 0.6416 || training accuracy 84.17% || lr 0.00024998464074396675


 48%|████▊     | 1350/2835 [39:13<42:58,  1.74s/it]

Epoch[2/10](1350/2835) || training loss 0.6508 || training accuracy 85.83% || lr 0.0002499385667655336


 48%|████▊     | 1360/2835 [39:30<41:40,  1.70s/it]

Epoch[2/10](1360/2835) || training loss 0.6433 || training accuracy 88.33% || lr 0.0002498617894327653


 48%|████▊     | 1370/2835 [39:47<41:54,  1.72s/it]

Epoch[2/10](1370/2835) || training loss 0.693 || training accuracy 84.17% || lr 0.00024975432768931983


 49%|████▊     | 1380/2835 [40:04<41:23,  1.71s/it]

Epoch[2/10](1380/2835) || training loss 0.6394 || training accuracy 86.67% || lr 0.00024961620804977443


 49%|████▉     | 1390/2835 [40:22<41:28,  1.72s/it]

Epoch[2/10](1390/2835) || training loss 0.6364 || training accuracy 90.83% || lr 0.0002494474645930835


 49%|████▉     | 1400/2835 [40:39<41:54,  1.75s/it]

Epoch[2/10](1400/2835) || training loss 0.6742 || training accuracy 85.83% || lr 0.0002492481389541699


 50%|████▉     | 1410/2835 [40:57<42:39,  1.80s/it]

Epoch[2/10](1410/2835) || training loss 0.6305 || training accuracy 87.50% || lr 0.0002490182803136525


 50%|█████     | 1420/2835 [41:14<40:31,  1.72s/it]

Epoch[2/10](1420/2835) || training loss 0.6479 || training accuracy 90.83% || lr 0.0002487579453857114


 50%|█████     | 1430/2835 [41:31<40:23,  1.73s/it]

Epoch[2/10](1430/2835) || training loss 0.6544 || training accuracy 84.17% || lr 0.00024846719840409463


 51%|█████     | 1440/2835 [41:49<40:17,  1.73s/it]

Epoch[2/10](1440/2835) || training loss 0.6707 || training accuracy 85.00% || lr 0.00024814611110626935


 51%|█████     | 1450/2835 [42:06<40:19,  1.75s/it]

Epoch[2/10](1450/2835) || training loss 0.6857 || training accuracy 82.50% || lr 0.00024779476271572176


 51%|█████▏    | 1460/2835 [42:23<39:52,  1.74s/it]

Epoch[2/10](1460/2835) || training loss 0.6717 || training accuracy 80.00% || lr 0.00024741323992240983


 52%|█████▏    | 1470/2835 [42:41<39:45,  1.75s/it]

Epoch[2/10](1470/2835) || training loss 0.684 || training accuracy 80.83% || lr 0.0002470016368613741


 52%|█████▏    | 1480/2835 [42:58<39:23,  1.74s/it]

Epoch[2/10](1480/2835) || training loss 0.6418 || training accuracy 88.33% || lr 0.00024656005508951075


 53%|█████▎    | 1490/2835 [43:16<38:46,  1.73s/it]

Epoch[2/10](1490/2835) || training loss 0.6795 || training accuracy 80.83% || lr 0.00024608860356051455


 53%|█████▎    | 1500/2835 [43:33<38:48,  1.74s/it]

Epoch[2/10](1500/2835) || training loss 0.6851 || training accuracy 80.83% || lr 0.0002455873985979959


 53%|█████▎    | 1510/2835 [43:51<38:41,  1.75s/it]

Epoch[2/10](1510/2835) || training loss 0.6225 || training accuracy 86.67% || lr 0.0002450565638667794


 54%|█████▎    | 1520/2835 [44:08<38:11,  1.74s/it]

Epoch[2/10](1520/2835) || training loss 0.6846 || training accuracy 88.33% || lr 0.0002444962303423921


 54%|█████▍    | 1530/2835 [44:25<37:47,  1.74s/it]

Epoch[2/10](1530/2835) || training loss 0.6441 || training accuracy 91.67% || lr 0.0002439065362787466


 54%|█████▍    | 1540/2835 [44:43<37:37,  1.74s/it]

Epoch[2/10](1540/2835) || training loss 0.6152 || training accuracy 85.00% || lr 0.00024328762717402936


 55%|█████▍    | 1550/2835 [45:00<37:20,  1.74s/it]

Epoch[2/10](1550/2835) || training loss 0.6562 || training accuracy 85.83% || lr 0.00024263965573480107


 55%|█████▌    | 1560/2835 [45:18<37:10,  1.75s/it]

Epoch[2/10](1560/2835) || training loss 0.6699 || training accuracy 83.33% || lr 0.00024196278183831848


 55%|█████▌    | 1570/2835 [45:35<36:41,  1.74s/it]

Epoch[2/10](1570/2835) || training loss 0.6241 || training accuracy 85.00% || lr 0.00024125717249308726


 56%|█████▌    | 1580/2835 [45:53<36:26,  1.74s/it]

Epoch[2/10](1580/2835) || training loss 0.6675 || training accuracy 81.67% || lr 0.00024052300179765518


 56%|█████▌    | 1590/2835 [46:10<36:13,  1.75s/it]

Epoch[2/10](1590/2835) || training loss 0.6645 || training accuracy 82.50% || lr 0.0002397604508976556


 56%|█████▋    | 1600/2835 [46:27<35:39,  1.73s/it]

Epoch[2/10](1600/2835) || training loss 0.6266 || training accuracy 83.33% || lr 0.0002389697079411129


 57%|█████▋    | 1610/2835 [46:45<35:35,  1.74s/it]

Epoch[2/10](1610/2835) || training loss 0.6396 || training accuracy 81.67% || lr 0.00023815096803201941


 57%|█████▋    | 1620/2835 [47:02<35:12,  1.74s/it]

Epoch[2/10](1620/2835) || training loss 0.6499 || training accuracy 89.17% || lr 0.0002373044331821966


 57%|█████▋    | 1630/2835 [47:19<34:40,  1.73s/it]

Epoch[2/10](1630/2835) || training loss 0.616 || training accuracy 93.33% || lr 0.00023643031226145176


 58%|█████▊    | 1640/2835 [47:37<34:29,  1.73s/it]

Epoch[2/10](1640/2835) || training loss 0.6285 || training accuracy 91.67% || lr 0.00023552882094604233


 58%|█████▊    | 1650/2835 [47:54<34:23,  1.74s/it]

Epoch[2/10](1650/2835) || training loss 0.6594 || training accuracy 89.17% || lr 0.00023460018166546098


 59%|█████▊    | 1660/2835 [48:11<33:17,  1.70s/it]

Epoch[2/10](1660/2835) || training loss 0.6635 || training accuracy 83.33% || lr 0.0002336446235475548


 59%|█████▉    | 1670/2835 [48:28<32:43,  1.69s/it]

Epoch[2/10](1670/2835) || training loss 0.6361 || training accuracy 82.50% || lr 0.00023266238236199096


 59%|█████▉    | 1680/2835 [48:45<32:45,  1.70s/it]

Epoch[2/10](1680/2835) || training loss 0.6382 || training accuracy 85.83% || lr 0.00023165370046208445


 60%|█████▉    | 1690/2835 [49:02<32:48,  1.72s/it]

Epoch[2/10](1690/2835) || training loss 0.6497 || training accuracy 90.00% || lr 0.00023061882672500086


 60%|█████▉    | 1700/2835 [49:20<32:51,  1.74s/it]

Epoch[2/10](1700/2835) || training loss 0.6525 || training accuracy 83.33% || lr 0.00022955801649034965


 60%|██████    | 1710/2835 [49:37<32:46,  1.75s/it]

Epoch[2/10](1710/2835) || training loss 0.7152 || training accuracy 82.50% || lr 0.0002284715314971829


 61%|██████    | 1720/2835 [49:55<32:28,  1.75s/it]

Epoch[2/10](1720/2835) || training loss 0.6098 || training accuracy 86.67% || lr 0.0002273596398194154


 61%|██████    | 1730/2835 [50:12<32:03,  1.74s/it]

Epoch[2/10](1730/2835) || training loss 0.6311 || training accuracy 88.33% || lr 0.00022622261579968093


 61%|██████▏   | 1740/2835 [50:29<31:13,  1.71s/it]

Epoch[2/10](1740/2835) || training loss 0.6647 || training accuracy 84.17% || lr 0.00022506073998164276


 62%|██████▏   | 1750/2835 [50:46<31:11,  1.73s/it]

Epoch[2/10](1750/2835) || training loss 0.6523 || training accuracy 90.00% || lr 0.00022387429904077342


 62%|██████▏   | 1760/2835 [51:04<30:47,  1.72s/it]

Epoch[2/10](1760/2835) || training loss 0.6557 || training accuracy 85.83% || lr 0.00022266358571362203


 62%|██████▏   | 1770/2835 [51:21<30:20,  1.71s/it]

Epoch[2/10](1770/2835) || training loss 0.6484 || training accuracy 88.33% || lr 0.00022142889872558573


 63%|██████▎   | 1780/2835 [51:38<30:09,  1.72s/it]

Epoch[2/10](1780/2835) || training loss 0.6513 || training accuracy 82.50% || lr 0.00022017054271720383


 63%|██████▎   | 1790/2835 [51:55<29:52,  1.72s/it]

Epoch[2/10](1790/2835) || training loss 0.6233 || training accuracy 87.50% || lr 0.0002188888281689922


 63%|██████▎   | 1800/2835 [52:12<29:46,  1.73s/it]

Epoch[2/10](1800/2835) || training loss 0.6453 || training accuracy 86.67% || lr 0.00021758407132483687


 64%|██████▍   | 1810/2835 [52:29<28:49,  1.69s/it]

Epoch[2/10](1810/2835) || training loss 0.5988 || training accuracy 91.67% || lr 0.00021625659411396572


 64%|██████▍   | 1820/2835 [52:47<29:01,  1.72s/it]

Epoch[2/10](1820/2835) || training loss 0.627 || training accuracy 85.00% || lr 0.000214906724071517


 65%|██████▍   | 1830/2835 [53:04<29:01,  1.73s/it]

Epoch[2/10](1830/2835) || training loss 0.6992 || training accuracy 85.83% || lr 0.00021353479425772513


 65%|██████▍   | 1840/2835 [53:21<28:42,  1.73s/it]

Epoch[2/10](1840/2835) || training loss 0.6855 || training accuracy 80.83% || lr 0.0002121411431757431


 65%|██████▌   | 1850/2835 [53:39<28:42,  1.75s/it]

Epoch[2/10](1850/2835) || training loss 0.6221 || training accuracy 85.00% || lr 0.00021072611468812174


 66%|██████▌   | 1860/2835 [53:56<28:19,  1.74s/it]

Epoch[2/10](1860/2835) || training loss 0.6282 || training accuracy 88.33% || lr 0.00020929005793196676


 66%|██████▌   | 1870/2835 [54:14<27:48,  1.73s/it]

Epoch[2/10](1870/2835) || training loss 0.639 || training accuracy 86.67% || lr 0.00020783332723279464


 66%|██████▋   | 1880/2835 [54:31<27:34,  1.73s/it]

Epoch[2/10](1880/2835) || training loss 0.6153 || training accuracy 90.83% || lr 0.00020635628201710784


 67%|██████▋   | 1890/2835 [54:48<27:15,  1.73s/it]

Epoch[2/10](1890/2835) || training loss 0.6566 || training accuracy 88.33% || lr 0.00020485928672371186


 67%|██████▋   | 1900/2835 [55:05<27:01,  1.73s/it]

Epoch[2/10](1900/2835) || training loss 0.6756 || training accuracy 83.33% || lr 0.0002033427107137953


 67%|██████▋   | 1910/2835 [55:23<26:43,  1.73s/it]

Epoch[2/10](1910/2835) || training loss 0.6294 || training accuracy 87.50% || lr 0.00020180692817979557


 68%|██████▊   | 1920/2835 [55:40<26:26,  1.73s/it]

Epoch[2/10](1920/2835) || training loss 0.6511 || training accuracy 86.67% || lr 0.00020025231805307253


 68%|██████▊   | 1930/2835 [55:57<26:05,  1.73s/it]

Epoch[2/10](1930/2835) || training loss 0.6354 || training accuracy 87.50% || lr 0.0001986792639104129


 68%|██████▊   | 1940/2835 [56:15<25:49,  1.73s/it]

Epoch[2/10](1940/2835) || training loss 0.6428 || training accuracy 86.67% || lr 0.00019708815387938818


 69%|██████▉   | 1950/2835 [56:32<25:45,  1.75s/it]

Epoch[2/10](1950/2835) || training loss 0.6745 || training accuracy 87.50% || lr 0.00019547938054259023


 69%|██████▉   | 1960/2835 [56:50<25:17,  1.73s/it]

Epoch[2/10](1960/2835) || training loss 0.6786 || training accuracy 80.83% || lr 0.00019385334084076738


 69%|██████▉   | 1970/2835 [57:07<25:08,  1.74s/it]

Epoch[2/10](1970/2835) || training loss 0.6343 || training accuracy 86.67% || lr 0.00019221043597488505


 70%|██████▉   | 1980/2835 [57:25<24:46,  1.74s/it]

Epoch[2/10](1980/2835) || training loss 0.6123 || training accuracy 86.67% || lr 0.0001905510713071356


 70%|███████   | 1990/2835 [57:42<24:25,  1.73s/it]

Epoch[2/10](1990/2835) || training loss 0.6761 || training accuracy 85.83% || lr 0.0001888756562609212


 71%|███████   | 2000/2835 [57:59<24:02,  1.73s/it]

Epoch[2/10](2000/2835) || training loss 0.6294 || training accuracy 90.83% || lr 0.00018718460421983472


 71%|███████   | 2010/2835 [58:17<23:56,  1.74s/it]

Epoch[2/10](2010/2835) || training loss 0.7149 || training accuracy 80.00% || lr 0.00018547833242566354


 71%|███████▏  | 2020/2835 [58:34<23:40,  1.74s/it]

Epoch[2/10](2020/2835) || training loss 0.638 || training accuracy 82.50% || lr 0.00018375726187544139


 72%|███████▏  | 2030/2835 [58:51<23:23,  1.74s/it]

Epoch[2/10](2030/2835) || training loss 0.6761 || training accuracy 85.00% || lr 0.00018202181721757354


 72%|███████▏  | 2040/2835 [59:09<23:07,  1.74s/it]

Epoch[2/10](2040/2835) || training loss 0.6538 || training accuracy 81.67% || lr 0.00018027242664706141


 72%|███████▏  | 2050/2835 [59:26<22:48,  1.74s/it]

Epoch[2/10](2050/2835) || training loss 0.6768 || training accuracy 84.17% || lr 0.00017850952179985152


 73%|███████▎  | 2060/2835 [59:44<22:17,  1.73s/it]

Epoch[2/10](2060/2835) || training loss 0.6757 || training accuracy 79.17% || lr 0.00017673353764633602


 73%|███████▎  | 2070/2835 [1:00:01<22:10,  1.74s/it]

Epoch[2/10](2070/2835) || training loss 0.6592 || training accuracy 87.50% || lr 0.00017494491238403018


 73%|███████▎  | 2080/2835 [1:00:18<21:57,  1.74s/it]

Epoch[2/10](2080/2835) || training loss 0.6252 || training accuracy 91.67% || lr 0.00017314408732945367


 74%|███████▎  | 2090/2835 [1:00:36<21:37,  1.74s/it]

Epoch[2/10](2090/2835) || training loss 0.6252 || training accuracy 84.17% || lr 0.00017133150680924238


 74%|███████▍  | 2100/2835 [1:00:53<21:57,  1.79s/it]

Epoch[2/10](2100/2835) || training loss 0.6489 || training accuracy 87.50% || lr 0.0001695076180505175


 74%|███████▍  | 2110/2835 [1:01:11<20:59,  1.74s/it]

Epoch[2/10](2110/2835) || training loss 0.5764 || training accuracy 88.33% || lr 0.00016767287107053878


 75%|███████▍  | 2120/2835 [1:01:28<20:47,  1.74s/it]

Epoch[2/10](2120/2835) || training loss 0.6352 || training accuracy 91.67% || lr 0.0001658277185656696


 75%|███████▌  | 2130/2835 [1:01:45<20:28,  1.74s/it]

Epoch[2/10](2130/2835) || training loss 0.6477 || training accuracy 83.33% || lr 0.00016397261579968094


 75%|███████▌  | 2140/2835 [1:02:03<20:15,  1.75s/it]

Epoch[2/10](2140/2835) || training loss 0.6585 || training accuracy 85.00% || lr 0.00016210802049142185


 76%|███████▌  | 2150/2835 [1:02:20<19:52,  1.74s/it]

Epoch[2/10](2150/2835) || training loss 0.6629 || training accuracy 83.33% || lr 0.00016023439270188404


 76%|███████▌  | 2160/2835 [1:02:38<19:30,  1.73s/it]

Epoch[2/10](2160/2835) || training loss 0.6691 || training accuracy 82.50% || lr 0.00015835219472068893


 77%|███████▋  | 2170/2835 [1:02:55<18:35,  1.68s/it]

Epoch[2/10](2170/2835) || training loss 0.6743 || training accuracy 87.50% || lr 0.0001564618909520244


 77%|███████▋  | 2180/2835 [1:03:12<18:38,  1.71s/it]

Epoch[2/10](2180/2835) || training loss 0.5976 || training accuracy 86.67% || lr 0.00015456394780006023


 77%|███████▋  | 2190/2835 [1:03:29<18:23,  1.71s/it]

Epoch[2/10](2190/2835) || training loss 0.6791 || training accuracy 87.50% || lr 0.00015265883355386955


 78%|███████▊  | 2200/2835 [1:03:46<18:05,  1.71s/it]

Epoch[2/10](2200/2835) || training loss 0.6495 || training accuracy 86.67% || lr 0.00015074701827188578


 78%|███████▊  | 2210/2835 [1:04:03<17:55,  1.72s/it]

Epoch[2/10](2210/2835) || training loss 0.661 || training accuracy 86.67% || lr 0.0001488289736659227


 78%|███████▊  | 2220/2835 [1:04:21<18:05,  1.77s/it]

Epoch[2/10](2220/2835) || training loss 0.644 || training accuracy 93.33% || lr 0.00014690517298478648


 79%|███████▊  | 2230/2835 [1:04:38<17:36,  1.75s/it]

Epoch[2/10](2230/2835) || training loss 0.644 || training accuracy 83.33% || lr 0.00014497609089750873


 79%|███████▉  | 2240/2835 [1:04:55<17:19,  1.75s/it]

Epoch[2/10](2240/2835) || training loss 0.621 || training accuracy 89.17% || lr 0.00014304220337622902


 79%|███████▉  | 2250/2835 [1:05:13<17:00,  1.74s/it]

Epoch[2/10](2250/2835) || training loss 0.6816 || training accuracy 85.83% || lr 0.00014110398757875587


 80%|███████▉  | 2260/2835 [1:05:30<16:40,  1.74s/it]

Epoch[2/10](2260/2835) || training loss 0.6539 || training accuracy 84.17% || lr 0.00013916192173083514


 80%|████████  | 2270/2835 [1:05:48<16:18,  1.73s/it]

Epoch[2/10](2270/2835) || training loss 0.6479 || training accuracy 88.33% || lr 0.000137216485008155


 80%|████████  | 2280/2835 [1:06:05<16:02,  1.73s/it]

Epoch[2/10](2280/2835) || training loss 0.6387 || training accuracy 90.83% || lr 0.0001352681574181167


 81%|████████  | 2290/2835 [1:06:22<15:35,  1.72s/it]

Epoch[2/10](2290/2835) || training loss 0.6409 || training accuracy 89.17% || lr 0.0001333174196813995


 81%|████████  | 2300/2835 [1:06:39<15:27,  1.73s/it]

Epoch[2/10](2300/2835) || training loss 0.6082 || training accuracy 91.67% || lr 0.0001313647531133505


 81%|████████▏ | 2310/2835 [1:06:57<15:12,  1.74s/it]

Epoch[2/10](2310/2835) || training loss 0.6617 || training accuracy 85.83% || lr 0.00012941063950522695


 82%|████████▏ | 2320/2835 [1:07:14<14:54,  1.74s/it]

Epoch[2/10](2320/2835) || training loss 0.6415 || training accuracy 86.67% || lr 0.00012745556100532166


 82%|████████▏ | 2330/2835 [1:07:32<14:40,  1.74s/it]

Epoch[2/10](2330/2835) || training loss 0.5964 || training accuracy 92.50% || lr 0.0001255


 83%|████████▎ | 2340/2835 [1:07:49<14:18,  1.74s/it]

Epoch[2/10](2340/2835) || training loss 0.6291 || training accuracy 88.33% || lr 0.00012354443899467835


 83%|████████▎ | 2350/2835 [1:08:06<13:57,  1.73s/it]

Epoch[2/10](2350/2835) || training loss 0.6566 || training accuracy 85.00% || lr 0.00012158936049477302


 83%|████████▎ | 2360/2835 [1:08:24<13:42,  1.73s/it]

Epoch[2/10](2360/2835) || training loss 0.6432 || training accuracy 87.50% || lr 0.00011963524688664946


 84%|████████▎ | 2370/2835 [1:08:41<13:29,  1.74s/it]

Epoch[2/10](2370/2835) || training loss 0.6318 || training accuracy 88.33% || lr 0.00011768258031860051


 84%|████████▍ | 2380/2835 [1:08:58<13:08,  1.73s/it]

Epoch[2/10](2380/2835) || training loss 0.6557 || training accuracy 90.00% || lr 0.00011573184258188331


 84%|████████▍ | 2390/2835 [1:09:16<12:56,  1.75s/it]

Epoch[2/10](2390/2835) || training loss 0.6502 || training accuracy 90.83% || lr 0.00011378351499184495


 85%|████████▍ | 2400/2835 [1:09:33<12:38,  1.74s/it]

Epoch[2/10](2400/2835) || training loss 0.6262 || training accuracy 91.67% || lr 0.00011183807826916485


 85%|████████▌ | 2410/2835 [1:09:51<12:14,  1.73s/it]

Epoch[2/10](2410/2835) || training loss 0.5693 || training accuracy 95.00% || lr 0.00010989601242124412


 85%|████████▌ | 2420/2835 [1:10:08<12:04,  1.75s/it]

Epoch[2/10](2420/2835) || training loss 0.6915 || training accuracy 84.17% || lr 0.00010795779662377095


 86%|████████▌ | 2430/2835 [1:10:25<11:45,  1.74s/it]

Epoch[2/10](2430/2835) || training loss 0.6212 || training accuracy 85.00% || lr 0.00010602390910249126


 86%|████████▌ | 2440/2835 [1:10:43<11:12,  1.70s/it]

Epoch[2/10](2440/2835) || training loss 0.6411 || training accuracy 83.33% || lr 0.00010409482701521348


 86%|████████▋ | 2450/2835 [1:11:00<11:12,  1.75s/it]

Epoch[2/10](2450/2835) || training loss 0.6146 || training accuracy 94.17% || lr 0.00010217102633407727


 87%|████████▋ | 2460/2835 [1:11:18<10:53,  1.74s/it]

Epoch[2/10](2460/2835) || training loss 0.6325 || training accuracy 90.83% || lr 0.0001002529817281142


 87%|████████▋ | 2470/2835 [1:11:35<10:29,  1.73s/it]

Epoch[2/10](2470/2835) || training loss 0.6411 || training accuracy 89.17% || lr 9.834116644613045e-05


 87%|████████▋ | 2480/2835 [1:11:53<10:33,  1.78s/it]

Epoch[2/10](2480/2835) || training loss 0.627 || training accuracy 86.67% || lr 9.643605219993977e-05


 88%|████████▊ | 2490/2835 [1:12:10<09:53,  1.72s/it]

Epoch[2/10](2490/2835) || training loss 0.6118 || training accuracy 90.83% || lr 9.453810904797559e-05


 88%|████████▊ | 2500/2835 [1:12:27<09:22,  1.68s/it]

Epoch[2/10](2500/2835) || training loss 0.6779 || training accuracy 87.50% || lr 9.264780527931107e-05


 89%|████████▊ | 2510/2835 [1:12:44<09:23,  1.73s/it]

Epoch[2/10](2510/2835) || training loss 0.6416 || training accuracy 88.33% || lr 9.076560729811593e-05


 89%|████████▉ | 2520/2835 [1:13:01<09:08,  1.74s/it]

Epoch[2/10](2520/2835) || training loss 0.6557 || training accuracy 87.50% || lr 8.889197950857817e-05


 89%|████████▉ | 2530/2835 [1:13:19<08:51,  1.74s/it]

Epoch[2/10](2530/2835) || training loss 0.6742 || training accuracy 89.17% || lr 8.702738420031905e-05


 90%|████████▉ | 2540/2835 [1:13:36<08:26,  1.72s/it]

Epoch[2/10](2540/2835) || training loss 0.6389 || training accuracy 85.83% || lr 8.517228143433038e-05


 90%|████████▉ | 2550/2835 [1:13:53<08:06,  1.71s/it]

Epoch[2/10](2550/2835) || training loss 0.6194 || training accuracy 91.67% || lr 8.332712892946122e-05


 90%|█████████ | 2560/2835 [1:14:10<07:49,  1.71s/it]

Epoch[2/10](2560/2835) || training loss 0.614 || training accuracy 89.17% || lr 8.14923819494825e-05


 91%|█████████ | 2570/2835 [1:14:27<07:37,  1.73s/it]

Epoch[2/10](2570/2835) || training loss 0.6368 || training accuracy 86.67% || lr 7.966849319075759e-05


 91%|█████████ | 2580/2835 [1:14:45<07:22,  1.74s/it]

Epoch[2/10](2580/2835) || training loss 0.6413 || training accuracy 90.83% || lr 7.785591267054632e-05


 91%|█████████▏| 2590/2835 [1:15:02<07:08,  1.75s/it]

Epoch[2/10](2590/2835) || training loss 0.6691 || training accuracy 92.50% || lr 7.60550876159698e-05


 92%|█████████▏| 2600/2835 [1:15:19<06:47,  1.74s/it]

Epoch[2/10](2600/2835) || training loss 0.6501 || training accuracy 88.33% || lr 7.426646235366395e-05


 92%|█████████▏| 2610/2835 [1:15:37<06:32,  1.75s/it]

Epoch[2/10](2610/2835) || training loss 0.619 || training accuracy 93.33% || lr 7.249047820014844e-05


 92%|█████████▏| 2620/2835 [1:15:54<06:05,  1.70s/it]

Epoch[2/10](2620/2835) || training loss 0.6264 || training accuracy 85.83% || lr 7.072757335293855e-05


 93%|█████████▎| 2630/2835 [1:16:11<05:48,  1.70s/it]

Epoch[2/10](2630/2835) || training loss 0.7025 || training accuracy 84.17% || lr 6.897818278242642e-05


 93%|█████████▎| 2640/2835 [1:16:28<05:36,  1.73s/it]

Epoch[2/10](2640/2835) || training loss 0.6058 || training accuracy 92.50% || lr 6.724273812455863e-05


 93%|█████████▎| 2650/2835 [1:16:45<05:17,  1.72s/it]

Epoch[2/10](2650/2835) || training loss 0.6598 || training accuracy 84.17% || lr 6.552166757433648e-05


 94%|█████████▍| 2660/2835 [1:17:03<05:01,  1.72s/it]

Epoch[2/10](2660/2835) || training loss 0.6374 || training accuracy 86.67% || lr 6.381539578016524e-05


 94%|█████████▍| 2670/2835 [1:17:20<04:47,  1.74s/it]

Epoch[2/10](2670/2835) || training loss 0.6357 || training accuracy 83.33% || lr 6.212434373907876e-05


 95%|█████████▍| 2680/2835 [1:17:37<04:28,  1.73s/it]

Epoch[2/10](2680/2835) || training loss 0.6193 || training accuracy 90.83% || lr 6.0448928692864365e-05


 95%|█████████▍| 2690/2835 [1:17:55<04:13,  1.75s/it]

Epoch[2/10](2690/2835) || training loss 0.6723 || training accuracy 88.33% || lr 5.878956402511494e-05


 95%|█████████▌| 2700/2835 [1:18:12<03:54,  1.74s/it]

Epoch[2/10](2700/2835) || training loss 0.6359 || training accuracy 89.17% || lr 5.714665915923261e-05


 96%|█████████▌| 2710/2835 [1:18:29<03:38,  1.75s/it]

Epoch[2/10](2710/2835) || training loss 0.595 || training accuracy 95.83% || lr 5.552061945740972e-05


 96%|█████████▌| 2720/2835 [1:18:47<03:17,  1.72s/it]

Epoch[2/10](2720/2835) || training loss 0.6281 || training accuracy 85.83% || lr 5.3911846120611813e-05


 96%|█████████▋| 2730/2835 [1:19:04<03:00,  1.72s/it]

Epoch[2/10](2730/2835) || training loss 0.6041 || training accuracy 90.00% || lr 5.2320736089587104e-05


 97%|█████████▋| 2740/2835 [1:19:21<02:45,  1.75s/it]

Epoch[2/10](2740/2835) || training loss 0.6101 || training accuracy 90.00% || lr 5.074768194692745e-05


 97%|█████████▋| 2750/2835 [1:19:39<02:28,  1.75s/it]

Epoch[2/10](2750/2835) || training loss 0.6679 || training accuracy 85.00% || lr 4.919307182020445e-05


 97%|█████████▋| 2760/2835 [1:19:56<02:08,  1.72s/it]

Epoch[2/10](2760/2835) || training loss 0.6283 || training accuracy 90.00% || lr 4.76572892862047e-05


 98%|█████████▊| 2770/2835 [1:20:13<01:51,  1.71s/it]

Epoch[2/10](2770/2835) || training loss 0.6378 || training accuracy 85.83% || lr 4.614071327628812e-05


 98%|█████████▊| 2780/2835 [1:20:30<01:35,  1.74s/it]

Epoch[2/10](2780/2835) || training loss 0.6401 || training accuracy 86.67% || lr 4.464371798289214e-05


 98%|█████████▊| 2790/2835 [1:20:48<01:18,  1.74s/it]

Epoch[2/10](2790/2835) || training loss 0.6272 || training accuracy 93.33% || lr 4.3166672767205314e-05


 99%|█████████▉| 2800/2835 [1:21:05<01:02,  1.78s/it]

Epoch[2/10](2800/2835) || training loss 0.6594 || training accuracy 86.67% || lr 4.1709942068033246e-05


 99%|█████████▉| 2810/2835 [1:21:23<00:43,  1.74s/it]

Epoch[2/10](2810/2835) || training loss 0.6375 || training accuracy 90.83% || lr 4.027388531187829e-05


 99%|█████████▉| 2820/2835 [1:21:40<00:25,  1.73s/it]

Epoch[2/10](2820/2835) || training loss 0.5945 || training accuracy 90.83% || lr 3.885885682425686e-05


100%|█████████▉| 2830/2835 [1:21:57<00:08,  1.73s/it]

Epoch[2/10](2830/2835) || training loss 0.6221 || training accuracy 90.83% || lr 3.7465205742274835e-05


100%|██████████| 2835/2835 [1:22:06<00:00,  1.74s/it]

Calculating validation results...





New best model for val accuracy : 91.51%! saving the best model..


  0%|          | 0/2835 [00:00<?, ?it/s]

[Val] acc : 91.51%, loss: 0.64 || best acc : 91.51%, best loss: 0.64


  0%|          | 10/2835 [00:17<1:22:44,  1.76s/it]

Epoch[3/10](10/2835) || training loss 0.6423 || training accuracy 89.17% || lr 3.541556249103804e-05


  1%|          | 20/2835 [00:35<1:22:01,  1.75s/it]

Epoch[3/10](20/2835) || training loss 0.616 || training accuracy 92.50% || lr 3.407684757525713e-05


  1%|          | 30/2835 [00:52<1:20:52,  1.73s/it]

Epoch[3/10](30/2835) || training loss 0.6333 || training accuracy 91.67% || lr 3.276068995434309e-05


  1%|▏         | 40/2835 [01:09<1:20:42,  1.73s/it]

Epoch[3/10](40/2835) || training loss 0.6552 || training accuracy 88.33% || lr 3.1467414370494645e-05


  2%|▏         | 50/2835 [01:27<1:20:56,  1.74s/it]

Epoch[3/10](50/2835) || training loss 0.6131 || training accuracy 86.67% || lr 3.0197339920110542e-05


  2%|▏         | 60/2835 [01:44<1:20:17,  1.74s/it]

Epoch[3/10](60/2835) || training loss 0.6365 || training accuracy 86.67% || lr 2.895077997505682e-05


  2%|▏         | 70/2835 [02:02<1:20:19,  1.74s/it]

Epoch[3/10](70/2835) || training loss 0.6735 || training accuracy 89.17% || lr 2.772804210534724e-05


  3%|▎         | 80/2835 [02:19<1:19:51,  1.74s/it]

Epoch[3/10](80/2835) || training loss 0.6095 || training accuracy 96.67% || lr 2.652942800325503e-05


  3%|▎         | 90/2835 [02:36<1:19:42,  1.74s/it]

Epoch[3/10](90/2835) || training loss 0.6469 || training accuracy 89.17% || lr 2.535523340887448e-05


  4%|▎         | 100/2835 [02:54<1:19:28,  1.74s/it]

Epoch[3/10](100/2835) || training loss 0.6808 || training accuracy 88.33% || lr 2.4205748037152026e-05


  4%|▍         | 110/2835 [03:11<1:19:18,  1.75s/it]

Epoch[3/10](110/2835) || training loss 0.6419 || training accuracy 90.00% || lr 2.3081255506402966e-05


  4%|▍         | 120/2835 [03:29<1:18:22,  1.73s/it]

Epoch[3/10](120/2835) || training loss 0.6759 || training accuracy 92.50% || lr 2.1982033268333103e-05


  5%|▍         | 130/2835 [03:46<1:18:00,  1.73s/it]

Epoch[3/10](130/2835) || training loss 0.6522 || training accuracy 90.83% || lr 2.0908352539581732e-05


  5%|▍         | 140/2835 [04:03<1:17:09,  1.72s/it]

Epoch[3/10](140/2835) || training loss 0.59 || training accuracy 93.33% || lr 1.986047823480283e-05


  5%|▌         | 150/2835 [04:20<1:16:34,  1.71s/it]

Epoch[3/10](150/2835) || training loss 0.6451 || training accuracy 92.50% || lr 1.883866890130133e-05


  6%|▌         | 160/2835 [04:38<1:17:00,  1.73s/it]

Epoch[3/10](160/2835) || training loss 0.6557 || training accuracy 89.17% || lr 1.7843176655240703e-05


  6%|▌         | 170/2835 [04:55<1:17:42,  1.75s/it]

Epoch[3/10](170/2835) || training loss 0.6107 || training accuracy 94.17% || lr 1.6874247119436752e-05


  6%|▋         | 180/2835 [05:13<1:17:11,  1.74s/it]

Epoch[3/10](180/2835) || training loss 0.6182 || training accuracy 91.67% || lr 1.5932119362754072e-05


  7%|▋         | 190/2835 [05:30<1:16:40,  1.74s/it]

Epoch[3/10](190/2835) || training loss 0.643 || training accuracy 88.33% || lr 1.5017025841119247e-05


  7%|▋         | 200/2835 [05:47<1:16:32,  1.74s/it]

Epoch[3/10](200/2835) || training loss 0.6305 || training accuracy 92.50% || lr 1.4129192340165858e-05


  7%|▋         | 210/2835 [06:05<1:14:18,  1.70s/it]

Epoch[3/10](210/2835) || training loss 0.6223 || training accuracy 94.17% || lr 1.3268837919525425e-05


  8%|▊         | 220/2835 [06:21<1:13:13,  1.68s/it]

Epoch[3/10](220/2835) || training loss 0.6101 || training accuracy 88.33% || lr 1.2436174858777398e-05


  8%|▊         | 230/2835 [06:38<1:13:55,  1.70s/it]

Epoch[3/10](230/2835) || training loss 0.68 || training accuracy 90.00% || lr 1.1631408605072604e-05


  8%|▊         | 240/2835 [06:56<1:14:26,  1.72s/it]

Epoch[3/10](240/2835) || training loss 0.6382 || training accuracy 90.83% || lr 1.085473772244224e-05


  9%|▉         | 250/2835 [07:13<1:14:17,  1.72s/it]

Epoch[3/10](250/2835) || training loss 0.6773 || training accuracy 87.50% || lr 1.010635384280496e-05


  9%|▉         | 260/2835 [07:30<1:14:44,  1.74s/it]

Epoch[3/10](260/2835) || training loss 0.6287 || training accuracy 82.50% || lr 9.386441618684799e-06


 10%|▉         | 270/2835 [07:47<1:14:11,  1.74s/it]

Epoch[3/10](270/2835) || training loss 0.655 || training accuracy 89.17% || lr 8.695178677650736e-06


 10%|▉         | 280/2835 [08:05<1:14:25,  1.75s/it]

Epoch[3/10](280/2835) || training loss 0.6174 || training accuracy 88.33% || lr 8.032735578489897e-06


 10%|█         | 290/2835 [08:22<1:13:45,  1.74s/it]

Epoch[3/10](290/2835) || training loss 0.6293 || training accuracy 89.17% || lr 7.399275769124677e-06


 11%|█         | 300/2835 [08:40<1:15:04,  1.78s/it]

Epoch[3/10](300/2835) || training loss 0.6163 || training accuracy 92.50% || lr 6.7949555462843494e-06


 11%|█         | 310/2835 [08:57<1:12:56,  1.73s/it]

Epoch[3/10](310/2835) || training loss 0.6641 || training accuracy 87.50% || lr 6.219924016941257e-06


 11%|█▏        | 320/2835 [09:14<1:12:00,  1.72s/it]

Epoch[3/10](320/2835) || training loss 0.6368 || training accuracy 93.33% || lr 5.674323061520927e-06


 12%|█▏        | 330/2835 [09:32<1:12:02,  1.73s/it]

Epoch[3/10](330/2835) || training loss 0.5606 || training accuracy 90.83% || lr 5.1582872988950586e-06


 12%|█▏        | 340/2835 [09:49<1:12:11,  1.74s/it]

Epoch[3/10](340/2835) || training loss 0.6245 || training accuracy 90.00% || lr 4.671944053166463e-06


 12%|█▏        | 350/2835 [10:06<1:10:44,  1.71s/it]

Epoch[3/10](350/2835) || training loss 0.6332 || training accuracy 89.17% || lr 4.215413322253757e-06


 13%|█▎        | 360/2835 [10:23<1:10:43,  1.71s/it]

Epoch[3/10](360/2835) || training loss 0.6039 || training accuracy 95.00% || lr 3.788807748283571e-06


 13%|█▎        | 370/2835 [10:40<1:10:06,  1.71s/it]

Epoch[3/10](370/2835) || training loss 0.6399 || training accuracy 91.67% || lr 3.392232589797811e-06


 13%|█▎        | 380/2835 [10:57<1:09:10,  1.69s/it]

Epoch[3/10](380/2835) || training loss 0.6278 || training accuracy 89.17% || lr 3.0257856957827882e-06


 14%|█▍        | 390/2835 [11:14<1:09:35,  1.71s/it]

Epoch[3/10](390/2835) || training loss 0.6652 || training accuracy 86.67% || lr 2.689557481526318e-06


 14%|█▍        | 400/2835 [11:31<1:09:15,  1.71s/it]

Epoch[3/10](400/2835) || training loss 0.5983 || training accuracy 90.00% || lr 2.383630906309188e-06


 14%|█▍        | 410/2835 [11:48<1:08:47,  1.70s/it]

Epoch[3/10](410/2835) || training loss 0.5727 || training accuracy 96.67% || lr 2.1080814529361466e-06


 15%|█▍        | 420/2835 [12:06<1:09:11,  1.72s/it]

Epoch[3/10](420/2835) || training loss 0.6368 || training accuracy 87.50% || lr 1.8629771091116763e-06


 15%|█▌        | 430/2835 [12:23<1:08:19,  1.70s/it]

Epoch[3/10](430/2835) || training loss 0.6216 || training accuracy 90.00% || lr 1.6483783506650532e-06


 16%|█▌        | 440/2835 [12:40<1:09:15,  1.73s/it]

Epoch[3/10](440/2835) || training loss 0.6889 || training accuracy 91.67% || lr 1.4643381266288135e-06


 16%|█▌        | 450/2835 [12:57<1:07:46,  1.70s/it]

Epoch[3/10](450/2835) || training loss 0.6283 || training accuracy 87.50% || lr 1.3109018461743795e-06


 16%|█▌        | 460/2835 [13:15<1:08:58,  1.74s/it]

Epoch[3/10](460/2835) || training loss 0.6362 || training accuracy 90.83% || lr 1.1881073674080275e-06


 17%|█▋        | 470/2835 [13:32<1:08:18,  1.73s/it]

Epoch[3/10](470/2835) || training loss 0.6095 || training accuracy 90.00% || lr 1.0959849880299989e-06


 17%|█▋        | 480/2835 [13:49<1:08:24,  1.74s/it]

Epoch[3/10](480/2835) || training loss 0.63 || training accuracy 89.17% || lr 1.0345574378589307e-06


 17%|█▋        | 490/2835 [14:07<1:08:04,  1.74s/it]

Epoch[3/10](490/2835) || training loss 0.6385 || training accuracy 88.33% || lr 1.003839873223687e-06


 18%|█▊        | 500/2835 [14:24<1:07:08,  1.73s/it]

Epoch[3/10](500/2835) || training loss 0.6119 || training accuracy 90.00% || lr 1.62e-06


 18%|█▊        | 510/2835 [14:41<1:06:24,  1.71s/it]

Epoch[3/10](510/2835) || training loss 0.6854 || training accuracy 86.67% || lr 2.86e-06


 18%|█▊        | 520/2835 [14:59<1:06:32,  1.72s/it]

Epoch[3/10](520/2835) || training loss 0.6578 || training accuracy 85.83% || lr 4.1000000000000006e-06


 19%|█▊        | 530/2835 [15:16<1:05:15,  1.70s/it]

Epoch[3/10](530/2835) || training loss 0.6449 || training accuracy 91.67% || lr 5.34e-06


 19%|█▉        | 540/2835 [15:33<1:05:59,  1.73s/it]

Epoch[3/10](540/2835) || training loss 0.6304 || training accuracy 91.67% || lr 6.58e-06


 19%|█▉        | 550/2835 [15:50<1:06:35,  1.75s/it]

Epoch[3/10](550/2835) || training loss 0.645 || training accuracy 91.67% || lr 7.820000000000001e-06


 20%|█▉        | 560/2835 [16:08<1:05:22,  1.72s/it]

Epoch[3/10](560/2835) || training loss 0.639 || training accuracy 92.50% || lr 9.060000000000001e-06


 20%|██        | 570/2835 [16:25<1:05:43,  1.74s/it]

Epoch[3/10](570/2835) || training loss 0.596 || training accuracy 92.50% || lr 1.0300000000000001e-05


 20%|██        | 580/2835 [16:43<1:05:16,  1.74s/it]

Epoch[3/10](580/2835) || training loss 0.5952 || training accuracy 91.67% || lr 1.1540000000000001e-05


 21%|██        | 590/2835 [17:00<1:04:59,  1.74s/it]

Epoch[3/10](590/2835) || training loss 0.6517 || training accuracy 86.67% || lr 1.2780000000000001e-05


 21%|██        | 600/2835 [17:17<1:04:50,  1.74s/it]

Epoch[3/10](600/2835) || training loss 0.6623 || training accuracy 92.50% || lr 1.4020000000000001e-05


 22%|██▏       | 610/2835 [17:35<1:04:08,  1.73s/it]

Epoch[3/10](610/2835) || training loss 0.6096 || training accuracy 93.33% || lr 1.526e-05


 22%|██▏       | 620/2835 [17:52<1:02:01,  1.68s/it]

Epoch[3/10](620/2835) || training loss 0.6438 || training accuracy 90.00% || lr 1.65e-05


 22%|██▏       | 630/2835 [18:08<1:02:41,  1.71s/it]

Epoch[3/10](630/2835) || training loss 0.6285 || training accuracy 90.83% || lr 1.7740000000000003e-05


 23%|██▎       | 640/2835 [18:26<1:03:15,  1.73s/it]

Epoch[3/10](640/2835) || training loss 0.6495 || training accuracy 87.50% || lr 1.898e-05


 23%|██▎       | 650/2835 [18:43<1:02:51,  1.73s/it]

Epoch[3/10](650/2835) || training loss 0.6338 || training accuracy 86.67% || lr 2.0220000000000003e-05


 23%|██▎       | 660/2835 [19:00<1:02:20,  1.72s/it]

Epoch[3/10](660/2835) || training loss 0.6628 || training accuracy 93.33% || lr 2.1460000000000005e-05


 24%|██▎       | 670/2835 [19:18<1:02:15,  1.73s/it]

Epoch[3/10](670/2835) || training loss 0.6418 || training accuracy 92.50% || lr 2.27e-05


 24%|██▍       | 680/2835 [19:35<1:02:23,  1.74s/it]

Epoch[3/10](680/2835) || training loss 0.6544 || training accuracy 90.83% || lr 2.394e-05


 24%|██▍       | 690/2835 [19:52<1:02:32,  1.75s/it]

Epoch[3/10](690/2835) || training loss 0.6143 || training accuracy 88.33% || lr 2.518e-05


 25%|██▍       | 700/2835 [20:10<1:03:27,  1.78s/it]

Epoch[3/10](700/2835) || training loss 0.6282 || training accuracy 95.00% || lr 2.642e-05


 25%|██▌       | 710/2835 [20:27<1:01:36,  1.74s/it]

Epoch[3/10](710/2835) || training loss 0.6156 || training accuracy 90.83% || lr 2.766e-05


 25%|██▌       | 720/2835 [20:45<1:01:28,  1.74s/it]

Epoch[3/10](720/2835) || training loss 0.6536 || training accuracy 86.67% || lr 2.89e-05


 26%|██▌       | 730/2835 [21:02<1:00:29,  1.72s/it]

Epoch[3/10](730/2835) || training loss 0.6434 || training accuracy 86.67% || lr 3.0140000000000003e-05


 26%|██▌       | 740/2835 [21:19<1:00:45,  1.74s/it]

Epoch[3/10](740/2835) || training loss 0.655 || training accuracy 90.00% || lr 3.138e-05


 26%|██▋       | 750/2835 [21:37<1:00:06,  1.73s/it]

Epoch[3/10](750/2835) || training loss 0.6378 || training accuracy 91.67% || lr 3.262e-05


 27%|██▋       | 760/2835 [21:54<58:58,  1.71s/it]  

Epoch[3/10](760/2835) || training loss 0.6074 || training accuracy 90.83% || lr 3.386e-05


 27%|██▋       | 770/2835 [22:11<59:21,  1.72s/it]

Epoch[3/10](770/2835) || training loss 0.5689 || training accuracy 92.50% || lr 3.509999999999999e-05


 28%|██▊       | 780/2835 [22:28<57:45,  1.69s/it]

Epoch[3/10](780/2835) || training loss 0.6547 || training accuracy 90.00% || lr 3.634e-05


 28%|██▊       | 790/2835 [22:45<58:39,  1.72s/it]

Epoch[3/10](790/2835) || training loss 0.6684 || training accuracy 90.00% || lr 3.7579999999999996e-05


 28%|██▊       | 800/2835 [23:02<1:00:25,  1.78s/it]

Epoch[3/10](800/2835) || training loss 0.6331 || training accuracy 92.50% || lr 3.882e-05


 29%|██▊       | 810/2835 [23:20<58:51,  1.74s/it]  

Epoch[3/10](810/2835) || training loss 0.6739 || training accuracy 90.83% || lr 4.006e-05


 29%|██▉       | 820/2835 [23:37<58:28,  1.74s/it]

Epoch[3/10](820/2835) || training loss 0.633 || training accuracy 93.33% || lr 4.13e-05


 29%|██▉       | 830/2835 [23:55<58:16,  1.74s/it]

Epoch[3/10](830/2835) || training loss 0.6023 || training accuracy 88.33% || lr 4.2539999999999996e-05


 30%|██▉       | 840/2835 [24:12<58:12,  1.75s/it]

Epoch[3/10](840/2835) || training loss 0.6471 || training accuracy 90.00% || lr 4.378e-05


 30%|██▉       | 850/2835 [24:30<57:21,  1.73s/it]

Epoch[3/10](850/2835) || training loss 0.6605 || training accuracy 90.00% || lr 4.502e-05


 30%|███       | 860/2835 [24:47<57:01,  1.73s/it]

Epoch[3/10](860/2835) || training loss 0.6487 || training accuracy 90.00% || lr 4.626e-05


 31%|███       | 870/2835 [25:04<56:27,  1.72s/it]

Epoch[3/10](870/2835) || training loss 0.6257 || training accuracy 91.67% || lr 4.7499999999999996e-05


 31%|███       | 880/2835 [25:21<56:30,  1.73s/it]

Epoch[3/10](880/2835) || training loss 0.6456 || training accuracy 92.50% || lr 4.8740000000000004e-05


 31%|███▏      | 890/2835 [25:39<56:21,  1.74s/it]

Epoch[3/10](890/2835) || training loss 0.6084 || training accuracy 90.00% || lr 4.998e-05


 32%|███▏      | 900/2835 [25:56<56:13,  1.74s/it]

Epoch[3/10](900/2835) || training loss 0.6609 || training accuracy 88.33% || lr 5.122e-05


 32%|███▏      | 910/2835 [26:14<55:19,  1.72s/it]

Epoch[3/10](910/2835) || training loss 0.6136 || training accuracy 88.33% || lr 5.2459999999999996e-05


 32%|███▏      | 920/2835 [26:31<55:41,  1.74s/it]

Epoch[3/10](920/2835) || training loss 0.6124 || training accuracy 90.83% || lr 5.3700000000000004e-05


 33%|███▎      | 930/2835 [26:48<55:11,  1.74s/it]

Epoch[3/10](930/2835) || training loss 0.6297 || training accuracy 91.67% || lr 5.494e-05


 33%|███▎      | 940/2835 [27:06<55:02,  1.74s/it]

Epoch[3/10](940/2835) || training loss 0.6036 || training accuracy 92.50% || lr 5.6179999999999994e-05


 34%|███▎      | 950/2835 [27:23<54:37,  1.74s/it]

Epoch[3/10](950/2835) || training loss 0.6614 || training accuracy 90.83% || lr 5.742e-05


 34%|███▍      | 960/2835 [27:41<54:36,  1.75s/it]

Epoch[3/10](960/2835) || training loss 0.6038 || training accuracy 93.33% || lr 5.866e-05


 34%|███▍      | 970/2835 [27:58<54:00,  1.74s/it]

Epoch[3/10](970/2835) || training loss 0.6297 || training accuracy 90.83% || lr 5.99e-05


 35%|███▍      | 980/2835 [28:15<53:30,  1.73s/it]

Epoch[3/10](980/2835) || training loss  0.6 || training accuracy 89.17% || lr 6.114e-05


 35%|███▍      | 990/2835 [28:33<53:34,  1.74s/it]

Epoch[3/10](990/2835) || training loss 0.6007 || training accuracy 88.33% || lr 6.238e-05


 35%|███▌      | 1000/2835 [28:50<52:31,  1.72s/it]

Epoch[3/10](1000/2835) || training loss 0.6294 || training accuracy 91.67% || lr 6.362e-05


 36%|███▌      | 1010/2835 [29:08<52:53,  1.74s/it]

Epoch[3/10](1010/2835) || training loss 0.6123 || training accuracy 90.00% || lr 6.486e-05


 36%|███▌      | 1020/2835 [29:25<51:53,  1.72s/it]

Epoch[3/10](1020/2835) || training loss 0.6625 || training accuracy 90.00% || lr 6.610000000000001e-05


 36%|███▋      | 1030/2835 [29:42<52:21,  1.74s/it]

Epoch[3/10](1030/2835) || training loss 0.6187 || training accuracy 92.50% || lr 6.733999999999999e-05


 37%|███▋      | 1040/2835 [30:00<51:38,  1.73s/it]

Epoch[3/10](1040/2835) || training loss 0.6456 || training accuracy 90.00% || lr 6.858e-05


 37%|███▋      | 1050/2835 [30:17<51:23,  1.73s/it]

Epoch[3/10](1050/2835) || training loss 0.6269 || training accuracy 87.50% || lr 6.982e-05


 37%|███▋      | 1060/2835 [30:34<50:12,  1.70s/it]

Epoch[3/10](1060/2835) || training loss 0.6473 || training accuracy 88.33% || lr 7.106e-05


 38%|███▊      | 1070/2835 [30:51<50:35,  1.72s/it]

Epoch[3/10](1070/2835) || training loss 0.6389 || training accuracy 85.00% || lr 7.23e-05


 38%|███▊      | 1080/2835 [31:09<50:51,  1.74s/it]

Epoch[3/10](1080/2835) || training loss 0.6321 || training accuracy 89.17% || lr 7.354e-05


 38%|███▊      | 1090/2835 [31:26<50:45,  1.75s/it]

Epoch[3/10](1090/2835) || training loss 0.6484 || training accuracy 85.00% || lr 7.478e-05


 39%|███▉      | 1100/2835 [31:44<49:35,  1.71s/it]

Epoch[3/10](1100/2835) || training loss 0.6093 || training accuracy 89.17% || lr 7.602e-05


 39%|███▉      | 1110/2835 [32:01<49:51,  1.73s/it]

Epoch[3/10](1110/2835) || training loss 0.6325 || training accuracy 90.83% || lr 7.726e-05


 40%|███▉      | 1120/2835 [32:18<49:06,  1.72s/it]

Epoch[3/10](1120/2835) || training loss 0.6473 || training accuracy 90.00% || lr 7.85e-05


 40%|███▉      | 1130/2835 [32:36<49:25,  1.74s/it]

Epoch[3/10](1130/2835) || training loss 0.607 || training accuracy 93.33% || lr 7.974e-05


 40%|████      | 1140/2835 [32:53<49:10,  1.74s/it]

Epoch[3/10](1140/2835) || training loss 0.6072 || training accuracy 92.50% || lr 8.098e-05


 41%|████      | 1150/2835 [33:10<49:00,  1.74s/it]

Epoch[3/10](1150/2835) || training loss 0.6549 || training accuracy 89.17% || lr 8.222e-05


 41%|████      | 1160/2835 [33:27<47:14,  1.69s/it]

Epoch[3/10](1160/2835) || training loss 0.6427 || training accuracy 87.50% || lr 8.346e-05


 41%|████▏     | 1170/2835 [33:45<48:12,  1.74s/it]

Epoch[3/10](1170/2835) || training loss 0.6606 || training accuracy 90.00% || lr 8.470000000000001e-05


 42%|████▏     | 1180/2835 [34:03<53:38,  1.94s/it]

Epoch[3/10](1180/2835) || training loss 0.6512 || training accuracy 90.83% || lr 8.594e-05


 42%|████▏     | 1190/2835 [34:24<57:05,  2.08s/it]

Epoch[3/10](1190/2835) || training loss 0.6459 || training accuracy 90.83% || lr 8.718e-05


 42%|████▏     | 1200/2835 [34:45<57:25,  2.11s/it]

Epoch[3/10](1200/2835) || training loss 0.6093 || training accuracy 94.17% || lr 8.842e-05


 43%|████▎     | 1210/2835 [35:06<57:08,  2.11s/it]

Epoch[3/10](1210/2835) || training loss 0.6283 || training accuracy 91.67% || lr 8.966e-05


 43%|████▎     | 1220/2835 [35:27<56:37,  2.10s/it]

Epoch[3/10](1220/2835) || training loss 0.6673 || training accuracy 86.67% || lr 9.09e-05


 43%|████▎     | 1230/2835 [35:48<55:52,  2.09s/it]

Epoch[3/10](1230/2835) || training loss 0.6302 || training accuracy 85.83% || lr 9.214e-05


 44%|████▎     | 1240/2835 [36:05<45:12,  1.70s/it]

Epoch[3/10](1240/2835) || training loss 0.6263 || training accuracy 92.50% || lr 9.338e-05


 44%|████▍     | 1250/2835 [36:22<45:14,  1.71s/it]

Epoch[3/10](1250/2835) || training loss 0.6513 || training accuracy 91.67% || lr 9.462000000000001e-05


 44%|████▍     | 1260/2835 [36:39<45:44,  1.74s/it]

Epoch[3/10](1260/2835) || training loss 0.6749 || training accuracy 83.33% || lr 9.586e-05


 45%|████▍     | 1270/2835 [36:57<45:12,  1.73s/it]

Epoch[3/10](1270/2835) || training loss 0.641 || training accuracy 88.33% || lr 9.71e-05


 45%|████▌     | 1280/2835 [37:14<44:48,  1.73s/it]

Epoch[3/10](1280/2835) || training loss 0.6615 || training accuracy 88.33% || lr 9.834000000000001e-05


 46%|████▌     | 1290/2835 [37:31<44:35,  1.73s/it]

Epoch[3/10](1290/2835) || training loss 0.6216 || training accuracy 92.50% || lr 9.957999999999999e-05


 46%|████▌     | 1300/2835 [37:48<43:35,  1.70s/it]

Epoch[3/10](1300/2835) || training loss 0.6538 || training accuracy 89.17% || lr 0.00010082


 46%|████▌     | 1310/2835 [38:05<43:24,  1.71s/it]

Epoch[3/10](1310/2835) || training loss 0.6384 || training accuracy 93.33% || lr 0.00010206000000000001


 47%|████▋     | 1320/2835 [38:23<43:30,  1.72s/it]

Epoch[3/10](1320/2835) || training loss 0.6687 || training accuracy 85.83% || lr 0.0001033


 47%|████▋     | 1330/2835 [38:40<43:22,  1.73s/it]

Epoch[3/10](1330/2835) || training loss 0.6167 || training accuracy 94.17% || lr 0.00010454


 47%|████▋     | 1340/2835 [38:57<42:46,  1.72s/it]

Epoch[3/10](1340/2835) || training loss 0.6124 || training accuracy 92.50% || lr 0.00010578


 48%|████▊     | 1350/2835 [39:14<42:37,  1.72s/it]

Epoch[3/10](1350/2835) || training loss 0.6175 || training accuracy 90.83% || lr 0.00010702


 48%|████▊     | 1360/2835 [39:32<42:40,  1.74s/it]

Epoch[3/10](1360/2835) || training loss 0.6155 || training accuracy 93.33% || lr 0.00010826000000000001


 48%|████▊     | 1370/2835 [39:49<42:06,  1.72s/it]

Epoch[3/10](1370/2835) || training loss 0.6005 || training accuracy 91.67% || lr 0.00010949999999999999


 49%|████▊     | 1380/2835 [40:06<41:39,  1.72s/it]

Epoch[3/10](1380/2835) || training loss 0.6198 || training accuracy 92.50% || lr 0.00011074


 49%|████▉     | 1390/2835 [40:24<41:42,  1.73s/it]

Epoch[3/10](1390/2835) || training loss 0.6246 || training accuracy 91.67% || lr 0.00011198000000000001


 49%|████▉     | 1400/2835 [40:41<41:43,  1.74s/it]

Epoch[3/10](1400/2835) || training loss 0.5938 || training accuracy 90.00% || lr 0.00011322


 50%|████▉     | 1410/2835 [40:58<40:59,  1.73s/it]

Epoch[3/10](1410/2835) || training loss 0.6592 || training accuracy 89.17% || lr 0.00011446


 50%|█████     | 1420/2835 [41:16<40:29,  1.72s/it]

Epoch[3/10](1420/2835) || training loss 0.6482 || training accuracy 90.00% || lr 0.00011570000000000001


 50%|█████     | 1430/2835 [41:33<40:38,  1.74s/it]

Epoch[3/10](1430/2835) || training loss 0.6819 || training accuracy 89.17% || lr 0.00011694


 51%|█████     | 1440/2835 [41:50<39:59,  1.72s/it]

Epoch[3/10](1440/2835) || training loss 0.6227 || training accuracy 92.50% || lr 0.00011818000000000001


 51%|█████     | 1450/2835 [42:07<40:07,  1.74s/it]

Epoch[3/10](1450/2835) || training loss 0.6769 || training accuracy 88.33% || lr 0.00011942


 51%|█████▏    | 1460/2835 [42:25<38:06,  1.66s/it]

Epoch[3/10](1460/2835) || training loss 0.6328 || training accuracy 87.50% || lr 0.00012066


 52%|█████▏    | 1470/2835 [42:42<39:37,  1.74s/it]

Epoch[3/10](1470/2835) || training loss 0.6446 || training accuracy 92.50% || lr 0.00012190000000000001


 52%|█████▏    | 1480/2835 [42:59<39:28,  1.75s/it]

Epoch[3/10](1480/2835) || training loss 0.6237 || training accuracy 88.33% || lr 0.00012314


 53%|█████▎    | 1490/2835 [43:17<38:26,  1.71s/it]

Epoch[3/10](1490/2835) || training loss 0.6427 || training accuracy 93.33% || lr 0.00012438


 53%|█████▎    | 1500/2835 [43:34<38:51,  1.75s/it]

Epoch[3/10](1500/2835) || training loss 0.6384 || training accuracy 90.00% || lr 0.00012499808777397697


 53%|█████▎    | 1510/2835 [43:52<38:25,  1.74s/it]

Epoch[3/10](1510/2835) || training loss 0.621 || training accuracy 93.33% || lr 0.00012498279067351604


 54%|█████▎    | 1520/2835 [44:09<38:12,  1.74s/it]

Epoch[3/10](1520/2835) || training loss 0.6207 || training accuracy 88.33% || lr 0.00012495220024692484


 54%|█████▍    | 1530/2835 [44:26<37:46,  1.74s/it]

Epoch[3/10](1530/2835) || training loss 0.6359 || training accuracy 89.17% || lr 0.00012490632404193335


 54%|█████▍    | 1540/2835 [44:44<37:15,  1.73s/it]

Epoch[3/10](1540/2835) || training loss 0.6255 || training accuracy 90.83% || lr 0.00012484517337780875


 55%|█████▍    | 1550/2835 [45:01<37:17,  1.74s/it]

Epoch[3/10](1550/2835) || training loss 0.6207 || training accuracy 94.17% || lr 0.00012476876334256237


 55%|█████▌    | 1560/2835 [45:19<37:49,  1.78s/it]

Epoch[3/10](1560/2835) || training loss 0.6576 || training accuracy 85.00% || lr 0.00012467711278922705


 55%|█████▌    | 1570/2835 [45:36<36:36,  1.74s/it]

Epoch[3/10](1570/2835) || training loss 0.5996 || training accuracy 95.00% || lr 0.00012457024433120545


 56%|█████▌    | 1580/2835 [45:53<36:20,  1.74s/it]

Epoch[3/10](1580/2835) || training loss 0.5915 || training accuracy 94.17% || lr 0.00012444818433669046


 56%|█████▌    | 1590/2835 [46:11<36:11,  1.74s/it]

Epoch[3/10](1590/2835) || training loss 0.5948 || training accuracy 92.50% || lr 0.0001243109629221593


 56%|█████▋    | 1600/2835 [46:28<35:49,  1.74s/it]

Epoch[3/10](1600/2835) || training loss 0.6569 || training accuracy 89.17% || lr 0.0001241586139449427


 57%|█████▋    | 1610/2835 [46:46<35:42,  1.75s/it]

Epoch[3/10](1610/2835) || training loss 0.6158 || training accuracy 88.33% || lr 0.00012399117499487124


 57%|█████▋    | 1620/2835 [47:03<35:25,  1.75s/it]

Epoch[3/10](1620/2835) || training loss 0.6121 || training accuracy 90.83% || lr 0.00012380868738500028


 57%|█████▋    | 1630/2835 [47:20<34:32,  1.72s/it]

Epoch[3/10](1630/2835) || training loss 0.6257 || training accuracy 89.17% || lr 0.00012361119614141703


 58%|█████▊    | 1640/2835 [47:38<34:14,  1.72s/it]

Epoch[3/10](1640/2835) || training loss 0.6695 || training accuracy 90.00% || lr 0.00012339874999213067


 58%|█████▊    | 1650/2835 [47:55<33:45,  1.71s/it]

Epoch[3/10](1650/2835) || training loss 0.6516 || training accuracy 88.33% || lr 0.00012317140135504964


 59%|█████▊    | 1660/2835 [48:12<33:53,  1.73s/it]

Epoch[3/10](1660/2835) || training loss 0.6513 || training accuracy 89.17% || lr 0.00012292920632504825


 59%|█████▉    | 1670/2835 [48:29<33:38,  1.73s/it]

Epoch[3/10](1670/2835) || training loss 0.6343 || training accuracy 91.67% || lr 0.00012267222466012614


 59%|█████▉    | 1680/2835 [48:47<33:27,  1.74s/it]

Epoch[3/10](1680/2835) || training loss 0.6101 || training accuracy 91.67% || lr 0.0001224005197666638


 60%|█████▉    | 1690/2835 [49:04<33:01,  1.73s/it]

Epoch[3/10](1690/2835) || training loss 0.6651 || training accuracy 85.00% || lr 0.00012211415868377808


 60%|█████▉    | 1700/2835 [49:21<33:04,  1.75s/it]

Epoch[3/10](1700/2835) || training loss 0.6263 || training accuracy 89.17% || lr 0.00012181321206678128


 60%|██████    | 1710/2835 [49:39<32:46,  1.75s/it]

Epoch[3/10](1710/2835) || training loss 0.6139 || training accuracy 92.50% || lr 0.000121497754169748


 61%|██████    | 1720/2835 [49:56<33:18,  1.79s/it]

Epoch[3/10](1720/2835) || training loss 0.6377 || training accuracy 90.83% || lr 0.00012116786282719402


 61%|██████    | 1730/2835 [50:13<30:54,  1.68s/it]

Epoch[3/10](1730/2835) || training loss 0.6513 || training accuracy 88.33% || lr 0.00012082361943487184


 61%|██████▏   | 1740/2835 [50:31<31:23,  1.72s/it]

Epoch[3/10](1740/2835) || training loss 0.6492 || training accuracy 90.00% || lr 0.00012046510892968749


 62%|██████▏   | 1750/2835 [50:48<31:36,  1.75s/it]

Epoch[3/10](1750/2835) || training loss 0.6308 || training accuracy 91.67% || lr 0.00012009241976874364


 62%|██████▏   | 1760/2835 [51:06<31:20,  1.75s/it]

Epoch[3/10](1760/2835) || training loss 0.6195 || training accuracy 92.50% || lr 0.00011970564390751405


 62%|██████▏   | 1770/2835 [51:23<31:55,  1.80s/it]

Epoch[3/10](1770/2835) || training loss 0.6034 || training accuracy 95.00% || lr 0.00011930487677715505


 63%|██████▎   | 1780/2835 [51:40<30:24,  1.73s/it]

Epoch[3/10](1780/2835) || training loss 0.6565 || training accuracy 90.00% || lr 0.00011889021726095923


 63%|██████▎   | 1790/2835 [51:58<30:12,  1.73s/it]

Epoch[3/10](1790/2835) || training loss 0.6169 || training accuracy 93.33% || lr 0.00011846176766995755


 63%|██████▎   | 1800/2835 [52:15<29:41,  1.72s/it]

Epoch[3/10](1800/2835) || training loss 0.6506 || training accuracy 90.83% || lr 0.00011801963371767557


 64%|██████▍   | 1810/2835 [52:32<29:51,  1.75s/it]

Epoch[3/10](1810/2835) || training loss 0.6433 || training accuracy 85.83% || lr 0.00011756392449405019


 64%|██████▍   | 1820/2835 [52:50<29:33,  1.75s/it]

Epoch[3/10](1820/2835) || training loss 0.6549 || training accuracy 91.67% || lr 0.00011709475243851343


 65%|██████▍   | 1830/2835 [53:07<29:15,  1.75s/it]

Epoch[3/10](1830/2835) || training loss 0.6135 || training accuracy 89.17% || lr 0.00011661223331224962


 65%|██████▍   | 1840/2835 [53:25<28:57,  1.75s/it]

Epoch[3/10](1840/2835) || training loss 0.6272 || training accuracy 92.50% || lr 0.00011611648616963309


 65%|██████▌   | 1850/2835 [53:42<28:35,  1.74s/it]

Epoch[3/10](1850/2835) || training loss 0.6217 || training accuracy 90.00% || lr 0.00011560763332885323


 66%|██████▌   | 1860/2835 [54:00<28:18,  1.74s/it]

Epoch[3/10](1860/2835) || training loss 0.6365 || training accuracy 87.50% || lr 0.00011508580034173439


 66%|██████▌   | 1870/2835 [54:17<28:05,  1.75s/it]

Epoch[3/10](1870/2835) || training loss 0.6133 || training accuracy 89.17% || lr 0.00011455111596275781


 66%|██████▋   | 1880/2835 [54:34<27:38,  1.74s/it]

Epoch[3/10](1880/2835) || training loss 0.6178 || training accuracy 88.33% || lr 0.00011400371211729331


 67%|██████▋   | 1890/2835 [54:52<27:25,  1.74s/it]

Epoch[3/10](1890/2835) || training loss 0.6286 || training accuracy 93.33% || lr 0.0001134437238690488


 67%|██████▋   | 1900/2835 [55:09<26:57,  1.73s/it]

Epoch[3/10](1900/2835) || training loss 0.6228 || training accuracy 94.17% || lr 0.00011287128938674524


 67%|██████▋   | 1910/2835 [55:27<26:49,  1.74s/it]

Epoch[3/10](1910/2835) || training loss 0.6424 || training accuracy 90.83% || lr 0.00011228654991002563


 68%|██████▊   | 1920/2835 [55:44<26:33,  1.74s/it]

Epoch[3/10](1920/2835) || training loss 0.6328 || training accuracy 92.50% || lr 0.00011168964971460618


 68%|██████▊   | 1930/2835 [56:01<26:23,  1.75s/it]

Epoch[3/10](1930/2835) || training loss 0.594 || training accuracy 90.00% || lr 0.00011108073607667853


 68%|██████▊   | 1940/2835 [56:19<26:05,  1.75s/it]

Epoch[3/10](1940/2835) || training loss 0.6142 || training accuracy 89.17% || lr 0.00011045995923657147


 69%|██████▉   | 1950/2835 [56:36<25:44,  1.74s/it]

Epoch[3/10](1950/2835) || training loss 0.6126 || training accuracy 94.17% || lr 0.00010982747236168139


 69%|██████▉   | 1960/2835 [56:54<25:24,  1.74s/it]

Epoch[3/10](1960/2835) || training loss 0.607 || training accuracy 95.83% || lr 0.00010918343150868057


 69%|██████▉   | 1970/2835 [57:11<25:09,  1.75s/it]

Epoch[3/10](1970/2835) || training loss 0.6535 || training accuracy 86.67% || lr 0.0001085279955850125


 70%|██████▉   | 1980/2835 [57:29<24:13,  1.70s/it]

Epoch[3/10](1980/2835) || training loss 0.645 || training accuracy 90.00% || lr 0.00010786132630968385


 70%|███████   | 1990/2835 [57:46<24:37,  1.75s/it]

Epoch[3/10](1990/2835) || training loss 0.6216 || training accuracy 90.00% || lr 0.00010718358817336274


 71%|███████   | 2000/2835 [58:04<24:04,  1.73s/it]

Epoch[3/10](2000/2835) || training loss 0.6341 || training accuracy 94.17% || lr 0.00010649494839779307


 71%|███████   | 2010/2835 [58:21<23:53,  1.74s/it]

Epoch[3/10](2010/2835) || training loss 0.6083 || training accuracy 91.67% || lr 0.00010579557689453511


 71%|███████▏  | 2020/2835 [58:38<23:45,  1.75s/it]

Epoch[3/10](2020/2835) || training loss 0.6233 || training accuracy 89.17% || lr 0.0001050856462230424


 72%|███████▏  | 2030/2835 [58:56<23:13,  1.73s/it]

Epoch[3/10](2030/2835) || training loss 0.6123 || training accuracy 85.83% || lr 0.00010436533154808519


 72%|███████▏  | 2040/2835 [59:13<23:01,  1.74s/it]

Epoch[3/10](2040/2835) || training loss 0.6229 || training accuracy 93.33% || lr 0.00010363481059653121


 72%|███████▏  | 2050/2835 [59:31<22:55,  1.75s/it]

Epoch[3/10](2050/2835) || training loss 0.6082 || training accuracy 89.17% || lr 0.0001028942636134943


 73%|███████▎  | 2060/2835 [59:48<22:30,  1.74s/it]

Epoch[3/10](2060/2835) || training loss 0.6318 || training accuracy 90.00% || lr 0.00010214387331786141


 73%|███████▎  | 2070/2835 [1:00:05<22:04,  1.73s/it]

Epoch[3/10](2070/2835) || training loss 0.6349 || training accuracy 93.33% || lr 0.0001013838248572097


 73%|███████▎  | 2080/2835 [1:00:23<21:42,  1.73s/it]

Epoch[3/10](2080/2835) || training loss 0.5981 || training accuracy 94.17% || lr 0.00010061430576212398


 74%|███████▎  | 2090/2835 [1:00:40<21:32,  1.73s/it]

Epoch[3/10](2090/2835) || training loss 0.6206 || training accuracy 92.50% || lr 9.98355058999264e-05


 74%|███████▍  | 2100/2835 [1:00:57<21:21,  1.74s/it]

Epoch[3/10](2100/2835) || training loss 0.6254 || training accuracy 91.67% || lr 9.904761742782949e-05


 74%|███████▍  | 2110/2835 [1:01:15<21:05,  1.75s/it]

Epoch[3/10](2110/2835) || training loss 0.6223 || training accuracy 89.17% || lr 9.825083474552437e-05


 75%|███████▍  | 2120/2835 [1:01:32<20:54,  1.75s/it]

Epoch[3/10](2120/2835) || training loss 0.6321 || training accuracy 94.17% || lr 9.744535444721534e-05


 75%|███████▌  | 2130/2835 [1:01:50<20:57,  1.78s/it]

Epoch[3/10](2130/2835) || training loss 0.5883 || training accuracy 92.50% || lr 9.663137527311337e-05


 75%|███████▌  | 2140/2835 [1:02:07<19:53,  1.72s/it]

Epoch[3/10](2140/2835) || training loss 0.6758 || training accuracy 83.33% || lr 9.580909806039981e-05


 76%|███████▌  | 2150/2835 [1:02:24<19:38,  1.72s/it]

Epoch[3/10](2150/2835) || training loss 0.6188 || training accuracy 93.33% || lr 9.497872569367294e-05


 76%|███████▌  | 2160/2835 [1:02:42<19:35,  1.74s/it]

Epoch[3/10](2160/2835) || training loss 0.6267 || training accuracy 95.83% || lr 9.41404630548891e-05


 77%|███████▋  | 2170/2835 [1:02:59<19:09,  1.73s/it]

Epoch[3/10](2170/2835) || training loss 0.6091 || training accuracy 91.67% || lr 9.329451697281122e-05


 77%|███████▋  | 2180/2835 [1:03:16<19:02,  1.74s/it]

Epoch[3/10](2180/2835) || training loss 0.6523 || training accuracy 90.83% || lr 9.244109617197687e-05


 77%|███████▋  | 2190/2835 [1:03:34<18:48,  1.75s/it]

Epoch[3/10](2190/2835) || training loss 0.6341 || training accuracy 93.33% || lr 9.158041122119867e-05


 78%|███████▊  | 2200/2835 [1:03:51<18:27,  1.74s/it]

Epoch[3/10](2200/2835) || training loss 0.6068 || training accuracy 95.83% || lr 9.071267448160952e-05


 78%|███████▊  | 2210/2835 [1:04:09<18:12,  1.75s/it]

Epoch[3/10](2210/2835) || training loss 0.6377 || training accuracy 92.50% || lr 8.983810005426567e-05


 78%|███████▊  | 2220/2835 [1:04:26<17:48,  1.74s/it]

Epoch[3/10](2220/2835) || training loss 0.6457 || training accuracy 90.00% || lr 8.895690372732055e-05


 79%|███████▊  | 2230/2835 [1:04:43<17:29,  1.73s/it]

Epoch[3/10](2230/2835) || training loss 0.6136 || training accuracy 93.33% || lr 8.80693029227822e-05


 79%|███████▉  | 2240/2835 [1:05:01<17:10,  1.73s/it]

Epoch[3/10](2240/2835) || training loss 0.6048 || training accuracy 93.33% || lr 8.717551664286768e-05


 79%|███████▉  | 2250/2835 [1:05:18<17:12,  1.76s/it]

Epoch[3/10](2250/2835) || training loss 0.6294 || training accuracy 91.67% || lr 8.627576541596756e-05


 80%|███████▉  | 2260/2835 [1:05:36<16:43,  1.75s/it]

Epoch[3/10](2260/2835) || training loss 0.6031 || training accuracy 93.33% || lr 8.53702712422338e-05


 80%|████████  | 2270/2835 [1:05:53<16:25,  1.74s/it]

Epoch[3/10](2270/2835) || training loss 0.6507 || training accuracy 91.67% || lr 8.445925753880456e-05


 80%|████████  | 2280/2835 [1:06:10<16:01,  1.73s/it]

Epoch[3/10](2280/2835) || training loss 0.6541 || training accuracy 90.00% || lr 8.35429490846794e-05


 81%|████████  | 2290/2835 [1:06:27<15:21,  1.69s/it]

Epoch[3/10](2290/2835) || training loss 0.611 || training accuracy 94.17% || lr 8.262157196525835e-05


 81%|████████  | 2300/2835 [1:06:45<15:23,  1.73s/it]

Epoch[3/10](2300/2835) || training loss 0.6365 || training accuracy 90.83% || lr 8.169535351655874e-05


 81%|████████▏ | 2310/2835 [1:07:02<15:14,  1.74s/it]

Epoch[3/10](2310/2835) || training loss 0.6197 || training accuracy 90.83% || lr 8.076452226912348e-05


 82%|████████▏ | 2320/2835 [1:07:19<14:59,  1.75s/it]

Epoch[3/10](2320/2835) || training loss 0.6312 || training accuracy 90.83% || lr 7.98293078916346e-05


 82%|████████▏ | 2330/2835 [1:07:37<14:41,  1.74s/it]

Epoch[3/10](2330/2835) || training loss 0.6158 || training accuracy 90.00% || lr 7.88899411342458e-05


 83%|████████▎ | 2340/2835 [1:07:54<14:20,  1.74s/it]

Epoch[3/10](2340/2835) || training loss 0.6165 || training accuracy 90.00% || lr 7.79466537716484e-05


 83%|████████▎ | 2350/2835 [1:08:12<13:57,  1.73s/it]

Epoch[3/10](2350/2835) || training loss 0.6369 || training accuracy 89.17% || lr 7.699967854588444e-05


 83%|████████▎ | 2360/2835 [1:08:29<13:46,  1.74s/it]

Epoch[3/10](2360/2835) || training loss 0.5896 || training accuracy 95.00% || lr 7.604924910892094e-05


 84%|████████▎ | 2370/2835 [1:08:46<13:25,  1.73s/it]

Epoch[3/10](2370/2835) || training loss 0.6031 || training accuracy 93.33% || lr 7.509559996499996e-05


 84%|████████▍ | 2380/2835 [1:09:04<13:16,  1.75s/it]

Epoch[3/10](2380/2835) || training loss 0.6209 || training accuracy 95.83% || lr 7.413896641277794e-05


 84%|████████▍ | 2390/2835 [1:09:21<12:55,  1.74s/it]

Epoch[3/10](2390/2835) || training loss 0.6122 || training accuracy 94.17% || lr 7.317958448726951e-05


 85%|████████▍ | 2400/2835 [1:09:39<12:38,  1.74s/it]

Epoch[3/10](2400/2835) || training loss 0.6065 || training accuracy 90.83% || lr 7.221769090160924e-05


 85%|████████▌ | 2410/2835 [1:09:56<12:21,  1.74s/it]

Epoch[3/10](2410/2835) || training loss 0.6285 || training accuracy 88.33% || lr 7.125352298864626e-05


 85%|████████▌ | 2420/2835 [1:10:13<12:02,  1.74s/it]

Epoch[3/10](2420/2835) || training loss 0.637 || training accuracy 88.33% || lr 7.028731864238594e-05


 86%|████████▌ | 2430/2835 [1:10:31<11:46,  1.74s/it]

Epoch[3/10](2430/2835) || training loss 0.6226 || training accuracy 87.50% || lr 6.93193162592931e-05


 86%|████████▌ | 2440/2835 [1:10:48<11:28,  1.74s/it]

Epoch[3/10](2440/2835) || training loss 0.6075 || training accuracy 90.00% || lr 6.834975467947126e-05


 86%|████████▋ | 2450/2835 [1:11:06<11:10,  1.74s/it]

Epoch[3/10](2450/2835) || training loss 0.6105 || training accuracy 90.00% || lr 6.737887312773234e-05


 87%|████████▋ | 2460/2835 [1:11:23<10:46,  1.72s/it]

Epoch[3/10](2460/2835) || training loss 0.6552 || training accuracy 91.67% || lr 6.640691115457163e-05


 87%|████████▋ | 2470/2835 [1:11:40<10:30,  1.73s/it]

Epoch[3/10](2470/2835) || training loss 0.6164 || training accuracy 91.67% || lr 6.543410857706226e-05


 87%|████████▋ | 2480/2835 [1:11:58<10:20,  1.75s/it]

Epoch[3/10](2480/2835) || training loss 0.6444 || training accuracy 90.83% || lr 6.446070541968384e-05


 88%|████████▊ | 2490/2835 [1:12:15<09:50,  1.71s/it]

Epoch[3/10](2490/2835) || training loss 0.6469 || training accuracy 90.00% || lr 6.34869418551001e-05


 88%|████████▊ | 2500/2835 [1:12:32<09:39,  1.73s/it]

Epoch[3/10](2500/2835) || training loss 0.6706 || training accuracy 89.17% || lr 6.251305814489991e-05


 89%|████████▊ | 2510/2835 [1:12:50<09:24,  1.74s/it]

Epoch[3/10](2510/2835) || training loss 0.5862 || training accuracy 93.33% || lr 6.153929458031618e-05


 89%|████████▉ | 2520/2835 [1:13:07<09:04,  1.73s/it]

Epoch[3/10](2520/2835) || training loss 0.6439 || training accuracy 90.00% || lr 6.056589142293775e-05


 89%|████████▉ | 2530/2835 [1:13:24<08:51,  1.74s/it]

Epoch[3/10](2530/2835) || training loss 0.6021 || training accuracy 94.17% || lr 5.959308884542837e-05


 90%|████████▉ | 2540/2835 [1:13:42<08:16,  1.68s/it]

Epoch[3/10](2540/2835) || training loss 0.6594 || training accuracy 91.67% || lr 5.862112687226766e-05


 90%|████████▉ | 2550/2835 [1:13:59<08:13,  1.73s/it]

Epoch[3/10](2550/2835) || training loss 0.648 || training accuracy 93.33% || lr 5.7650245320528755e-05


 90%|█████████ | 2560/2835 [1:14:17<07:59,  1.74s/it]

Epoch[3/10](2560/2835) || training loss 0.6462 || training accuracy 92.50% || lr 5.668068374070691e-05


 91%|█████████ | 2570/2835 [1:14:34<07:39,  1.73s/it]

Epoch[3/10](2570/2835) || training loss 0.6405 || training accuracy 93.33% || lr 5.571268135761407e-05


 91%|█████████ | 2580/2835 [1:14:51<07:24,  1.74s/it]

Epoch[3/10](2580/2835) || training loss 0.6134 || training accuracy 92.50% || lr 5.474647701135375e-05


 91%|█████████▏| 2590/2835 [1:15:09<07:07,  1.75s/it]

Epoch[3/10](2590/2835) || training loss 0.6048 || training accuracy 90.83% || lr 5.378230909839079e-05


 92%|█████████▏| 2600/2835 [1:15:26<06:48,  1.74s/it]

Epoch[3/10](2600/2835) || training loss 0.6318 || training accuracy 93.33% || lr 5.2820415512730504e-05


 92%|█████████▏| 2610/2835 [1:15:44<06:30,  1.74s/it]

Epoch[3/10](2610/2835) || training loss 0.5945 || training accuracy 93.33% || lr 5.186103358722206e-05


 92%|█████████▏| 2620/2835 [1:16:01<06:12,  1.73s/it]

Epoch[3/10](2620/2835) || training loss 0.6077 || training accuracy 92.50% || lr 5.090440003500005e-05


 93%|█████████▎| 2630/2835 [1:16:18<05:55,  1.74s/it]

Epoch[3/10](2630/2835) || training loss 0.6222 || training accuracy 90.83% || lr 4.995075089107906e-05


 93%|█████████▎| 2640/2835 [1:16:36<05:39,  1.74s/it]

Epoch[3/10](2640/2835) || training loss 0.587 || training accuracy 95.83% || lr 4.9000321454115576e-05


 93%|█████████▎| 2650/2835 [1:16:53<05:21,  1.74s/it]

Epoch[3/10](2650/2835) || training loss 0.5988 || training accuracy 91.67% || lr 4.805334622835161e-05


 94%|█████████▍| 2660/2835 [1:17:11<05:05,  1.74s/it]

Epoch[3/10](2660/2835) || training loss 0.6193 || training accuracy 90.00% || lr 4.7110058865754205e-05


 94%|█████████▍| 2670/2835 [1:17:28<04:48,  1.75s/it]

Epoch[3/10](2670/2835) || training loss 0.6377 || training accuracy 90.83% || lr 4.61706921083654e-05


 95%|█████████▍| 2680/2835 [1:17:46<04:30,  1.74s/it]

Epoch[3/10](2680/2835) || training loss 0.6461 || training accuracy 91.67% || lr 4.523547773087651e-05


 95%|█████████▍| 2690/2835 [1:18:03<04:12,  1.74s/it]

Epoch[3/10](2690/2835) || training loss 0.619 || training accuracy 92.50% || lr 4.430464648344127e-05


 95%|█████████▌| 2700/2835 [1:18:20<03:54,  1.74s/it]

Epoch[3/10](2700/2835) || training loss 0.5964 || training accuracy 96.67% || lr 4.337842803474167e-05


 96%|█████████▌| 2710/2835 [1:18:38<03:37,  1.74s/it]

Epoch[3/10](2710/2835) || training loss 0.6635 || training accuracy 93.33% || lr 4.245705091532061e-05


 96%|█████████▌| 2720/2835 [1:18:55<03:18,  1.72s/it]

Epoch[3/10](2720/2835) || training loss 0.6147 || training accuracy 91.67% || lr 4.1540742461195435e-05


 96%|█████████▋| 2730/2835 [1:19:12<03:01,  1.73s/it]

Epoch[3/10](2730/2835) || training loss 0.6207 || training accuracy 90.83% || lr 4.06297287577662e-05


 97%|█████████▋| 2740/2835 [1:19:30<02:44,  1.73s/it]

Epoch[3/10](2740/2835) || training loss 0.612 || training accuracy 92.50% || lr 3.972423458403246e-05


 97%|█████████▋| 2750/2835 [1:19:47<02:27,  1.74s/it]

Epoch[3/10](2750/2835) || training loss 0.6339 || training accuracy 89.17% || lr 3.8824483357132334e-05


 97%|█████████▋| 2760/2835 [1:20:04<02:09,  1.73s/it]

Epoch[3/10](2760/2835) || training loss 0.6029 || training accuracy 94.17% || lr 3.793069707721781e-05


 98%|█████████▊| 2770/2835 [1:20:22<01:53,  1.74s/it]

Epoch[3/10](2770/2835) || training loss 0.6457 || training accuracy 83.33% || lr 3.704309627267945e-05


 98%|█████████▊| 2780/2835 [1:20:39<01:35,  1.74s/it]

Epoch[3/10](2780/2835) || training loss 0.6465 || training accuracy 93.33% || lr 3.616189994573433e-05


 98%|█████████▊| 2790/2835 [1:20:56<01:16,  1.70s/it]

Epoch[3/10](2790/2835) || training loss 0.6183 || training accuracy 89.17% || lr 3.528732551839049e-05


 99%|█████████▉| 2800/2835 [1:21:13<01:00,  1.72s/it]

Epoch[3/10](2800/2835) || training loss 0.681 || training accuracy 92.50% || lr 3.4419588778801345e-05


 99%|█████████▉| 2810/2835 [1:21:31<00:43,  1.74s/it]

Epoch[3/10](2810/2835) || training loss 0.6169 || training accuracy 92.50% || lr 3.3558903828023146e-05


 99%|█████████▉| 2820/2835 [1:21:48<00:25,  1.73s/it]

Epoch[3/10](2820/2835) || training loss 0.5929 || training accuracy 93.33% || lr 3.270548302718878e-05


100%|█████████▉| 2830/2835 [1:22:06<00:08,  1.75s/it]

Epoch[3/10](2830/2835) || training loss 0.6302 || training accuracy 90.00% || lr 3.18595369451109e-05


100%|██████████| 2835/2835 [1:22:14<00:00,  1.74s/it]

Calculating validation results...





New best model for val accuracy : 93.36%! saving the best model..


  0%|          | 0/2835 [00:00<?, ?it/s]

[Val] acc : 93.36%, loss: 0.63 || best acc : 93.36%, best loss: 0.63


  0%|          | 10/2835 [00:16<1:18:01,  1.66s/it]

Epoch[4/10](10/2835) || training loss 0.6652 || training accuracy 88.33% || lr 3.0605088987611173e-05


  1%|          | 20/2835 [00:34<1:22:00,  1.75s/it]

Epoch[4/10](20/2835) || training loss 0.5864 || training accuracy 93.33% || lr 2.9778738711302223e-05


  1%|          | 30/2835 [00:51<1:21:37,  1.75s/it]

Epoch[4/10](30/2835) || training loss 0.6433 || training accuracy 93.33% || lr 2.8960585284115844e-05


  1%|▏         | 40/2835 [01:09<1:18:22,  1.68s/it]

Epoch[4/10](40/2835) || training loss 0.6135 || training accuracy 91.67% || lr 2.81508305731679e-05


  2%|▏         | 50/2835 [01:26<1:21:01,  1.75s/it]

Epoch[4/10](50/2835) || training loss 0.6138 || training accuracy 93.33% || lr 2.734967437331673e-05


  2%|▏         | 60/2835 [01:43<1:19:35,  1.72s/it]

Epoch[4/10](60/2835) || training loss 0.6567 || training accuracy 89.17% || lr 2.6557314357866676e-05


  2%|▏         | 70/2835 [02:01<1:19:24,  1.72s/it]

Epoch[4/10](70/2835) || training loss 0.5975 || training accuracy 90.83% || lr 2.57739460297952e-05


  3%|▎         | 80/2835 [02:18<1:20:41,  1.76s/it]

Epoch[4/10](80/2835) || training loss 0.6077 || training accuracy 91.67% || lr 2.4999762673515473e-05


  3%|▎         | 90/2835 [02:35<1:19:18,  1.73s/it]

Epoch[4/10](90/2835) || training loss 0.6249 || training accuracy 95.83% || lr 2.423495530718628e-05


  4%|▎         | 100/2835 [02:53<1:18:37,  1.72s/it]

Epoch[4/10](100/2835) || training loss 0.6414 || training accuracy 87.50% || lr 2.3479712635581236e-05


  4%|▍         | 110/2835 [03:10<1:17:53,  1.72s/it]

Epoch[4/10](110/2835) || training loss 0.6358 || training accuracy 90.83% || lr 2.273422100352862e-05


  4%|▍         | 120/2835 [03:27<1:17:44,  1.72s/it]

Epoch[4/10](120/2835) || training loss 0.6236 || training accuracy 92.50% || lr 2.1998664349933574e-05


  5%|▍         | 130/2835 [03:44<1:18:18,  1.74s/it]

Epoch[4/10](130/2835) || training loss 0.6395 || training accuracy 94.17% || lr 2.127322416239407e-05


  5%|▍         | 140/2835 [04:02<1:18:32,  1.75s/it]

Epoch[4/10](140/2835) || training loss 0.6738 || training accuracy 91.67% || lr 2.0558079432421322e-05


  5%|▌         | 150/2835 [04:19<1:18:27,  1.75s/it]

Epoch[4/10](150/2835) || training loss 0.6154 || training accuracy 90.83% || lr 1.9853406611276514e-05


  6%|▌         | 160/2835 [04:37<1:17:17,  1.73s/it]

Epoch[4/10](160/2835) || training loss 0.612 || training accuracy 92.50% || lr 1.915937956643406e-05


  6%|▌         | 170/2835 [04:54<1:16:32,  1.72s/it]

Epoch[4/10](170/2835) || training loss 0.6199 || training accuracy 92.50% || lr 1.84761695386823e-05


  6%|▋         | 180/2835 [05:11<1:16:42,  1.73s/it]

Epoch[4/10](180/2835) || training loss 0.6062 || training accuracy 92.50% || lr 1.780394509987248e-05


  7%|▋         | 190/2835 [05:29<1:16:51,  1.74s/it]

Epoch[4/10](190/2835) || training loss 0.6095 || training accuracy 90.83% || lr 1.7142872111326216e-05


  7%|▋         | 200/2835 [05:46<1:16:05,  1.73s/it]

Epoch[4/10](200/2835) || training loss 0.592 || training accuracy 92.50% || lr 1.6493113682911507e-05


  7%|▋         | 210/2835 [06:03<1:16:03,  1.74s/it]

Epoch[4/10](210/2835) || training loss 0.5693 || training accuracy 94.17% || lr 1.585483013279808e-05


  8%|▊         | 220/2835 [06:21<1:15:23,  1.73s/it]

Epoch[4/10](220/2835) || training loss 0.6582 || training accuracy 85.83% || lr 1.5228178947901074e-05


  8%|▊         | 230/2835 [06:38<1:15:30,  1.74s/it]

Epoch[4/10](230/2835) || training loss 0.6226 || training accuracy 95.00% || lr 1.4613314745023554e-05


  8%|▊         | 240/2835 [06:56<1:15:22,  1.74s/it]

Epoch[4/10](240/2835) || training loss 0.6182 || training accuracy 90.83% || lr 1.4010389232707195e-05


  9%|▉         | 250/2835 [07:13<1:14:58,  1.74s/it]

Epoch[4/10](250/2835) || training loss 0.6314 || training accuracy 95.83% || lr 1.3419551173800406e-05


  9%|▉         | 260/2835 [07:30<1:14:50,  1.74s/it]

Epoch[4/10](260/2835) || training loss 0.6108 || training accuracy 96.67% || lr 1.2840946348753267e-05


 10%|▉         | 270/2835 [07:48<1:14:36,  1.75s/it]

Epoch[4/10](270/2835) || training loss 0.6351 || training accuracy 95.00% || lr 1.2274717519648556e-05


 10%|▉         | 280/2835 [08:05<1:13:20,  1.72s/it]

Epoch[4/10](280/2835) || training loss 0.6042 || training accuracy 97.50% || lr 1.1721004394977175e-05


 10%|█         | 290/2835 [08:23<1:13:15,  1.73s/it]

Epoch[4/10](290/2835) || training loss 0.6376 || training accuracy 95.83% || lr 1.1179943595167243e-05


 11%|█         | 300/2835 [08:40<1:13:40,  1.74s/it]

Epoch[4/10](300/2835) || training loss 0.6492 || training accuracy 88.33% || lr 1.0651668618875066e-05


 11%|█         | 310/2835 [08:57<1:13:16,  1.74s/it]

Epoch[4/10](310/2835) || training loss 0.5852 || training accuracy 91.67% || lr 1.0136309810046286e-05


 11%|█▏        | 320/2835 [09:15<1:11:55,  1.72s/it]

Epoch[4/10](320/2835) || training loss 0.5703 || training accuracy 94.17% || lr 9.633994325755502e-06


 12%|█▏        | 330/2835 [09:32<1:12:32,  1.74s/it]

Epoch[4/10](330/2835) || training loss 0.6291 || training accuracy 92.50% || lr 9.144846104832154e-06


 12%|█▏        | 340/2835 [09:49<1:12:34,  1.75s/it]

Epoch[4/10](340/2835) || training loss 0.6164 || training accuracy 92.50% || lr 8.668985837280459e-06


 12%|█▏        | 350/2835 [10:07<1:13:00,  1.76s/it]

Epoch[4/10](350/2835) || training loss 0.6781 || training accuracy 92.50% || lr 8.206530934501005e-06


 13%|█▎        | 360/2835 [10:24<1:10:54,  1.72s/it]

Epoch[4/10](360/2835) || training loss 0.6242 || training accuracy 91.67% || lr 7.757595500321198e-06


 13%|█▎        | 370/2835 [10:41<1:11:43,  1.75s/it]

Epoch[4/10](370/2835) || training loss 0.6512 || training accuracy 91.67% || lr 7.322290302841836e-06


 13%|█▎        | 380/2835 [10:59<1:10:44,  1.73s/it]

Epoch[4/10](380/2835) || training loss 0.6606 || training accuracy 85.00% || lr 6.9007227471068e-06


 14%|█▍        | 390/2835 [11:16<1:10:32,  1.73s/it]

Epoch[4/10](390/2835) || training loss 0.6326 || training accuracy 93.33% || lr 6.492996848602392e-06


 14%|█▍        | 400/2835 [11:33<1:10:39,  1.74s/it]

Epoch[4/10](400/2835) || training loss 0.5975 || training accuracy 93.33% || lr 6.09921320759317e-06


 14%|█▍        | 410/2835 [11:51<1:10:24,  1.74s/it]

Epoch[4/10](410/2835) || training loss 0.6139 || training accuracy 94.17% || lr 5.7194689843002225e-06


 15%|█▍        | 420/2835 [12:08<1:09:56,  1.74s/it]

Epoch[4/10](420/2835) || training loss 0.643 || training accuracy 95.83% || lr 5.35385787492841e-06


 15%|█▌        | 430/2835 [12:25<1:08:10,  1.70s/it]

Epoch[4/10](430/2835) || training loss 0.5912 || training accuracy 94.17% || lr 5.002470088548229e-06


 16%|█▌        | 440/2835 [12:43<1:09:23,  1.74s/it]

Epoch[4/10](440/2835) || training loss 0.6308 || training accuracy 94.17% || lr 4.665392324838029e-06


 16%|█▌        | 450/2835 [13:00<1:09:12,  1.74s/it]

Epoch[4/10](450/2835) || training loss 0.6156 || training accuracy 92.50% || lr 4.342707752692192e-06


 16%|█▌        | 460/2835 [13:17<1:09:04,  1.75s/it]

Epoch[4/10](460/2835) || training loss 0.6157 || training accuracy 94.17% || lr 4.034495989700481e-06


 17%|█▋        | 470/2835 [13:35<1:08:43,  1.74s/it]

Epoch[4/10](470/2835) || training loss 0.652 || training accuracy 93.33% || lr 3.7408330825035325e-06


 17%|█▋        | 480/2835 [13:52<1:08:27,  1.74s/it]

Epoch[4/10](480/2835) || training loss 0.6307 || training accuracy 90.83% || lr 3.46179148802953e-06


 17%|█▋        | 490/2835 [14:10<1:07:03,  1.72s/it]

Epoch[4/10](490/2835) || training loss 0.6476 || training accuracy 95.00% || lr 3.197440055616526e-06


 18%|█▊        | 500/2835 [14:27<1:08:59,  1.77s/it]

Epoch[4/10](500/2835) || training loss 0.6417 || training accuracy 91.67% || lr 2.9478440100248733e-06


 18%|█▊        | 510/2835 [14:44<1:06:50,  1.72s/it]

Epoch[4/10](510/2835) || training loss 0.5932 || training accuracy 93.33% || lr 2.7130649353440536e-06


 18%|█▊        | 520/2835 [15:02<1:07:26,  1.75s/it]

Epoch[4/10](520/2835) || training loss 0.6579 || training accuracy 90.00% || lr 2.493160759797666e-06


 19%|█▊        | 530/2835 [15:19<1:05:09,  1.70s/it]

Epoch[4/10](530/2835) || training loss 0.6477 || training accuracy 90.83% || lr 2.2881857414505167e-06


 19%|█▉        | 540/2835 [15:36<1:06:23,  1.74s/it]

Epoch[4/10](540/2835) || training loss 0.5849 || training accuracy 95.83% || lr 2.0981904548212992e-06


 19%|█▉        | 550/2835 [15:54<1:05:51,  1.73s/it]

Epoch[4/10](550/2835) || training loss 0.6323 || training accuracy 94.17% || lr 1.92322177840402e-06


 20%|█▉        | 560/2835 [16:11<1:04:53,  1.71s/it]

Epoch[4/10](560/2835) || training loss 0.6016 || training accuracy 95.83% || lr 1.763322883101465e-06


 20%|██        | 570/2835 [16:28<1:05:28,  1.73s/it]

Epoch[4/10](570/2835) || training loss 0.6433 || training accuracy 90.83% || lr 1.6185332215734328e-06


 20%|██        | 580/2835 [16:46<1:06:22,  1.77s/it]

Epoch[4/10](580/2835) || training loss 0.6715 || training accuracy 88.33% || lr 1.4888885185023785e-06


 21%|██        | 590/2835 [17:03<1:05:16,  1.74s/it]

Epoch[4/10](590/2835) || training loss 0.6387 || training accuracy 93.33% || lr 1.3744207617788582e-06


 21%|██        | 600/2835 [17:20<1:04:57,  1.74s/it]

Epoch[4/10](600/2835) || training loss 0.6248 || training accuracy 94.17% || lr 1.2751581946090403e-06


 22%|██▏       | 610/2835 [17:38<1:04:53,  1.75s/it]

Epoch[4/10](610/2835) || training loss 0.6699 || training accuracy 91.67% || lr 1.1911253085460663e-06


 22%|██▏       | 620/2835 [17:55<1:04:21,  1.74s/it]

Epoch[4/10](620/2835) || training loss 0.6551 || training accuracy 88.33% || lr 1.1223428374471632e-06


 22%|██▏       | 630/2835 [18:13<1:04:06,  1.74s/it]

Epoch[4/10](630/2835) || training loss 0.5809 || training accuracy 95.83% || lr 1.068827752357866e-06


 23%|██▎       | 640/2835 [18:30<1:03:22,  1.73s/it]

Epoch[4/10](640/2835) || training loss 0.6468 || training accuracy 90.00% || lr 1.0305932573246408e-06


 23%|██▎       | 650/2835 [18:48<1:03:17,  1.74s/it]

Epoch[4/10](650/2835) || training loss 0.6298 || training accuracy 92.50% || lr 1.0076487861370436e-06


 23%|██▎       | 660/2835 [19:05<1:02:39,  1.73s/it]

Epoch[4/10](660/2835) || training loss 0.6156 || training accuracy 91.67% || lr 1e-06


 24%|██▎       | 670/2835 [19:22<1:02:01,  1.72s/it]

Epoch[4/10](670/2835) || training loss 0.6572 || training accuracy 92.50% || lr 1.6150000000000002e-06


 24%|██▍       | 680/2835 [19:39<1:02:22,  1.74s/it]

Epoch[4/10](680/2835) || training loss 0.6412 || training accuracy 95.83% || lr 2.2300000000000002e-06


 24%|██▍       | 690/2835 [19:57<1:02:29,  1.75s/it]

Epoch[4/10](690/2835) || training loss 0.6261 || training accuracy 96.67% || lr 2.845e-06


 25%|██▍       | 700/2835 [20:15<1:02:35,  1.76s/it]

Epoch[4/10](700/2835) || training loss 0.6243 || training accuracy 91.67% || lr 3.4600000000000008e-06


 25%|██▌       | 710/2835 [20:32<1:01:38,  1.74s/it]

Epoch[4/10](710/2835) || training loss 0.6151 || training accuracy 91.67% || lr 4.075e-06


 25%|██▌       | 720/2835 [20:49<1:01:26,  1.74s/it]

Epoch[4/10](720/2835) || training loss 0.6055 || training accuracy 94.17% || lr 4.69e-06


 26%|██▌       | 730/2835 [21:07<1:01:11,  1.74s/it]

Epoch[4/10](730/2835) || training loss 0.6177 || training accuracy 92.50% || lr 5.305e-06


 26%|██▌       | 740/2835 [21:24<1:00:56,  1.75s/it]

Epoch[4/10](740/2835) || training loss 0.6046 || training accuracy 95.00% || lr 5.920000000000001e-06


 26%|██▋       | 750/2835 [21:41<59:18,  1.71s/it]  

Epoch[4/10](750/2835) || training loss 0.6222 || training accuracy 92.50% || lr 6.535e-06


 27%|██▋       | 760/2835 [21:59<1:00:41,  1.76s/it]

Epoch[4/10](760/2835) || training loss 0.6072 || training accuracy 93.33% || lr 7.15e-06


 27%|██▋       | 770/2835 [22:16<59:48,  1.74s/it]  

Epoch[4/10](770/2835) || training loss 0.619 || training accuracy 97.50% || lr 7.765000000000001e-06


 28%|██▊       | 780/2835 [22:34<59:50,  1.75s/it]  

Epoch[4/10](780/2835) || training loss 0.6706 || training accuracy 91.67% || lr 8.380000000000001e-06


 28%|██▊       | 790/2835 [22:51<59:28,  1.74s/it]

Epoch[4/10](790/2835) || training loss 0.6221 || training accuracy 91.67% || lr 8.995000000000001e-06


 28%|██▊       | 800/2835 [23:09<58:56,  1.74s/it]

Epoch[4/10](800/2835) || training loss 0.6244 || training accuracy 91.67% || lr 9.610000000000001e-06


 29%|██▊       | 810/2835 [23:26<58:43,  1.74s/it]

Epoch[4/10](810/2835) || training loss 0.6259 || training accuracy 91.67% || lr 1.0225000000000001e-05


 29%|██▉       | 820/2835 [23:43<58:05,  1.73s/it]

Epoch[4/10](820/2835) || training loss 0.6351 || training accuracy 94.17% || lr 1.0840000000000003e-05


 29%|██▉       | 830/2835 [24:01<57:51,  1.73s/it]

Epoch[4/10](830/2835) || training loss 0.6263 || training accuracy 90.83% || lr 1.1455000000000001e-05


 30%|██▉       | 840/2835 [24:18<57:51,  1.74s/it]

Epoch[4/10](840/2835) || training loss 0.632 || training accuracy 93.33% || lr 1.2070000000000001e-05


 30%|██▉       | 850/2835 [24:35<57:36,  1.74s/it]

Epoch[4/10](850/2835) || training loss 0.6157 || training accuracy 93.33% || lr 1.2685000000000001e-05


 30%|███       | 860/2835 [24:53<57:15,  1.74s/it]

Epoch[4/10](860/2835) || training loss 0.6221 || training accuracy 91.67% || lr 1.3300000000000001e-05


 31%|███       | 870/2835 [25:10<55:55,  1.71s/it]

Epoch[4/10](870/2835) || training loss 0.6154 || training accuracy 92.50% || lr 1.3915000000000001e-05


 31%|███       | 880/2835 [25:27<55:48,  1.71s/it]

Epoch[4/10](880/2835) || training loss 0.6063 || training accuracy 91.67% || lr 1.4530000000000001e-05


 31%|███▏      | 890/2835 [25:44<56:15,  1.74s/it]

Epoch[4/10](890/2835) || training loss 0.6215 || training accuracy 91.67% || lr 1.5145000000000002e-05


 32%|███▏      | 900/2835 [26:02<56:18,  1.75s/it]

Epoch[4/10](900/2835) || training loss 0.6109 || training accuracy 90.83% || lr 1.576e-05


 32%|███▏      | 910/2835 [26:19<55:01,  1.72s/it]

Epoch[4/10](910/2835) || training loss 0.6083 || training accuracy 93.33% || lr 1.6375e-05


 32%|███▏      | 920/2835 [26:36<55:29,  1.74s/it]

Epoch[4/10](920/2835) || training loss 0.6191 || training accuracy 95.00% || lr 1.699e-05


 33%|███▎      | 930/2835 [26:54<54:47,  1.73s/it]

Epoch[4/10](930/2835) || training loss 0.599 || training accuracy 90.83% || lr 1.7605000000000002e-05


 33%|███▎      | 940/2835 [27:11<55:00,  1.74s/it]

Epoch[4/10](940/2835) || training loss 0.6241 || training accuracy 95.00% || lr 1.8220000000000002e-05


 34%|███▎      | 950/2835 [27:28<54:29,  1.73s/it]

Epoch[4/10](950/2835) || training loss 0.601 || training accuracy 90.83% || lr 1.8835000000000002e-05


 34%|███▍      | 960/2835 [27:46<54:30,  1.74s/it]

Epoch[4/10](960/2835) || training loss 0.617 || training accuracy 93.33% || lr 1.9450000000000002e-05


 34%|███▍      | 970/2835 [28:03<53:37,  1.72s/it]

Epoch[4/10](970/2835) || training loss 0.6234 || training accuracy 93.33% || lr 2.0065000000000002e-05


 35%|███▍      | 980/2835 [28:20<53:09,  1.72s/it]

Epoch[4/10](980/2835) || training loss 0.6082 || training accuracy 93.33% || lr 2.0680000000000005e-05


 35%|███▍      | 990/2835 [28:37<53:07,  1.73s/it]

Epoch[4/10](990/2835) || training loss 0.6173 || training accuracy 94.17% || lr 2.1295000000000002e-05


 35%|███▌      | 1000/2835 [28:55<52:58,  1.73s/it]

Epoch[4/10](1000/2835) || training loss 0.6447 || training accuracy 95.00% || lr 2.1910000000000002e-05


 36%|███▌      | 1010/2835 [29:12<53:08,  1.75s/it]

Epoch[4/10](1010/2835) || training loss 0.5832 || training accuracy 94.17% || lr 2.2525000000000002e-05


 36%|███▌      | 1020/2835 [29:30<51:50,  1.71s/it]

Epoch[4/10](1020/2835) || training loss 0.6092 || training accuracy 92.50% || lr 2.3140000000000002e-05


 36%|███▋      | 1030/2835 [29:47<50:55,  1.69s/it]

Epoch[4/10](1030/2835) || training loss 0.6196 || training accuracy 95.83% || lr 2.3755000000000002e-05


 37%|███▋      | 1040/2835 [30:04<50:17,  1.68s/it]

Epoch[4/10](1040/2835) || training loss 0.5998 || training accuracy 97.50% || lr 2.4370000000000002e-05


 37%|███▋      | 1050/2835 [30:21<51:48,  1.74s/it]

Epoch[4/10](1050/2835) || training loss 0.6485 || training accuracy 90.83% || lr 2.4985000000000002e-05


 37%|███▋      | 1060/2835 [30:38<50:53,  1.72s/it]

Epoch[4/10](1060/2835) || training loss 0.6038 || training accuracy 92.50% || lr 2.5600000000000002e-05


 38%|███▊      | 1070/2835 [30:56<50:18,  1.71s/it]

Epoch[4/10](1070/2835) || training loss 0.6034 || training accuracy 89.17% || lr 2.6215000000000002e-05


 38%|███▊      | 1080/2835 [31:13<50:54,  1.74s/it]

Epoch[4/10](1080/2835) || training loss 0.6388 || training accuracy 90.83% || lr 2.6830000000000002e-05


 38%|███▊      | 1090/2835 [31:30<50:38,  1.74s/it]

Epoch[4/10](1090/2835) || training loss 0.6197 || training accuracy 91.67% || lr 2.7445000000000002e-05


 39%|███▉      | 1100/2835 [31:48<50:10,  1.74s/it]

Epoch[4/10](1100/2835) || training loss 0.6338 || training accuracy 92.50% || lr 2.8060000000000002e-05


 39%|███▉      | 1110/2835 [32:05<48:34,  1.69s/it]

Epoch[4/10](1110/2835) || training loss 0.6138 || training accuracy 91.67% || lr 2.8675000000000002e-05


 40%|███▉      | 1120/2835 [32:22<49:18,  1.73s/it]

Epoch[4/10](1120/2835) || training loss 0.6138 || training accuracy 93.33% || lr 2.9290000000000002e-05


 40%|███▉      | 1130/2835 [32:39<49:15,  1.73s/it]

Epoch[4/10](1130/2835) || training loss 0.5969 || training accuracy 95.83% || lr 2.9905000000000003e-05


 40%|████      | 1140/2835 [32:57<49:12,  1.74s/it]

Epoch[4/10](1140/2835) || training loss 0.6552 || training accuracy 94.17% || lr 3.052e-05


 41%|████      | 1150/2835 [33:14<47:34,  1.69s/it]

Epoch[4/10](1150/2835) || training loss 0.6562 || training accuracy 94.17% || lr 3.1135e-05


 41%|████      | 1160/2835 [33:31<48:17,  1.73s/it]

Epoch[4/10](1160/2835) || training loss 0.6247 || training accuracy 90.83% || lr 3.175e-05


 41%|████▏     | 1170/2835 [33:48<48:02,  1.73s/it]

Epoch[4/10](1170/2835) || training loss 0.6389 || training accuracy 91.67% || lr 3.2365e-05


 42%|████▏     | 1180/2835 [34:07<53:45,  1.95s/it]

Epoch[4/10](1180/2835) || training loss 0.6348 || training accuracy 91.67% || lr 3.298e-05


 42%|████▏     | 1190/2835 [34:28<57:32,  2.10s/it]

Epoch[4/10](1190/2835) || training loss 0.6291 || training accuracy 93.33% || lr 3.3595e-05


 42%|████▏     | 1200/2835 [34:49<57:31,  2.11s/it]

Epoch[4/10](1200/2835) || training loss 0.6611 || training accuracy 91.67% || lr 3.421e-05


 43%|████▎     | 1210/2835 [35:10<56:46,  2.10s/it]

Epoch[4/10](1210/2835) || training loss 0.5932 || training accuracy 95.00% || lr 3.4825e-05


 43%|████▎     | 1220/2835 [35:31<56:10,  2.09s/it]

Epoch[4/10](1220/2835) || training loss 0.6657 || training accuracy 90.83% || lr 3.544e-05


 43%|████▎     | 1230/2835 [35:52<56:11,  2.10s/it]

Epoch[4/10](1230/2835) || training loss 0.6154 || training accuracy 95.00% || lr 3.6055e-05


 44%|████▎     | 1240/2835 [36:08<44:36,  1.68s/it]

Epoch[4/10](1240/2835) || training loss 0.6368 || training accuracy 93.33% || lr 3.667e-05


 44%|████▍     | 1250/2835 [36:25<45:55,  1.74s/it]

Epoch[4/10](1250/2835) || training loss 0.6729 || training accuracy 89.17% || lr 3.7285e-05


 44%|████▍     | 1260/2835 [36:43<45:45,  1.74s/it]

Epoch[4/10](1260/2835) || training loss 0.6092 || training accuracy 92.50% || lr 3.79e-05


 45%|████▍     | 1270/2835 [37:00<45:13,  1.73s/it]

Epoch[4/10](1270/2835) || training loss 0.6364 || training accuracy 90.83% || lr 3.8515e-05


 45%|████▌     | 1280/2835 [37:18<45:04,  1.74s/it]

Epoch[4/10](1280/2835) || training loss 0.6643 || training accuracy 84.17% || lr 3.913e-05


 46%|████▌     | 1290/2835 [37:35<44:55,  1.74s/it]

Epoch[4/10](1290/2835) || training loss 0.6083 || training accuracy 90.00% || lr 3.9745e-05


 46%|████▌     | 1300/2835 [37:53<46:02,  1.80s/it]

Epoch[4/10](1300/2835) || training loss 0.6299 || training accuracy 94.17% || lr 4.0360000000000007e-05


 46%|████▌     | 1310/2835 [38:10<43:23,  1.71s/it]

Epoch[4/10](1310/2835) || training loss 0.6235 || training accuracy 94.17% || lr 4.0975e-05


 47%|████▋     | 1320/2835 [38:27<43:11,  1.71s/it]

Epoch[4/10](1320/2835) || training loss 0.6191 || training accuracy 96.67% || lr 4.159e-05


 47%|████▋     | 1330/2835 [38:44<43:43,  1.74s/it]

Epoch[4/10](1330/2835) || training loss 0.6475 || training accuracy 90.83% || lr 4.2205e-05


 47%|████▋     | 1340/2835 [39:02<43:25,  1.74s/it]

Epoch[4/10](1340/2835) || training loss 0.6597 || training accuracy 90.00% || lr 4.282e-05


 48%|████▊     | 1350/2835 [39:19<43:03,  1.74s/it]

Epoch[4/10](1350/2835) || training loss 0.6355 || training accuracy 91.67% || lr 4.3435e-05


 48%|████▊     | 1360/2835 [39:36<42:54,  1.75s/it]

Epoch[4/10](1360/2835) || training loss 0.6429 || training accuracy 92.50% || lr 4.405e-05


 48%|████▊     | 1370/2835 [39:54<42:39,  1.75s/it]

Epoch[4/10](1370/2835) || training loss 0.612 || training accuracy 94.17% || lr 4.4665e-05


 49%|████▊     | 1380/2835 [40:11<42:13,  1.74s/it]

Epoch[4/10](1380/2835) || training loss 0.6537 || training accuracy 91.67% || lr 4.528e-05


 49%|████▉     | 1390/2835 [40:29<41:47,  1.74s/it]

Epoch[4/10](1390/2835) || training loss 0.5687 || training accuracy 96.67% || lr 4.5895e-05


 49%|████▉     | 1400/2835 [40:46<41:36,  1.74s/it]

Epoch[4/10](1400/2835) || training loss 0.5918 || training accuracy 95.00% || lr 4.651e-05


 50%|████▉     | 1410/2835 [41:03<41:26,  1.74s/it]

Epoch[4/10](1410/2835) || training loss 0.6396 || training accuracy 89.17% || lr 4.7125e-05


 50%|█████     | 1420/2835 [41:21<41:21,  1.75s/it]

Epoch[4/10](1420/2835) || training loss 0.5981 || training accuracy 97.50% || lr 4.774e-05


 50%|█████     | 1430/2835 [41:38<40:42,  1.74s/it]

Epoch[4/10](1430/2835) || training loss 0.6328 || training accuracy 93.33% || lr 4.8355e-05


 51%|█████     | 1440/2835 [41:56<39:40,  1.71s/it]

Epoch[4/10](1440/2835) || training loss 0.6371 || training accuracy 91.67% || lr 4.897e-05


 51%|█████     | 1450/2835 [42:13<40:09,  1.74s/it]

Epoch[4/10](1450/2835) || training loss 0.6178 || training accuracy 93.33% || lr 4.9585e-05


 51%|█████▏    | 1460/2835 [42:30<38:58,  1.70s/it]

Epoch[4/10](1460/2835) || training loss 0.6562 || training accuracy 92.50% || lr 5.02e-05


 52%|█████▏    | 1470/2835 [42:47<39:05,  1.72s/it]

Epoch[4/10](1470/2835) || training loss 0.6035 || training accuracy 94.17% || lr 5.0815e-05


 52%|█████▏    | 1480/2835 [43:05<39:24,  1.74s/it]

Epoch[4/10](1480/2835) || training loss 0.6234 || training accuracy 95.83% || lr 5.143e-05


 53%|█████▎    | 1490/2835 [43:22<38:59,  1.74s/it]

Epoch[4/10](1490/2835) || training loss 0.6255 || training accuracy 94.17% || lr 5.2045e-05


 53%|█████▎    | 1500/2835 [43:39<38:46,  1.74s/it]

Epoch[4/10](1500/2835) || training loss 0.62 || training accuracy 94.17% || lr 5.266e-05


 53%|█████▎    | 1510/2835 [43:57<37:21,  1.69s/it]

Epoch[4/10](1510/2835) || training loss 0.631 || training accuracy 95.00% || lr 5.3275e-05


 54%|█████▎    | 1520/2835 [44:14<38:02,  1.74s/it]

Epoch[4/10](1520/2835) || training loss 0.6368 || training accuracy 93.33% || lr 5.389e-05


 54%|█████▍    | 1530/2835 [44:31<37:43,  1.73s/it]

Epoch[4/10](1530/2835) || training loss 0.6364 || training accuracy 91.67% || lr 5.4505e-05


 54%|█████▍    | 1540/2835 [44:49<37:34,  1.74s/it]

Epoch[4/10](1540/2835) || training loss 0.6523 || training accuracy 92.50% || lr 5.512e-05


 55%|█████▍    | 1550/2835 [45:06<36:47,  1.72s/it]

Epoch[4/10](1550/2835) || training loss 0.608 || training accuracy 95.83% || lr 5.5735e-05


 55%|█████▌    | 1560/2835 [45:23<36:44,  1.73s/it]

Epoch[4/10](1560/2835) || training loss 0.6308 || training accuracy 95.00% || lr 5.635e-05


 55%|█████▌    | 1570/2835 [45:41<36:35,  1.74s/it]

Epoch[4/10](1570/2835) || training loss 0.6411 || training accuracy 92.50% || lr 5.6965e-05


 56%|█████▌    | 1580/2835 [45:58<36:12,  1.73s/it]

Epoch[4/10](1580/2835) || training loss 0.6311 || training accuracy 90.00% || lr 5.758e-05


 56%|█████▌    | 1590/2835 [46:15<35:45,  1.72s/it]

Epoch[4/10](1590/2835) || training loss 0.6848 || training accuracy 93.33% || lr 5.8195e-05


 56%|█████▋    | 1600/2835 [46:32<35:43,  1.74s/it]

Epoch[4/10](1600/2835) || training loss 0.5864 || training accuracy 95.00% || lr 5.881e-05


 57%|█████▋    | 1610/2835 [46:50<35:31,  1.74s/it]

Epoch[4/10](1610/2835) || training loss 0.6493 || training accuracy 91.67% || lr 5.9425e-05


 57%|█████▋    | 1620/2835 [47:07<34:08,  1.69s/it]

Epoch[4/10](1620/2835) || training loss 0.6015 || training accuracy 92.50% || lr 6.004e-05


 57%|█████▋    | 1630/2835 [47:24<34:19,  1.71s/it]

Epoch[4/10](1630/2835) || training loss 0.6235 || training accuracy 91.67% || lr 6.0655e-05


 58%|█████▊    | 1640/2835 [47:41<34:34,  1.74s/it]

Epoch[4/10](1640/2835) || training loss 0.6133 || training accuracy 94.17% || lr 6.127e-05


 58%|█████▊    | 1650/2835 [47:59<34:18,  1.74s/it]

Epoch[4/10](1650/2835) || training loss 0.6299 || training accuracy 92.50% || lr 6.188500000000001e-05


 59%|█████▊    | 1660/2835 [48:16<34:49,  1.78s/it]

Epoch[4/10](1660/2835) || training loss 0.6269 || training accuracy 92.50% || lr 6.25e-05


 59%|█████▉    | 1670/2835 [48:33<32:45,  1.69s/it]

Epoch[4/10](1670/2835) || training loss 0.6612 || training accuracy 90.83% || lr 6.249620644881107e-05


 59%|█████▉    | 1680/2835 [48:50<32:16,  1.68s/it]

Epoch[4/10](1680/2835) || training loss 0.6156 || training accuracy 96.67% || lr 6.248482673124625e-05


 60%|█████▉    | 1690/2835 [49:07<32:45,  1.72s/it]

Epoch[4/10](1690/2835) || training loss 0.6472 || training accuracy 93.33% || lr 6.246586365508058e-05


 60%|█████▉    | 1700/2835 [49:25<32:54,  1.74s/it]

Epoch[4/10](1700/2835) || training loss 0.6281 || training accuracy 93.33% || lr 6.243932189916935e-05


 60%|██████    | 1710/2835 [49:42<32:38,  1.74s/it]

Epoch[4/10](1710/2835) || training loss 0.595 || training accuracy 95.83% || lr 6.240520801229369e-05


 61%|██████    | 1720/2835 [49:59<32:15,  1.74s/it]

Epoch[4/10](1720/2835) || training loss 0.6334 || training accuracy 90.83% || lr 6.236353041154471e-05


 61%|██████    | 1730/2835 [50:17<31:30,  1.71s/it]

Epoch[4/10](1730/2835) || training loss 0.5884 || training accuracy 93.33% || lr 6.231429938024678e-05


 61%|██████▏   | 1740/2835 [50:34<31:31,  1.73s/it]

Epoch[4/10](1740/2835) || training loss 0.588 || training accuracy 96.67% || lr 6.22575270654202e-05


 62%|██████▏   | 1750/2835 [50:51<31:21,  1.73s/it]

Epoch[4/10](1750/2835) || training loss 0.6169 || training accuracy 94.17% || lr 6.219322747478415e-05


 62%|██████▏   | 1760/2835 [51:08<30:11,  1.69s/it]

Epoch[4/10](1760/2835) || training loss 0.6053 || training accuracy 91.67% || lr 6.212141647330049e-05


 62%|██████▏   | 1770/2835 [51:25<30:18,  1.71s/it]

Epoch[4/10](1770/2835) || training loss 0.5954 || training accuracy 92.50% || lr 6.20421117792593e-05


 63%|██████▎   | 1780/2835 [51:43<31:10,  1.77s/it]

Epoch[4/10](1780/2835) || training loss 0.6601 || training accuracy 90.00% || lr 6.195533295990719e-05


 63%|██████▎   | 1790/2835 [52:00<30:01,  1.72s/it]

Epoch[4/10](1790/2835) || training loss 0.6178 || training accuracy 92.50% || lr 6.18611014266193e-05


 63%|██████▎   | 1800/2835 [52:17<29:58,  1.74s/it]

Epoch[4/10](1800/2835) || training loss 0.64 || training accuracy 92.50% || lr 6.175944042961649e-05


 64%|██████▍   | 1810/2835 [52:35<29:41,  1.74s/it]

Epoch[4/10](1810/2835) || training loss 0.5977 || training accuracy 91.67% || lr 6.165037505222855e-05


 64%|██████▍   | 1820/2835 [52:52<29:16,  1.73s/it]

Epoch[4/10](1820/2835) || training loss 0.5955 || training accuracy 94.17% || lr 6.15339322047054e-05


 65%|██████▍   | 1830/2835 [53:09<29:05,  1.74s/it]

Epoch[4/10](1830/2835) || training loss 0.6454 || training accuracy 90.83% || lr 6.14101406175773e-05


 65%|██████▍   | 1840/2835 [53:27<29:00,  1.75s/it]

Epoch[4/10](1840/2835) || training loss 0.6204 || training accuracy 90.83% || lr 6.1279030834566e-05


 65%|██████▌   | 1850/2835 [53:44<27:21,  1.67s/it]

Epoch[4/10](1850/2835) || training loss 0.6127 || training accuracy 89.17% || lr 6.114063520504865e-05


 66%|██████▌   | 1860/2835 [54:01<28:10,  1.73s/it]

Epoch[4/10](1860/2835) || training loss 0.6407 || training accuracy 90.00% || lr 6.099498787607598e-05


 66%|██████▌   | 1870/2835 [54:18<27:58,  1.74s/it]

Epoch[4/10](1870/2835) || training loss 0.6074 || training accuracy 95.83% || lr 6.084212478394702e-05


 66%|██████▋   | 1880/2835 [54:36<27:14,  1.71s/it]

Epoch[4/10](1880/2835) || training loss 0.5879 || training accuracy 91.67% || lr 6.0682083645342435e-05


 67%|██████▋   | 1890/2835 [54:53<27:28,  1.74s/it]

Epoch[4/10](1890/2835) || training loss 0.6171 || training accuracy 95.83% || lr 6.0514903948018425e-05


 67%|██████▋   | 1900/2835 [55:10<27:37,  1.77s/it]

Epoch[4/10](1900/2835) || training loss 0.6461 || training accuracy 91.67% || lr 6.034062694106373e-05


 67%|██████▋   | 1910/2835 [55:28<27:33,  1.79s/it]

Epoch[4/10](1910/2835) || training loss 0.6618 || training accuracy 88.33% || lr 6.015929562472207e-05


 68%|██████▊   | 1920/2835 [55:45<26:16,  1.72s/it]

Epoch[4/10](1920/2835) || training loss 0.6066 || training accuracy 94.17% || lr 5.9970954739782415e-05


 68%|██████▊   | 1930/2835 [56:02<25:57,  1.72s/it]

Epoch[4/10](1930/2835) || training loss 0.6082 || training accuracy 92.50% || lr 5.977565075653994e-05


 68%|██████▊   | 1940/2835 [56:19<25:56,  1.74s/it]

Epoch[4/10](1940/2835) || training loss 0.6311 || training accuracy 89.17% || lr 5.9573431863330104e-05


 69%|██████▉   | 1950/2835 [56:37<25:05,  1.70s/it]

Epoch[4/10](1950/2835) || training loss 0.6017 || training accuracy 93.33% || lr 5.936434795463893e-05


 69%|██████▉   | 1960/2835 [56:54<25:22,  1.74s/it]

Epoch[4/10](1960/2835) || training loss 0.6192 || training accuracy 87.50% || lr 5.914845061879231e-05


 69%|██████▉   | 1970/2835 [57:11<24:48,  1.72s/it]

Epoch[4/10](1970/2835) || training loss 0.6309 || training accuracy 89.17% || lr 5.892579312522733e-05


 70%|██████▉   | 1980/2835 [57:29<25:33,  1.79s/it]

Epoch[4/10](1980/2835) || training loss 0.6542 || training accuracy 91.67% || lr 5.86964304113488e-05


 70%|███████   | 1990/2835 [57:46<23:43,  1.68s/it]

Epoch[4/10](1990/2835) || training loss 0.6646 || training accuracy 92.50% || lr 5.8460419068974386e-05


 71%|███████   | 2000/2835 [58:03<23:52,  1.72s/it]

Epoch[4/10](2000/2835) || training loss 0.5857 || training accuracy 97.50% || lr 5.8217817330371266e-05


 71%|███████   | 2010/2835 [58:21<23:53,  1.74s/it]

Epoch[4/10](2010/2835) || training loss 0.6038 || training accuracy 95.83% || lr 5.7968685053888336e-05


 71%|███████▏  | 2020/2835 [58:38<23:37,  1.74s/it]

Epoch[4/10](2020/2835) || training loss 0.6249 || training accuracy 95.83% || lr 5.771308370918696e-05


 72%|███████▏  | 2030/2835 [58:55<22:44,  1.70s/it]

Epoch[4/10](2030/2835) || training loss 0.6213 || training accuracy 92.50% || lr 5.7451076362074314e-05


 72%|███████▏  | 2040/2835 [59:12<22:30,  1.70s/it]

Epoch[4/10](2040/2835) || training loss 0.6058 || training accuracy 94.17% || lr 5.7182727658942774e-05


 72%|███████▏  | 2050/2835 [59:29<22:17,  1.70s/it]

Epoch[4/10](2050/2835) || training loss 0.6334 || training accuracy 90.83% || lr 5.690810381081947e-05


 73%|███████▎  | 2060/2835 [59:46<22:18,  1.73s/it]

Epoch[4/10](2060/2835) || training loss 0.6433 || training accuracy 86.67% || lr 5.662727257702964e-05


 73%|███████▎  | 2070/2835 [1:00:04<22:10,  1.74s/it]

Epoch[4/10](2070/2835) || training loss 0.6036 || training accuracy 95.00% || lr 5.634030324847804e-05


 73%|███████▎  | 2080/2835 [1:00:21<21:55,  1.74s/it]

Epoch[4/10](2080/2835) || training loss 0.6501 || training accuracy 93.33% || lr 5.6047266630552476e-05


 74%|███████▎  | 2090/2835 [1:00:38<21:37,  1.74s/it]

Epoch[4/10](2090/2835) || training loss 0.6345 || training accuracy 93.33% || lr 5.574823502565364e-05


 74%|███████▍  | 2100/2835 [1:00:56<21:15,  1.74s/it]

Epoch[4/10](2100/2835) || training loss 0.624 || training accuracy 96.67% || lr 5.5443282215355514e-05


 74%|███████▍  | 2110/2835 [1:01:13<21:02,  1.74s/it]

Epoch[4/10](2110/2835) || training loss 0.6681 || training accuracy 92.50% || lr 5.513248344220096e-05


 75%|███████▍  | 2120/2835 [1:01:31<20:35,  1.73s/it]

Epoch[4/10](2120/2835) || training loss 0.6241 || training accuracy 93.33% || lr 5.481591539113663e-05


 75%|███████▌  | 2130/2835 [1:01:48<20:20,  1.73s/it]

Epoch[4/10](2130/2835) || training loss 0.6162 || training accuracy 94.17% || lr 5.449365617059224e-05


 75%|███████▌  | 2140/2835 [1:02:05<20:18,  1.75s/it]

Epoch[4/10](2140/2835) || training loss 0.6115 || training accuracy 95.00% || lr 5.4165785293208405e-05


 76%|███████▌  | 2150/2835 [1:02:22<19:42,  1.73s/it]

Epoch[4/10](2150/2835) || training loss 0.6209 || training accuracy 93.33% || lr 5.383238365621806e-05


 76%|███████▌  | 2160/2835 [1:02:40<19:31,  1.74s/it]

Epoch[4/10](2160/2835) || training loss 0.6162 || training accuracy 90.83% || lr 5.349353352148633e-05


 77%|███████▋  | 2170/2835 [1:02:57<19:14,  1.74s/it]

Epoch[4/10](2170/2835) || training loss 0.6124 || training accuracy 95.00% || lr 5.3149318495213664e-05


 77%|███████▋  | 2180/2835 [1:03:14<18:32,  1.70s/it]

Epoch[4/10](2180/2835) || training loss 0.6227 || training accuracy 95.00% || lr 5.279982350730718e-05


 77%|███████▋  | 2190/2835 [1:03:31<18:01,  1.68s/it]

Epoch[4/10](2190/2835) || training loss 0.6211 || training accuracy 94.17% || lr 5.244513479042553e-05


 78%|███████▊  | 2200/2835 [1:03:48<18:05,  1.71s/it]

Epoch[4/10](2200/2835) || training loss 0.6337 || training accuracy 94.17% || lr 5.20853398587023e-05


 78%|███████▊  | 2210/2835 [1:04:06<17:42,  1.70s/it]

Epoch[4/10](2210/2835) || training loss 0.6086 || training accuracy 96.67% || lr 5.1720527486153145e-05


 78%|███████▊  | 2220/2835 [1:04:23<18:05,  1.77s/it]

Epoch[4/10](2220/2835) || training loss 0.6432 || training accuracy 94.17% || lr 5.135078768477221e-05


 79%|███████▊  | 2230/2835 [1:04:40<16:39,  1.65s/it]

Epoch[4/10](2230/2835) || training loss 0.6094 || training accuracy 93.33% || lr 5.097621168232294e-05


 79%|███████▉  | 2240/2835 [1:04:57<17:13,  1.74s/it]

Epoch[4/10](2240/2835) || training loss 0.587 || training accuracy 92.50% || lr 5.059689189982903e-05


 79%|███████▉  | 2250/2835 [1:05:15<16:59,  1.74s/it]

Epoch[4/10](2250/2835) || training loss 0.665 || training accuracy 86.67% || lr 5.021292192877093e-05


 80%|███████▉  | 2260/2835 [1:05:32<16:29,  1.72s/it]

Epoch[4/10](2260/2835) || training loss 0.6041 || training accuracy 93.33% || lr 4.982439650799355e-05


 80%|████████  | 2270/2835 [1:05:49<16:23,  1.74s/it]

Epoch[4/10](2270/2835) || training loss 0.5946 || training accuracy 97.50% || lr 4.943141150033082e-05


 80%|████████  | 2280/2835 [1:06:07<16:05,  1.74s/it]

Epoch[4/10](2280/2835) || training loss 0.6672 || training accuracy 90.00% || lr 4.903406386895302e-05


 81%|████████  | 2290/2835 [1:06:24<15:50,  1.74s/it]

Epoch[4/10](2290/2835) || training loss 0.6463 || training accuracy 95.00% || lr 4.8632451653442556e-05


 81%|████████  | 2300/2835 [1:06:41<15:30,  1.74s/it]

Epoch[4/10](2300/2835) || training loss 0.5998 || training accuracy 96.67% || lr 4.8226673945604146e-05


 81%|████████▏ | 2310/2835 [1:06:59<15:13,  1.74s/it]

Epoch[4/10](2310/2835) || training loss 0.6351 || training accuracy 89.17% || lr 4.7816830865015424e-05


 82%|████████▏ | 2320/2835 [1:07:16<14:53,  1.74s/it]

Epoch[4/10](2320/2835) || training loss 0.6058 || training accuracy 96.67% || lr 4.740302353432392e-05


 82%|████████▏ | 2330/2835 [1:07:34<14:37,  1.74s/it]

Epoch[4/10](2330/2835) || training loss 0.6697 || training accuracy 86.67% || lr 4.698535405429653e-05


 83%|████████▎ | 2340/2835 [1:07:51<14:20,  1.74s/it]

Epoch[4/10](2340/2835) || training loss 0.6131 || training accuracy 93.33% || lr 4.656392547862774e-05


 83%|████████▎ | 2350/2835 [1:08:08<13:57,  1.73s/it]

Epoch[4/10](2350/2835) || training loss 0.6381 || training accuracy 89.17% || lr 4.613884178851263e-05


 83%|████████▎ | 2360/2835 [1:08:25<13:34,  1.71s/it]

Epoch[4/10](2360/2835) || training loss 0.623 || training accuracy 93.33% || lr 4.5710207866991064e-05


 84%|████████▎ | 2370/2835 [1:08:43<13:18,  1.72s/it]

Epoch[4/10](2370/2835) || training loss 0.6189 || training accuracy 91.67% || lr 4.5278129473069394e-05


 84%|████████▍ | 2380/2835 [1:09:00<12:49,  1.69s/it]

Epoch[4/10](2380/2835) || training loss 0.6253 || training accuracy 91.67% || lr 4.4842713215625985e-05


 84%|████████▍ | 2390/2835 [1:09:17<12:47,  1.72s/it]

Epoch[4/10](2390/2835) || training loss 0.6424 || training accuracy 89.17% || lr 4.440406652710709e-05


 85%|████████▍ | 2400/2835 [1:09:34<12:34,  1.73s/it]

Epoch[4/10](2400/2835) || training loss 0.6578 || training accuracy 92.50% || lr 4.3962297637019506e-05


 85%|████████▌ | 2410/2835 [1:09:51<12:20,  1.74s/it]

Epoch[4/10](2410/2835) || training loss 0.6559 || training accuracy 90.83% || lr 4.3517515545226515e-05


 85%|████████▌ | 2420/2835 [1:10:09<11:59,  1.73s/it]

Epoch[4/10](2420/2835) || training loss 0.6382 || training accuracy 90.83% || lr 4.3069829995053844e-05


 86%|████████▌ | 2430/2835 [1:10:26<11:46,  1.75s/it]

Epoch[4/10](2430/2835) || training loss 0.6238 || training accuracy 97.50% || lr 4.261935144621216e-05


 86%|████████▌ | 2440/2835 [1:10:43<11:17,  1.72s/it]

Epoch[4/10](2440/2835) || training loss 0.6451 || training accuracy 96.67% || lr 4.216619104754272e-05


 86%|████████▋ | 2450/2835 [1:11:01<11:03,  1.72s/it]

Epoch[4/10](2450/2835) || training loss 0.6479 || training accuracy 91.67% || lr 4.1710460609593095e-05


 87%|████████▋ | 2460/2835 [1:11:18<10:50,  1.74s/it]

Epoch[4/10](2460/2835) || training loss 0.6274 || training accuracy 94.17% || lr 4.1252272577029637e-05


 87%|████████▋ | 2470/2835 [1:11:35<10:34,  1.74s/it]

Epoch[4/10](2470/2835) || training loss 0.5837 || training accuracy 97.50% || lr 4.079174000089335e-05


 87%|████████▋ | 2480/2835 [1:11:53<10:17,  1.74s/it]

Epoch[4/10](2480/2835) || training loss 0.6103 || training accuracy 92.50% || lr 4.0328976510706305e-05


 88%|████████▊ | 2490/2835 [1:12:10<10:00,  1.74s/it]

Epoch[4/10](2490/2835) || training loss 0.6315 || training accuracy 90.83% || lr 3.986409628643522e-05


 88%|████████▊ | 2500/2835 [1:12:28<09:35,  1.72s/it]

Epoch[4/10](2500/2835) || training loss 0.5973 || training accuracy 95.00% || lr 3.939721403031928e-05


 89%|████████▊ | 2510/2835 [1:12:45<09:22,  1.73s/it]

Epoch[4/10](2510/2835) || training loss 0.5924 || training accuracy 95.00% || lr 3.892844493856909e-05


 89%|████████▉ | 2520/2835 [1:13:02<09:07,  1.74s/it]

Epoch[4/10](2520/2835) || training loss 0.6318 || training accuracy 92.50% || lr 3.845790467294369e-05


 89%|████████▉ | 2530/2835 [1:13:20<08:50,  1.74s/it]

Epoch[4/10](2530/2835) || training loss 0.6355 || training accuracy 90.83% || lr 3.7985709332212755e-05


 90%|████████▉ | 2540/2835 [1:13:37<08:31,  1.73s/it]

Epoch[4/10](2540/2835) || training loss 0.6378 || training accuracy 93.33% || lr 3.751197542351103e-05


 90%|████████▉ | 2550/2835 [1:13:54<08:11,  1.73s/it]

Epoch[4/10](2550/2835) || training loss 0.6825 || training accuracy 92.50% || lr 3.7036819833591845e-05


 90%|█████████ | 2560/2835 [1:14:12<07:58,  1.74s/it]

Epoch[4/10](2560/2835) || training loss 0.6477 || training accuracy 92.50% || lr 3.65603597999871e-05


 91%|█████████ | 2570/2835 [1:14:29<07:41,  1.74s/it]

Epoch[4/10](2570/2835) || training loss 0.6332 || training accuracy 94.17% || lr 3.608271288208067e-05


 91%|█████████ | 2580/2835 [1:14:46<07:07,  1.68s/it]

Epoch[4/10](2580/2835) || training loss 0.6504 || training accuracy 95.00% || lr 3.560399693210235e-05


 91%|█████████▏| 2590/2835 [1:15:04<07:03,  1.73s/it]

Epoch[4/10](2590/2835) || training loss 0.5954 || training accuracy 95.00% || lr 3.5124330066049644e-05


 92%|█████████▏| 2600/2835 [1:15:21<06:45,  1.72s/it]

Epoch[4/10](2600/2835) || training loss 0.5987 || training accuracy 94.17% || lr 3.4643830634544315e-05


 92%|█████████▏| 2610/2835 [1:15:38<06:29,  1.73s/it]

Epoch[4/10](2610/2835) || training loss 0.6197 || training accuracy 90.00% || lr 3.4162617193631234e-05


 92%|█████████▏| 2620/2835 [1:15:56<06:15,  1.75s/it]

Epoch[4/10](2620/2835) || training loss 0.603 || training accuracy 95.00% || lr 3.368080847552639e-05


 93%|█████████▎| 2630/2835 [1:16:13<05:56,  1.74s/it]

Epoch[4/10](2630/2835) || training loss 0.5923 || training accuracy 95.83% || lr 3.319852335932151e-05


 93%|█████████▎| 2640/2835 [1:16:30<05:39,  1.74s/it]

Epoch[4/10](2640/2835) || training loss 0.5811 || training accuracy 93.33% || lr 3.2715880841652446e-05


 93%|█████████▎| 2650/2835 [1:16:48<05:21,  1.74s/it]

Epoch[4/10](2650/2835) || training loss 0.6107 || training accuracy 95.00% || lr 3.2233000007338485e-05


 94%|█████████▍| 2660/2835 [1:17:05<05:04,  1.74s/it]

Epoch[4/10](2660/2835) || training loss 0.5843 || training accuracy 95.00% || lr 3.175e-05


 94%|█████████▍| 2670/2835 [1:17:23<04:45,  1.73s/it]

Epoch[4/10](2670/2835) || training loss 0.6496 || training accuracy 89.17% || lr 3.126699999266152e-05


 95%|█████████▍| 2680/2835 [1:17:40<04:34,  1.77s/it]

Epoch[4/10](2680/2835) || training loss 0.6384 || training accuracy 94.17% || lr 3.078411915834756e-05


 95%|█████████▍| 2690/2835 [1:17:57<04:12,  1.74s/it]

Epoch[4/10](2690/2835) || training loss 0.617 || training accuracy 95.83% || lr 3.0301476640678485e-05


 95%|█████████▌| 2700/2835 [1:18:15<03:55,  1.74s/it]

Epoch[4/10](2700/2835) || training loss 0.6587 || training accuracy 93.33% || lr 2.9819191524473622e-05


 96%|█████████▌| 2710/2835 [1:18:32<03:37,  1.74s/it]

Epoch[4/10](2710/2835) || training loss 0.5938 || training accuracy 94.17% || lr 2.9337382806368775e-05


 96%|█████████▌| 2720/2835 [1:18:49<03:20,  1.74s/it]

Epoch[4/10](2720/2835) || training loss 0.5973 || training accuracy 94.17% || lr 2.8856169365455687e-05


 96%|█████████▋| 2730/2835 [1:19:07<03:06,  1.77s/it]

Epoch[4/10](2730/2835) || training loss 0.6439 || training accuracy 91.67% || lr 2.837566993395036e-05


 97%|█████████▋| 2740/2835 [1:19:24<02:45,  1.75s/it]

Epoch[4/10](2740/2835) || training loss 0.63 || training accuracy 93.33% || lr 2.789600306789765e-05


 97%|█████████▋| 2750/2835 [1:19:41<02:26,  1.72s/it]

Epoch[4/10](2750/2835) || training loss 0.6198 || training accuracy 90.83% || lr 2.7417287117919336e-05


 97%|█████████▋| 2760/2835 [1:19:59<02:10,  1.74s/it]

Epoch[4/10](2760/2835) || training loss 0.6424 || training accuracy 93.33% || lr 2.6939640200012903e-05


 98%|█████████▊| 2770/2835 [1:20:16<01:51,  1.72s/it]

Epoch[4/10](2770/2835) || training loss 0.6201 || training accuracy 94.17% || lr 2.6463180166408157e-05


 98%|█████████▊| 2780/2835 [1:20:33<01:32,  1.69s/it]

Epoch[4/10](2780/2835) || training loss 0.6208 || training accuracy 95.00% || lr 2.5988024576488972e-05


 98%|█████████▊| 2790/2835 [1:20:50<01:15,  1.68s/it]

Epoch[4/10](2790/2835) || training loss 0.6023 || training accuracy 95.83% || lr 2.5514290667787247e-05


 99%|█████████▉| 2800/2835 [1:21:07<00:59,  1.71s/it]

Epoch[4/10](2800/2835) || training loss 0.6098 || training accuracy 91.67% || lr 2.504209532705632e-05


 99%|█████████▉| 2810/2835 [1:21:24<00:43,  1.74s/it]

Epoch[4/10](2810/2835) || training loss 0.5811 || training accuracy 94.17% || lr 2.4571555061430913e-05


 99%|█████████▉| 2820/2835 [1:21:41<00:26,  1.74s/it]

Epoch[4/10](2820/2835) || training loss 0.5969 || training accuracy 95.00% || lr 2.410278596968072e-05


100%|█████████▉| 2830/2835 [1:21:59<00:08,  1.74s/it]

Epoch[4/10](2830/2835) || training loss 0.6057 || training accuracy 93.33% || lr 2.3635903713564787e-05


100%|██████████| 2835/2835 [1:22:08<00:00,  1.74s/it]

Calculating validation results...





New best model for val accuracy : 94.97%! saving the best model..


  0%|          | 0/2835 [00:00<?, ?it/s]

[Val] acc : 94.97%, loss: 0.62 || best acc : 94.97%, best loss: 0.62


  0%|          | 10/2835 [00:17<1:22:06,  1.74s/it]

Epoch[5/10](10/2835) || training loss 0.6017 || training accuracy 92.50% || lr 2.2939370003620206e-05


  0%|          | 14/2835 [00:26<1:28:02,  1.87s/it]


KeyboardInterrupt: 

In [5]:
# Root of the evaluation split: `info.csv` and the `images/` folder are read
# from here, and `submission.csv` is written back into the same directory.
test_dir = '../input/data/eval'


from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize,CenterCrop
class TestDataset(data.Dataset):
    """Unlabeled image dataset backed by a list of file paths.

    Args:
        img_paths: sequence of image file paths; one sample per entry.
        transform: callable applied to each opened PIL image. A falsy value
            (e.g. ``None``) disables it and the raw PIL image is returned.
    """

    def __init__(self, img_paths, transform):
        self.transform = transform
        self.img_paths = img_paths

    def __len__(self):
        """Return how many image paths the dataset holds."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Open the image at ``index`` and return it, transformed when a transform is set."""
        picture = Image.open(self.img_paths[index])

        # Truthiness (not an `is None` check) decides whether the transform runs.
        if not self.transform:
            return picture
        return self.transform(picture)


# Load the evaluation metadata and locate the image folder.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')


# Build the test dataset and its DataLoader: one image path per `ImageID` row.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    ToTensor(),
    Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
])
# NOTE(review): the dataset is built with `transformer`, not the `transform`
# defined just above. `transformer` is not defined in this cell, so it must
# already exist in the session (presumably the transform used for training --
# confirm). If `transform` was the intended argument, change the name here.
dataset = TestDataset(image_paths, transformer)

# shuffle=False keeps predictions aligned with the row order of `submission`.
# No batch_size is passed, so the DataLoader default (1 sample per batch) applies.
loader = data.DataLoader(
    dataset,
    shuffle=False
)

# The model is expected to exist already as `model_ft`
# (load a trained checkpoint with torch.load beforehand if necessary).
model_ft.eval()


# Run the model over the test set and collect one class id per image.
all_predictions = []
# Gradients are never needed for inference, so disable them once for the
# whole loop instead of re-entering the context on every batch.
with torch.no_grad():
    for images in tqdm(loader):
        images = images.to(device)
        pred = model_ft(images)
        # The output columns are read as three heads:
        # [0:3] mask, [3:5] gender, [5:] age.
        mask_pred = pred[:, :3].argmax(dim=-1)
        gender_pred = pred[:, 3:5].argmax(dim=-1)
        age_pred = pred[:, 5:].argmax(dim=-1)
        # Fold the three head indices into a single class id
        # (mask * 6 + gender * 3 + age).
        pred = (mask_pred * 6 + gender_pred * 3 + age_pred).cpu().numpy()
        all_predictions.extend(pred)
submission['ans'] = all_predictions



# Save the submission file.
submission.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)

100%|██████████| 12600/12600 [14:53<00:00, 14.10it/s]
