In [1]:
import os
import time
import argparse

import torchvision
import torch
import torch.nn as nn

from util import AverageMeter
from encoder import SmallAlexNet
from align_uniform import align_loss, uniform_loss_prelog
from tqdm import tqdm
from collections import defaultdict
import copy

import matplotlib.pyplot as plt


class TwoAugUnsupervisedDatasetLbl(torch.utils.data.Dataset):
    r"""Returns two augmentation and no labels."""

    def __init__(self, dataset, transform, lblmap=None):
        self.dataset = dataset
        self.transform = transform
        self.lblmap = copy.deepcopy(lblmap)

    def __getitem__(self, index):
        image, lbl = self.dataset[index]
        lbl2return = lbl if self.lblmap is None else self.lblmap[lbl]
        return self.transform(image), self.transform(image), lbl2return

    def __len__(self):
        return len(self.dataset)

def parse_option():
    parser = argparse.ArgumentParser('STL-10 Representation Learning with Alignment and Uniformity Losses')

    parser.add_argument('--align_w', type=float, default=1, help='Alignment loss weight')
    parser.add_argument('--unif_w', type=float, default=1, help='Uniformity loss weight')
    parser.add_argument('--align_alpha', type=float, default=2, help='alpha in alignment loss')
    parser.add_argument('--unif_t', type=float, default=2, help='t in uniformity loss')

    parser.add_argument('--batch_size', type=int, default=256, help='Batch size')
    parser.add_argument('--epochs', type=int, default=400, help='Number of training epochs')
    parser.add_argument('--iter', type=int, default=0, help='Number of training epochs')
    parser.add_argument('--lr', type=float, default=None,
                        help='Learning rate. Default is linear scaling 0.12 per 256 batch size')
    parser.add_argument('--lr_decay_rate', type=float, default=0.1, help='Learning rate decay rate')
    parser.add_argument('--lr_decay_epochs', default=[155, 170, 185], nargs='*', type=int,
                        help='When to decay learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum')
    parser.add_argument('--weight_decay', type=float, default=1e-4, help='L2 weight decay')
    parser.add_argument('--feat_dim', type=int, default=128, help='Feature dimensionality')

    parser.add_argument('--num_workers', type=int, default=4, help='Number of data loader workers to use')
    parser.add_argument('--log_interval', type=int, default=40, help='Number of iterations between logs')
    parser.add_argument('--gpus', default=[0], nargs='*', type=int,
                        help='List of GPU indices to use, e.g., --gpus 0 1 2 3')

    parser.add_argument('--data_folder', type=str, default='./data', help='Path to data')
    parser.add_argument('--result_folder', type=str, default='./results', help='Base directory to save model')

    opt = parser.parse_args("")

    if opt.lr is None:
        opt.lr = 0.12 * (opt.batch_size / 256)

    opt.gpus = list(map(lambda x: torch.device('cuda', x), opt.gpus))

    opt.save_folder = os.path.join(
        opt.result_folder,
        f"cifar100_400_sideinformation_align{opt.align_w:g}alpha{opt.align_alpha:g}_unif{opt.unif_w:g}t{opt.unif_t:g}_iter{opt.iter}"
    )
    os.makedirs(opt.save_folder, exist_ok=True)

    return opt


opt = parse_option()

In [2]:
transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(32, scale=(0.08, 1)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        torchvision.transforms.RandomGrayscale(p=0.2),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.44087801806139126, 0.42790631331699347, 0.3867879370752931),
            (0.26826768628079806, 0.2610450402318512, 0.26866836876860795),
        ),
    ])

old_lbls = list(range(100))
labels_2_keep = list(range(25))
# labels_2_keep = [0,1,2,3]

old2new = {}
count = 0
for old_lbl in old_lbls:
    if old_lbl in labels_2_keep: 
        old2new[old_lbl] = count
        count += 1

for old_lbl in old_lbls:
    if old_lbl not in labels_2_keep: 
        old2new[old_lbl] = count

new_lbls = list(range(count+1))

In [None]:
def get_data_loader(opt):
    transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(32, scale=(0.08, 1)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        torchvision.transforms.RandomGrayscale(p=0.2),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.44087801806139126, 0.42790631331699347, 0.3867879370752931),
            (0.26826768628079806, 0.2610450402318512, 0.26866836876860795),
        ),
    ])
    dataset = TwoAugUnsupervisedDatasetLbl(
        torchvision.datasets.CIFAR100(opt.data_folder, 'train', download=True), 
        transform=transform, 
        lblmap=old2new )
    
    return torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, num_workers=opt.num_workers,
                                       shuffle=True, pin_memory=True)


print(f'Optimize: {opt.align_w:g} * loss_align(alpha={opt.align_alpha:g}) + {opt.unif_w:g} * loss_uniform(t={opt.unif_t:g})')

torch.cuda.set_device(opt.gpus[0])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

encoder = SmallAlexNet(feat_dim=opt.feat_dim, cifar=True).to(opt.gpus[0])

optim = torch.optim.Adam(encoder.parameters(), lr=1e-2)

loader = get_data_loader(opt)
align_meter = AverageMeter('align_loss')
unif_meter = AverageMeter('uniform_loss')
loss_meter = AverageMeter('total_loss')
it_time_meter = AverageMeter('iter_time')

for epoch in range(opt.epochs):
    align_meter.reset()
    unif_meter.reset()
    loss_meter.reset()
    it_time_meter.reset()
    t0 = time.time()
    for ii, (im_x, im_y, lbl) in enumerate(loader):
        optim.zero_grad()
        x, y = encoder(torch.cat([im_x.to(opt.gpus[0]), im_y.to(opt.gpus[0])])).chunk(2)
        
        align_loss_val = align_loss(x, y, alpha=opt.align_alpha)
        # group according to new_lbls

        z = torch.cat( [x, y])
        lbl_z = torch.cat([lbl, lbl])
        unif_losses = torch.cat([uniform_loss_prelog(z[lbl_z==new_lbl]) for new_lbl in new_lbls])
        unif_loss_val = torch.log( torch.mean(unif_losses) )
        
        loss = align_loss_val * opt.align_w + unif_loss_val * opt.unif_w
        align_meter.update(align_loss_val, x.shape[0])
        unif_meter.update(unif_loss_val)
        loss_meter.update(loss, x.shape[0])
        loss.backward()
        optim.step()
        it_time_meter.update(time.time() - t0)
        if ii % opt.log_interval == 0:
            print(f"Epoch {epoch}/{opt.epochs}\tIt {ii}/{len(loader)}\t" +
                  f"{align_meter}\t{unif_meter}\t{loss_meter}\t{it_time_meter}")
        t0 = time.time()

ckpt_file = os.path.join(opt.save_folder, 'encoder.pth')
torch.save(encoder.state_dict(), ckpt_file)
print(f'Saved to {ckpt_file}')

Optimize: 1 * loss_align(alpha=2) + 1 * loss_uniform(t=2)
Files already downloaded and verified
Epoch 0/400	It 0/196	align_loss 1.170543 (1.170543)	uniform_loss -2.484235 (-2.484235)	total_loss -1.313692 (-1.313692)	iter_time 3.413382 (3.413382)


In [None]:
"""
    Here we  do the linear evaluation, the old labels are provided to the linear objective as one hot
"""
import time
import argparse

import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F

from util import AverageMeter
from encoder import SmallAlexNet
MODEL_2_LOAD = "./results/base_200_sideinformation_align1alpha2_unif1t2_iter0/encoder.pth"
MODEL_2_LOAD = "./results/base_200_4_sideinformation_align1alpha2_unif1t2_iter0/encoder.pth"
MODEL_2_LOAD="./results/manual_labels_align1alpha2_unif1t2_iter0/encoder.pth"
MODEL_2_LOAD="./results/cifar100_manual_labels_align1alpha2_unif1t2_iter0/encoder.pth"
MODEL_2_LOAD="./results/cifar100_sideinformation_align1alpha2_unif1t2_iter0/encoder.pth"

USE_MOD_LBL = False
encoder = SmallAlexNet(feat_dim=opt.feat_dim, cifar=True).to(opt.gpus[0])
encoder.load_state_dict(torch.load(MODEL_2_LOAD))

In [10]:
def parse_option():
    parser = argparse.ArgumentParser('STL-10 Representation Learning with Alignment and Uniformity Losses')

    parser.add_argument('--encoder_checkpoint', type=str, help='Encoder checkpoint to evaluate', default=MODEL_2_LOAD)
    parser.add_argument('--feat_dim', type=int, default=128, help='Encoder feature dimensionality')
    parser.add_argument('--layer_index', type=int, default=-2, help='Evaluation layer')

    parser.add_argument('--batch_size', type=int, default=128, help='Batch size')
    parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--lr_decay_rate', type=float, default=0.2, help='Learning rate decay rate')
    parser.add_argument('--lr_decay_epochs', type=str, default='60,80', help='When to decay learning rate')

    parser.add_argument('--num_workers', type=int, default=6, help='Number of data loader workers to use')
    parser.add_argument('--log_interval', type=int, default=40, help='Number of iterations between logs')
    parser.add_argument('--gpus', default=[0], nargs='*', type=int,
                        help='List of GPU indices to use, e.g., --gpus 0 1 2 3')

    parser.add_argument('--data_folder', type=str, default='./data', help='Path to data')

    opt = parser.parse_args("")

    if opt.lr is None:
        opt.lr = 0.12 * (opt.batch_size / 256)

    opt.gpu = torch.device('cuda', opt.gpus[0])
    opt.lr_decay_epochs = list(map(int, opt.lr_decay_epochs.split(',')))

    return opt


class DatasetModifiedLbl(torch.utils.data.Dataset):
    r"""Returns two augmentation and no labels."""

    def __init__(self, dataset, lblmap=None):
        self.dataset = dataset
        self.lblmap = copy.deepcopy(lblmap)

    def __getitem__(self, index):
        image, lbl = self.dataset[index]
        lbl2return = lbl if self.lblmap is None else self.lblmap[lbl]
        return image, lbl2return

    def __len__(self):
        return len(self.dataset)

In [11]:
class DatasetModifiedLblandLbl(torch.utils.data.Dataset):
    r"""Returns two augmentation and no labels."""

    def __init__(self, dataset, lblmap):
        self.dataset = dataset
        self.lblmap = copy.deepcopy(lblmap)

    def __getitem__(self, index):
        image, lbl = self.dataset[index]
        return image, self.lblmap[lbl], lbl

    def __len__(self):
        return len(self.dataset)

In [12]:
def get_data_loaders(opt):
    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(32, scale=(0.08, 1)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.44087801806139126, 0.42790631331699347, 0.3867879370752931),
            (0.26826768628079806, 0.2610450402318512, 0.26866836876860795),
        ),
        torchvision.transforms.RandomHorizontalFlip()
    ])
    val_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(70),
        torchvision.transforms.CenterCrop(32),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.44087801806139126, 0.42790631331699347, 0.3867879370752931),
            (0.26826768628079806, 0.2610450402318512, 0.26866836876860795),
        ),
    ])
    train_dataset = DatasetModifiedLblandLbl( torchvision.datasets.CIFAR100(opt.data_folder, 'train', download=True, transform=train_transform), lblmap=old2new)
    val_dataset =  DatasetModifiedLblandLbl( torchvision.datasets.CIFAR100(opt.data_folder, 'test', transform=val_transform), lblmap=old2new)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=opt.batch_size,
                                               num_workers=opt.num_workers, shuffle=True, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=opt.batch_size,
                                             num_workers=opt.num_workers, pin_memory=True)
    return train_loader, val_loader


def validate_comb(opt, encoder, classifier, val_loader):
    correct = 0
    with torch.no_grad():
        for images, labels_mod, labels_act in val_loader:
            pred = classifier(torch.cat( (encoder(images.to(opt.gpus[0]), layer_index=opt.layer_index).flatten(1), torch.nn.functional.one_hot(labels_mod.to(opt.gpus[0]), num_classes=len(labels_2_keep)+1)), dim=1)).argmax(dim=1)
            correct += (pred.cpu() == labels_act).sum().item()
    return correct / len(val_loader.dataset)

def validate(opt, encoder, classifier, val_loader):
    correct = 0
    with torch.no_grad():
        for images, labels_mod, labels_act in val_loader:
            pred = classifier( encoder(images.to(opt.gpus[0]), layer_index=opt.layer_index).flatten(1) ).argmax(dim=1)
            correct += (pred.cpu() == labels_act).sum().item()
    return correct / len(val_loader.dataset)

In [13]:
opt.gpu=opt.gpus[0]

In [14]:
opt = parse_option()

opt.gpu=opt.gpus[0]
torch.cuda.set_device(opt.gpus[0])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

encoder.eval()
train_loader, val_loader = get_data_loaders(opt)

with torch.no_grad():
    sample, _ = train_loader.dataset.dataset[0]
    eval_numel = encoder(sample.unsqueeze(0).to(opt.gpus[0]), layer_index=opt.layer_index).numel()
print(f'Feature dimension: {eval_numel}')


classifier = nn.Linear(eval_numel, 100).to(opt.gpus[0]) if not USE_MOD_LBL else nn.Linear( eval_numel + len(labels_2_keep) + 1,100).to(opt.gpus[0])

optim = torch.optim.Adam(classifier.parameters(), lr=opt.lr, betas=(0.5, 0.999))
val_accs = []
loss_meter = AverageMeter('loss')
it_time_meter = AverageMeter('iter_time')
for epoch in tqdm(range(opt.epochs)):
    loss_meter.reset()
    it_time_meter.reset()
    t0 = time.time()
    for ii, (images, labels_mod, labels) in enumerate(train_loader):
        optim.zero_grad()
        with torch.no_grad():
            feats = encoder(images.to(opt.gpus[0]), layer_index=opt.layer_index).flatten(1)

        if USE_MOD_LBL:
            logits = classifier(torch.cat( (feats, torch.nn.functional.one_hot(labels_mod.to(opt.gpus[0]), num_classes=len(labels_2_keep)+1 )),dim=1))
        else:
            logits = classifier(feats)
        
        loss = F.cross_entropy(logits, labels.to(opt.gpus[0]))
        loss_meter.update(loss, images.shape[0])
        loss.backward()
        optim.step()
        it_time_meter.update(time.time() - t0)
        if ii % opt.log_interval == 0:
            print(f"Epoch {epoch}/{opt.epochs}\tIt {ii}/{len(train_loader)}\t{loss_meter}\t{it_time_meter}")
        t0 = time.time()
    val_acc = validate_comb(opt,encoder,classifier,val_loader) if USE_MOD_LBL else validate(opt, encoder, classifier, val_loader) 
    val_accs.append(val_acc)
    print(f"Epoch {epoch}/{opt.epochs}\tval_acc {val_acc*100:.4g}%")
print(f"Best validation accuracy {max(val_accs)}")

Files already downloaded and verified
Feature dimension: 4096


  0%|                                                                                                                                                  | 0/100 [00:00<?, ?it/s]

Epoch 0/100	It 0/391	loss 4.607521 (4.607521)	iter_time 0.099628 (0.099628)
Epoch 0/100	It 40/391	loss 3.905118 (4.197085)	iter_time 0.005078 (0.010257)
Epoch 0/100	It 80/391	loss 3.484926 (3.905156)	iter_time 0.005043 (0.009125)
Epoch 0/100	It 120/391	loss 3.272768 (3.715056)	iter_time 0.005205 (0.008926)
Epoch 0/100	It 160/391	loss 3.061009 (3.562499)	iter_time 0.005024 (0.008908)
Epoch 0/100	It 200/391	loss 2.903367 (3.454365)	iter_time 0.004706 (0.008753)
Epoch 0/100	It 240/391	loss 2.838861 (3.358420)	iter_time 0.009608 (0.008611)
Epoch 0/100	It 280/391	loss 2.703639 (3.285984)	iter_time 0.030228 (0.008636)
Epoch 0/100	It 320/391	loss 2.756047 (3.223621)	iter_time 0.029022 (0.008637)
Epoch 0/100	It 360/391	loss 2.799281 (3.171773)	iter_time 0.004683 (0.008645)


  1%|█▍                                                                                                                                        | 1/100 [00:06<10:16,  6.23s/it]

Epoch 0/100	val_acc 32.84%
Epoch 1/100	It 0/391	loss 2.739886 (2.739886)	iter_time 0.103920 (0.103920)
Epoch 1/100	It 40/391	loss 2.705095 (2.662412)	iter_time 0.004777 (0.011043)
Epoch 1/100	It 80/391	loss 2.461795 (2.632401)	iter_time 0.018184 (0.010019)
Epoch 1/100	It 120/391	loss 2.613197 (2.632971)	iter_time 0.004960 (0.009194)
Epoch 1/100	It 160/391	loss 2.644512 (2.631217)	iter_time 0.019723 (0.008979)
Epoch 1/100	It 200/391	loss 2.579678 (2.625735)	iter_time 0.005044 (0.008762)
Epoch 1/100	It 240/391	loss 2.530645 (2.615306)	iter_time 0.004725 (0.008609)
Epoch 1/100	It 280/391	loss 2.685738 (2.610246)	iter_time 0.016864 (0.008615)
Epoch 1/100	It 320/391	loss 2.395944 (2.601833)	iter_time 0.004920 (0.008676)
Epoch 1/100	It 360/391	loss 2.483490 (2.593659)	iter_time 0.004784 (0.008666)


  2%|██▊                                                                                                                                       | 2/100 [00:12<10:07,  6.20s/it]

Epoch 1/100	val_acc 35.37%
Epoch 2/100	It 0/391	loss 2.739130 (2.739130)	iter_time 0.098551 (0.098551)
Epoch 2/100	It 40/391	loss 2.381817 (2.482710)	iter_time 0.004825 (0.010527)
Epoch 2/100	It 80/391	loss 2.375330 (2.474176)	iter_time 0.004902 (0.009330)
Epoch 2/100	It 120/391	loss 2.222826 (2.475822)	iter_time 0.004712 (0.009001)
Epoch 2/100	It 160/391	loss 2.176385 (2.478808)	iter_time 0.005133 (0.008907)
Epoch 2/100	It 200/391	loss 2.658645 (2.475097)	iter_time 0.004874 (0.008775)
Epoch 2/100	It 240/391	loss 2.457829 (2.470846)	iter_time 0.005016 (0.008699)
Epoch 2/100	It 280/391	loss 2.190024 (2.467911)	iter_time 0.005036 (0.008593)
Epoch 2/100	It 320/391	loss 2.406237 (2.463467)	iter_time 0.005367 (0.008456)
Epoch 2/100	It 360/391	loss 2.348676 (2.463449)	iter_time 0.021386 (0.008451)


  3%|████▏                                                                                                                                     | 3/100 [00:18<09:55,  6.14s/it]

Epoch 2/100	val_acc 36.71%
Epoch 3/100	It 0/391	loss 2.502881 (2.502881)	iter_time 0.099500 (0.099500)
Epoch 3/100	It 40/391	loss 2.293525 (2.351550)	iter_time 0.004808 (0.011218)
Epoch 3/100	It 80/391	loss 2.459039 (2.376831)	iter_time 0.017932 (0.010174)
Epoch 3/100	It 120/391	loss 2.859833 (2.376492)	iter_time 0.005298 (0.009227)
Epoch 3/100	It 160/391	loss 2.406574 (2.384044)	iter_time 0.010987 (0.009137)
Epoch 3/100	It 200/391	loss 2.653572 (2.385927)	iter_time 0.004974 (0.008780)
Epoch 3/100	It 240/391	loss 2.327132 (2.383910)	iter_time 0.004986 (0.008693)
Epoch 3/100	It 280/391	loss 2.261758 (2.391434)	iter_time 0.010283 (0.008640)
Epoch 3/100	It 320/391	loss 2.393767 (2.390467)	iter_time 0.004995 (0.008561)
Epoch 3/100	It 360/391	loss 2.250165 (2.390850)	iter_time 0.004966 (0.008493)


  4%|█████▌                                                                                                                                    | 4/100 [00:24<09:46,  6.11s/it]

Epoch 3/100	val_acc 37.6%
Epoch 4/100	It 0/391	loss 2.300701 (2.300701)	iter_time 0.118571 (0.118571)
Epoch 4/100	It 40/391	loss 2.544976 (2.318841)	iter_time 0.007751 (0.010482)
Epoch 4/100	It 80/391	loss 2.496604 (2.328153)	iter_time 0.004864 (0.009432)
Epoch 4/100	It 120/391	loss 2.177196 (2.337068)	iter_time 0.005006 (0.009067)
Epoch 4/100	It 160/391	loss 2.414633 (2.344266)	iter_time 0.008915 (0.008852)
Epoch 4/100	It 200/391	loss 2.384373 (2.350565)	iter_time 0.004658 (0.008639)
Epoch 4/100	It 240/391	loss 2.492121 (2.345095)	iter_time 0.004786 (0.008519)
Epoch 4/100	It 280/391	loss 2.291677 (2.344655)	iter_time 0.005098 (0.008486)
Epoch 4/100	It 320/391	loss 2.348954 (2.344007)	iter_time 0.004773 (0.008512)
Epoch 4/100	It 360/391	loss 2.132143 (2.344082)	iter_time 0.004963 (0.008439)


  5%|██████▉                                                                                                                                   | 5/100 [00:30<09:40,  6.11s/it]

Epoch 4/100	val_acc 38.26%
Epoch 5/100	It 0/391	loss 2.320901 (2.320901)	iter_time 0.119988 (0.119988)
Epoch 5/100	It 40/391	loss 2.421325 (2.294241)	iter_time 0.004957 (0.010089)
Epoch 5/100	It 80/391	loss 2.307942 (2.298982)	iter_time 0.007914 (0.009082)
Epoch 5/100	It 120/391	loss 2.584363 (2.294641)	iter_time 0.005262 (0.008652)
Epoch 5/100	It 160/391	loss 2.356166 (2.304600)	iter_time 0.004943 (0.008528)
Epoch 5/100	It 200/391	loss 2.220845 (2.307098)	iter_time 0.011248 (0.008431)
Epoch 5/100	It 240/391	loss 2.279059 (2.299303)	iter_time 0.004924 (0.008387)
Epoch 5/100	It 280/391	loss 2.278745 (2.303731)	iter_time 0.005110 (0.008368)
Epoch 5/100	It 320/391	loss 2.381857 (2.304088)	iter_time 0.025493 (0.008361)
Epoch 5/100	It 360/391	loss 2.269632 (2.302532)	iter_time 0.005043 (0.008275)


  6%|████████▎                                                                                                                                 | 6/100 [00:36<09:31,  6.07s/it]

Epoch 5/100	val_acc 38.9%
Epoch 6/100	It 0/391	loss 2.043574 (2.043574)	iter_time 0.101890 (0.101890)
Epoch 6/100	It 40/391	loss 2.380728 (2.247993)	iter_time 0.005215 (0.010987)
Epoch 6/100	It 80/391	loss 2.750546 (2.288017)	iter_time 0.016325 (0.009376)
Epoch 6/100	It 120/391	loss 2.154503 (2.270351)	iter_time 0.004741 (0.009170)
Epoch 6/100	It 160/391	loss 2.143884 (2.270865)	iter_time 0.005164 (0.009148)
Epoch 6/100	It 200/391	loss 2.264806 (2.271491)	iter_time 0.004976 (0.008909)
Epoch 6/100	It 240/391	loss 2.202191 (2.269156)	iter_time 0.004932 (0.008926)
Epoch 6/100	It 280/391	loss 2.371518 (2.272477)	iter_time 0.004693 (0.008839)
Epoch 6/100	It 320/391	loss 2.146800 (2.273046)	iter_time 0.004845 (0.008926)
Epoch 6/100	It 360/391	loss 2.487267 (2.271341)	iter_time 0.004703 (0.008813)


  7%|█████████▋                                                                                                                                | 7/100 [00:42<09:28,  6.12s/it]

Epoch 6/100	val_acc 39.29%
Epoch 7/100	It 0/391	loss 2.316791 (2.316791)	iter_time 0.105319 (0.105319)
Epoch 7/100	It 40/391	loss 1.955586 (2.208167)	iter_time 0.004928 (0.010352)
Epoch 7/100	It 80/391	loss 2.264788 (2.200315)	iter_time 0.005185 (0.009437)
Epoch 7/100	It 120/391	loss 2.504409 (2.224012)	iter_time 0.016222 (0.009019)
Epoch 7/100	It 160/391	loss 2.359607 (2.226723)	iter_time 0.004973 (0.009008)
Epoch 7/100	It 200/391	loss 2.262427 (2.229292)	iter_time 0.026601 (0.008903)
Epoch 7/100	It 240/391	loss 2.226571 (2.226890)	iter_time 0.022526 (0.008756)
Epoch 7/100	It 280/391	loss 2.359841 (2.230509)	iter_time 0.004899 (0.008750)
Epoch 7/100	It 320/391	loss 2.460229 (2.227996)	iter_time 0.005355 (0.008750)
Epoch 7/100	It 360/391	loss 2.166053 (2.228202)	iter_time 0.023919 (0.008676)


  8%|███████████                                                                                                                               | 8/100 [00:48<09:23,  6.12s/it]

Epoch 7/100	val_acc 39.69%
Epoch 8/100	It 0/391	loss 1.965752 (1.965752)	iter_time 0.100565 (0.100565)
Epoch 8/100	It 40/391	loss 2.094476 (2.226670)	iter_time 0.005115 (0.010175)
Epoch 8/100	It 80/391	loss 2.130908 (2.225749)	iter_time 0.023059 (0.009270)
Epoch 8/100	It 120/391	loss 2.325482 (2.212714)	iter_time 0.004879 (0.008711)
Epoch 8/100	It 160/391	loss 2.262461 (2.208975)	iter_time 0.004954 (0.008509)
Epoch 8/100	It 200/391	loss 2.332862 (2.212129)	iter_time 0.005353 (0.008457)
Epoch 8/100	It 240/391	loss 2.412527 (2.205216)	iter_time 0.005229 (0.008393)
Epoch 8/100	It 280/391	loss 2.328029 (2.211778)	iter_time 0.004923 (0.008283)
Epoch 8/100	It 320/391	loss 2.035512 (2.213712)	iter_time 0.004960 (0.008218)
Epoch 8/100	It 360/391	loss 2.174893 (2.209158)	iter_time 0.017296 (0.008183)


  9%|████████████▍                                                                                                                             | 9/100 [00:54<09:12,  6.07s/it]

Epoch 8/100	val_acc 40.06%
Epoch 9/100	It 0/391	loss 1.904132 (1.904132)	iter_time 0.116330 (0.116330)
Epoch 9/100	It 40/391	loss 2.256415 (2.186218)	iter_time 0.005020 (0.010317)
Epoch 9/100	It 80/391	loss 2.173943 (2.186996)	iter_time 0.004888 (0.009216)
Epoch 9/100	It 120/391	loss 2.033791 (2.189285)	iter_time 0.005037 (0.008737)
Epoch 9/100	It 160/391	loss 2.012881 (2.189415)	iter_time 0.004941 (0.008586)
Epoch 9/100	It 200/391	loss 2.140981 (2.192875)	iter_time 0.004997 (0.008537)
Epoch 9/100	It 240/391	loss 2.252023 (2.194371)	iter_time 0.027403 (0.008493)
Epoch 9/100	It 280/391	loss 2.213365 (2.197410)	iter_time 0.005055 (0.008298)
Epoch 9/100	It 320/391	loss 2.075807 (2.196322)	iter_time 0.005062 (0.008291)
Epoch 9/100	It 360/391	loss 2.509606 (2.192041)	iter_time 0.004723 (0.008350)


 10%|█████████████▋                                                                                                                           | 10/100 [01:01<09:09,  6.11s/it]

Epoch 9/100	val_acc 40.45%
Epoch 10/100	It 0/391	loss 2.103325 (2.103325)	iter_time 0.099767 (0.099767)
Epoch 10/100	It 40/391	loss 1.926887 (2.182274)	iter_time 0.005078 (0.010635)
Epoch 10/100	It 80/391	loss 2.069104 (2.186292)	iter_time 0.004714 (0.009536)
Epoch 10/100	It 120/391	loss 2.337014 (2.186362)	iter_time 0.004859 (0.009347)
Epoch 10/100	It 160/391	loss 1.937606 (2.176270)	iter_time 0.004973 (0.009346)
Epoch 10/100	It 200/391	loss 2.446547 (2.179525)	iter_time 0.005009 (0.009173)
Epoch 10/100	It 240/391	loss 2.249418 (2.176135)	iter_time 0.005424 (0.009026)
Epoch 10/100	It 280/391	loss 2.269075 (2.180179)	iter_time 0.005082 (0.008816)
Epoch 10/100	It 320/391	loss 2.171921 (2.175401)	iter_time 0.005131 (0.008750)
Epoch 10/100	It 360/391	loss 2.583627 (2.173442)	iter_time 0.005027 (0.008678)


 11%|███████████████                                                                                                                          | 11/100 [01:07<09:05,  6.13s/it]

Epoch 10/100	val_acc 40.89%
Epoch 11/100	It 0/391	loss 2.098807 (2.098807)	iter_time 0.099225 (0.099225)
Epoch 11/100	It 40/391	loss 2.096352 (2.142255)	iter_time 0.016672 (0.010565)
Epoch 11/100	It 80/391	loss 1.908875 (2.129576)	iter_time 0.005029 (0.009220)
Epoch 11/100	It 120/391	loss 1.978552 (2.129199)	iter_time 0.004945 (0.008736)
Epoch 11/100	It 160/391	loss 2.055690 (2.130944)	iter_time 0.026556 (0.008629)
Epoch 11/100	It 200/391	loss 2.144235 (2.139083)	iter_time 0.004987 (0.008412)
Epoch 11/100	It 240/391	loss 2.176706 (2.146340)	iter_time 0.005023 (0.008363)
Epoch 11/100	It 280/391	loss 2.142986 (2.148006)	iter_time 0.023223 (0.008329)
Epoch 11/100	It 320/391	loss 2.205807 (2.152656)	iter_time 0.004933 (0.008226)
Epoch 11/100	It 360/391	loss 2.067489 (2.158051)	iter_time 0.005234 (0.008154)


 12%|████████████████▍                                                                                                                        | 12/100 [01:13<09:00,  6.14s/it]

Epoch 11/100	val_acc 41.09%
Epoch 12/100	It 0/391	loss 2.038116 (2.038116)	iter_time 0.102408 (0.102408)
Epoch 12/100	It 40/391	loss 2.260733 (2.136010)	iter_time 0.005023 (0.010331)
Epoch 12/100	It 80/391	loss 2.319032 (2.163386)	iter_time 0.004708 (0.009221)
Epoch 12/100	It 120/391	loss 2.103851 (2.146477)	iter_time 0.004870 (0.008855)
Epoch 12/100	It 160/391	loss 2.193433 (2.141063)	iter_time 0.004806 (0.008600)
Epoch 12/100	It 200/391	loss 2.456195 (2.147349)	iter_time 0.004852 (0.008663)
Epoch 12/100	It 240/391	loss 1.950900 (2.141647)	iter_time 0.004754 (0.008568)
Epoch 12/100	It 280/391	loss 1.942787 (2.140996)	iter_time 0.004869 (0.008562)
Epoch 12/100	It 320/391	loss 2.401213 (2.148768)	iter_time 0.004743 (0.008540)
Epoch 12/100	It 360/391	loss 2.087904 (2.147679)	iter_time 0.004816 (0.008509)


 13%|█████████████████▊                                                                                                                       | 13/100 [01:19<08:55,  6.16s/it]

Epoch 12/100	val_acc 41.43%
Epoch 13/100	It 0/391	loss 1.905328 (1.905328)	iter_time 0.118948 (0.118948)
Epoch 13/100	It 40/391	loss 2.250306 (2.082228)	iter_time 0.005339 (0.011217)
Epoch 13/100	It 80/391	loss 1.941451 (2.091975)	iter_time 0.005081 (0.009331)
Epoch 13/100	It 120/391	loss 2.061427 (2.107946)	iter_time 0.004903 (0.008894)
Epoch 13/100	It 160/391	loss 2.387115 (2.110530)	iter_time 0.005196 (0.008804)
Epoch 13/100	It 200/391	loss 2.134454 (2.113749)	iter_time 0.004937 (0.008694)
Epoch 13/100	It 240/391	loss 2.233635 (2.115956)	iter_time 0.004951 (0.008446)
Epoch 13/100	It 280/391	loss 2.201389 (2.114999)	iter_time 0.004837 (0.008415)
Epoch 13/100	It 320/391	loss 2.086199 (2.119175)	iter_time 0.005217 (0.008385)
Epoch 13/100	It 360/391	loss 2.164670 (2.119866)	iter_time 0.015586 (0.008409)


 14%|███████████████████▏                                                                                                                     | 14/100 [01:25<08:49,  6.16s/it]

Epoch 13/100	val_acc 41.81%
Epoch 14/100	It 0/391	loss 2.150934 (2.150934)	iter_time 0.101128 (0.101128)
Epoch 14/100	It 40/391	loss 1.919829 (2.093998)	iter_time 0.016119 (0.010725)
Epoch 14/100	It 80/391	loss 2.183081 (2.102417)	iter_time 0.031543 (0.009758)
Epoch 14/100	It 120/391	loss 2.222980 (2.100161)	iter_time 0.014195 (0.009247)
Epoch 14/100	It 160/391	loss 2.452598 (2.095459)	iter_time 0.004982 (0.008871)
Epoch 14/100	It 200/391	loss 2.185271 (2.106142)	iter_time 0.005326 (0.008613)
Epoch 14/100	It 240/391	loss 2.194177 (2.106812)	iter_time 0.004815 (0.008511)
Epoch 14/100	It 280/391	loss 2.075547 (2.108352)	iter_time 0.005159 (0.008523)
Epoch 14/100	It 320/391	loss 2.025884 (2.108103)	iter_time 0.004948 (0.008447)
Epoch 14/100	It 360/391	loss 2.322089 (2.110956)	iter_time 0.004872 (0.008370)


 15%|████████████████████▌                                                                                                                    | 15/100 [01:31<08:41,  6.14s/it]

Epoch 14/100	val_acc 41.97%
Epoch 15/100	It 0/391	loss 2.149159 (2.149159)	iter_time 0.101786 (0.101786)
Epoch 15/100	It 40/391	loss 2.466039 (2.101251)	iter_time 0.004688 (0.010973)
Epoch 15/100	It 80/391	loss 1.867011 (2.074185)	iter_time 0.004991 (0.009670)
Epoch 15/100	It 120/391	loss 2.247872 (2.076211)	iter_time 0.010404 (0.009261)
Epoch 15/100	It 160/391	loss 2.236578 (2.075137)	iter_time 0.012952 (0.008990)
Epoch 15/100	It 200/391	loss 2.192996 (2.075780)	iter_time 0.004916 (0.008890)
Epoch 15/100	It 240/391	loss 2.019377 (2.076609)	iter_time 0.004776 (0.008864)
Epoch 15/100	It 280/391	loss 2.223459 (2.085709)	iter_time 0.005260 (0.008746)
Epoch 15/100	It 320/391	loss 2.036312 (2.088832)	iter_time 0.004979 (0.008640)
Epoch 15/100	It 360/391	loss 2.313132 (2.087975)	iter_time 0.004750 (0.008669)


 16%|█████████████████████▉                                                                                                                   | 16/100 [01:38<08:36,  6.15s/it]

Epoch 15/100	val_acc 42.28%
Epoch 16/100	It 0/391	loss 2.231405 (2.231405)	iter_time 0.099287 (0.099287)
Epoch 16/100	It 40/391	loss 2.060786 (2.075574)	iter_time 0.005014 (0.011260)
Epoch 16/100	It 80/391	loss 2.315652 (2.082866)	iter_time 0.005315 (0.009755)
Epoch 16/100	It 120/391	loss 2.174554 (2.087161)	iter_time 0.025756 (0.009159)
Epoch 16/100	It 160/391	loss 2.173178 (2.087717)	iter_time 0.005022 (0.008906)
Epoch 16/100	It 200/391	loss 1.888680 (2.083910)	iter_time 0.004757 (0.008783)
Epoch 16/100	It 240/391	loss 2.005292 (2.079006)	iter_time 0.025083 (0.008613)
Epoch 16/100	It 280/391	loss 1.951373 (2.079026)	iter_time 0.004977 (0.008489)
Epoch 16/100	It 320/391	loss 2.316564 (2.078251)	iter_time 0.005157 (0.008455)
Epoch 16/100	It 360/391	loss 2.129911 (2.082013)	iter_time 0.005134 (0.008390)


 17%|███████████████████████▎                                                                                                                 | 17/100 [01:44<08:31,  6.16s/it]

Epoch 16/100	val_acc 42.51%
Epoch 17/100	It 0/391	loss 1.928273 (1.928273)	iter_time 0.100422 (0.100422)
Epoch 17/100	It 40/391	loss 2.129981 (2.068932)	iter_time 0.005092 (0.010416)
Epoch 17/100	It 80/391	loss 2.259131 (2.061019)	iter_time 0.004781 (0.009157)
Epoch 17/100	It 120/391	loss 2.162414 (2.069061)	iter_time 0.005062 (0.008807)
Epoch 17/100	It 160/391	loss 2.178116 (2.067918)	iter_time 0.020353 (0.008500)
Epoch 17/100	It 200/391	loss 2.020501 (2.067190)	iter_time 0.004968 (0.008362)
Epoch 17/100	It 240/391	loss 1.839126 (2.063777)	iter_time 0.014130 (0.008277)
Epoch 17/100	It 280/391	loss 2.208632 (2.064703)	iter_time 0.004683 (0.008287)
Epoch 17/100	It 320/391	loss 2.123410 (2.059723)	iter_time 0.025514 (0.008303)
Epoch 17/100	It 360/391	loss 1.898752 (2.060708)	iter_time 0.005101 (0.008243)


 18%|████████████████████████▋                                                                                                                | 18/100 [01:50<08:22,  6.13s/it]

Epoch 17/100	val_acc 42.68%
Epoch 18/100	It 0/391	loss 1.996182 (1.996182)	iter_time 0.097039 (0.097039)
Epoch 18/100	It 40/391	loss 1.998348 (1.994820)	iter_time 0.004923 (0.010369)
Epoch 18/100	It 80/391	loss 1.829887 (2.035784)	iter_time 0.005073 (0.009069)
Epoch 18/100	It 120/391	loss 1.715597 (2.028224)	iter_time 0.004931 (0.008833)
Epoch 18/100	It 160/391	loss 2.171799 (2.037323)	iter_time 0.023756 (0.008637)
Epoch 18/100	It 200/391	loss 2.164151 (2.039227)	iter_time 0.004696 (0.008436)
Epoch 18/100	It 240/391	loss 2.079733 (2.040402)	iter_time 0.004835 (0.008427)
Epoch 18/100	It 280/391	loss 1.779599 (2.043902)	iter_time 0.004922 (0.008271)
Epoch 18/100	It 320/391	loss 1.990798 (2.045211)	iter_time 0.017671 (0.008241)
Epoch 18/100	It 360/391	loss 1.905905 (2.043998)	iter_time 0.022427 (0.008216)


 19%|██████████████████████████                                                                                                               | 19/100 [01:56<08:14,  6.11s/it]

Epoch 18/100	val_acc 42.8%
Epoch 19/100	It 0/391	loss 1.856793 (1.856793)	iter_time 0.099810 (0.099810)
Epoch 19/100	It 40/391	loss 2.221458 (2.007637)	iter_time 0.004757 (0.011156)
Epoch 19/100	It 80/391	loss 2.177422 (2.017500)	iter_time 0.019679 (0.009387)
Epoch 19/100	It 120/391	loss 1.885818 (2.028398)	iter_time 0.004896 (0.008974)
Epoch 19/100	It 160/391	loss 1.835882 (2.029543)	iter_time 0.005080 (0.008909)
Epoch 19/100	It 200/391	loss 2.035971 (2.029613)	iter_time 0.004789 (0.008610)
Epoch 19/100	It 240/391	loss 2.249745 (2.031265)	iter_time 0.005018 (0.008573)
Epoch 19/100	It 280/391	loss 2.182249 (2.032357)	iter_time 0.004811 (0.008624)
Epoch 19/100	It 320/391	loss 1.928762 (2.035674)	iter_time 0.018733 (0.008553)
Epoch 19/100	It 360/391	loss 2.107076 (2.039623)	iter_time 0.004912 (0.008590)


 20%|███████████████████████████▍                                                                                                             | 20/100 [02:02<08:10,  6.13s/it]

Epoch 19/100	val_acc 42.86%
Epoch 20/100	It 0/391	loss 2.133306 (2.133306)	iter_time 0.121671 (0.121671)
Epoch 20/100	It 40/391	loss 1.994964 (1.969277)	iter_time 0.004654 (0.011165)
Epoch 20/100	It 80/391	loss 1.953084 (2.012241)	iter_time 0.004705 (0.009988)
Epoch 20/100	It 120/391	loss 1.693084 (2.023793)	iter_time 0.016822 (0.009664)
Epoch 20/100	It 160/391	loss 1.989649 (2.016915)	iter_time 0.005105 (0.009136)
Epoch 20/100	It 200/391	loss 2.030354 (2.019606)	iter_time 0.005188 (0.008946)
Epoch 20/100	It 240/391	loss 2.088324 (2.019876)	iter_time 0.023241 (0.008825)
Epoch 20/100	It 280/391	loss 2.128557 (2.024917)	iter_time 0.004703 (0.008621)
Epoch 20/100	It 320/391	loss 2.020305 (2.029294)	iter_time 0.020230 (0.008566)
Epoch 20/100	It 360/391	loss 2.094332 (2.027685)	iter_time 0.005134 (0.008484)


 21%|████████████████████████████▊                                                                                                            | 21/100 [02:08<08:04,  6.13s/it]

Epoch 20/100	val_acc 43.31%
Epoch 21/100	It 0/391	loss 2.133212 (2.133212)	iter_time 0.100771 (0.100771)
Epoch 21/100	It 40/391	loss 1.892859 (1.980466)	iter_time 0.004954 (0.010976)
Epoch 21/100	It 80/391	loss 2.059856 (2.007036)	iter_time 0.004803 (0.009458)
Epoch 21/100	It 120/391	loss 1.990045 (2.009385)	iter_time 0.009104 (0.009296)
Epoch 21/100	It 160/391	loss 1.942028 (2.018093)	iter_time 0.004585 (0.009327)
Epoch 21/100	It 200/391	loss 1.959716 (2.016974)	iter_time 0.004960 (0.009218)
Epoch 21/100	It 240/391	loss 2.088339 (2.021386)	iter_time 0.028825 (0.009224)
Epoch 21/100	It 280/391	loss 2.107996 (2.017470)	iter_time 0.004687 (0.009155)
Epoch 21/100	It 320/391	loss 2.196538 (2.017135)	iter_time 0.004779 (0.009176)
Epoch 21/100	It 360/391	loss 2.199277 (2.022708)	iter_time 0.028502 (0.009186)


 22%|██████████████████████████████▏                                                                                                          | 22/100 [02:15<08:05,  6.22s/it]

Epoch 21/100	val_acc 43.33%
Epoch 22/100	It 0/391	loss 1.918061 (1.918061)	iter_time 0.098071 (0.098071)
Epoch 22/100	It 40/391	loss 2.075123 (2.042582)	iter_time 0.005150 (0.010912)
Epoch 22/100	It 80/391	loss 1.851073 (2.012936)	iter_time 0.005081 (0.009217)
Epoch 22/100	It 120/391	loss 2.239351 (2.026534)	iter_time 0.020619 (0.008959)
Epoch 22/100	It 160/391	loss 2.065562 (2.014672)	iter_time 0.030226 (0.008996)
Epoch 22/100	It 200/391	loss 2.062414 (2.014397)	iter_time 0.005064 (0.008816)
Epoch 22/100	It 240/391	loss 2.263378 (2.018285)	iter_time 0.004998 (0.008604)
Epoch 22/100	It 280/391	loss 2.200206 (2.019500)	iter_time 0.005002 (0.008523)
Epoch 22/100	It 320/391	loss 2.017107 (2.020365)	iter_time 0.005160 (0.008486)
Epoch 22/100	It 360/391	loss 1.780295 (2.019130)	iter_time 0.005267 (0.008393)


 23%|███████████████████████████████▌                                                                                                         | 23/100 [02:21<07:54,  6.16s/it]

Epoch 22/100	val_acc 43.72%
Epoch 23/100	It 0/391	loss 2.010567 (2.010567)	iter_time 0.097184 (0.097184)
Epoch 23/100	It 40/391	loss 1.983336 (1.960011)	iter_time 0.004776 (0.011191)
Epoch 23/100	It 80/391	loss 2.042523 (1.961881)	iter_time 0.005027 (0.009701)
Epoch 23/100	It 120/391	loss 1.883152 (1.972411)	iter_time 0.004953 (0.009651)
Epoch 23/100	It 160/391	loss 1.706345 (1.981005)	iter_time 0.015930 (0.009179)
Epoch 23/100	It 200/391	loss 2.000626 (1.983157)	iter_time 0.005041 (0.008890)
Epoch 23/100	It 240/391	loss 2.080717 (1.991634)	iter_time 0.005381 (0.008907)
Epoch 23/100	It 280/391	loss 1.966499 (1.996269)	iter_time 0.004937 (0.008847)
Epoch 23/100	It 320/391	loss 1.915984 (2.000176)	iter_time 0.005242 (0.008669)
Epoch 23/100	It 360/391	loss 2.133240 (2.001910)	iter_time 0.009965 (0.008544)


 24%|████████████████████████████████▉                                                                                                        | 24/100 [02:27<07:47,  6.15s/it]

Epoch 23/100	val_acc 43.74%
Epoch 24/100	It 0/391	loss 1.997394 (1.997394)	iter_time 0.099416 (0.099416)
Epoch 24/100	It 40/391	loss 2.095061 (1.992425)	iter_time 0.011653 (0.011012)
Epoch 24/100	It 80/391	loss 1.803584 (2.006399)	iter_time 0.004928 (0.009643)
Epoch 24/100	It 120/391	loss 1.886798 (1.990755)	iter_time 0.029338 (0.009161)
Epoch 24/100	It 160/391	loss 1.934558 (1.987292)	iter_time 0.004923 (0.008803)
Epoch 24/100	It 200/391	loss 1.990596 (1.988444)	iter_time 0.004774 (0.008534)
Epoch 24/100	It 240/391	loss 2.201932 (1.990374)	iter_time 0.011639 (0.008416)
Epoch 24/100	It 280/391	loss 1.725504 (1.991288)	iter_time 0.023312 (0.008397)
Epoch 24/100	It 320/391	loss 1.789819 (1.993052)	iter_time 0.004973 (0.008319)
Epoch 24/100	It 360/391	loss 1.943921 (1.996023)	iter_time 0.005137 (0.008329)


 25%|██████████████████████████████████▎                                                                                                      | 25/100 [02:33<07:40,  6.15s/it]

Epoch 24/100	val_acc 43.88%
Epoch 25/100	It 0/391	loss 2.048926 (2.048926)	iter_time 0.099504 (0.099504)
Epoch 25/100	It 40/391	loss 2.056208 (2.000226)	iter_time 0.005062 (0.011092)
Epoch 25/100	It 80/391	loss 2.071812 (1.992365)	iter_time 0.017206 (0.009626)
Epoch 25/100	It 120/391	loss 1.780649 (1.988768)	iter_time 0.004885 (0.009236)
Epoch 25/100	It 160/391	loss 1.960720 (1.989805)	iter_time 0.005031 (0.008845)
Epoch 25/100	It 200/391	loss 1.999361 (1.990518)	iter_time 0.004899 (0.008586)
Epoch 25/100	It 240/391	loss 2.124046 (1.991409)	iter_time 0.004975 (0.008602)
Epoch 25/100	It 280/391	loss 2.098851 (1.990059)	iter_time 0.022543 (0.008558)
Epoch 25/100	It 320/391	loss 2.052932 (1.987777)	iter_time 0.004960 (0.008447)
Epoch 25/100	It 360/391	loss 1.866265 (1.990285)	iter_time 0.004926 (0.008349)


 26%|███████████████████████████████████▌                                                                                                     | 26/100 [02:39<07:31,  6.10s/it]

Epoch 25/100	val_acc 43.85%
Epoch 26/100	It 0/391	loss 1.884717 (1.884717)	iter_time 0.100104 (0.100104)
Epoch 26/100	It 40/391	loss 1.596058 (1.930661)	iter_time 0.005124 (0.010757)
Epoch 26/100	It 80/391	loss 1.921423 (1.949736)	iter_time 0.004910 (0.009350)
Epoch 26/100	It 120/391	loss 2.022011 (1.969096)	iter_time 0.005207 (0.009109)
Epoch 26/100	It 160/391	loss 1.862852 (1.972159)	iter_time 0.019111 (0.008914)
Epoch 26/100	It 200/391	loss 1.890775 (1.976382)	iter_time 0.008928 (0.008704)
Epoch 26/100	It 240/391	loss 2.082870 (1.980350)	iter_time 0.004854 (0.008727)
Epoch 26/100	It 280/391	loss 2.060945 (1.978860)	iter_time 0.004791 (0.008552)
Epoch 26/100	It 320/391	loss 2.320035 (1.980951)	iter_time 0.004972 (0.008498)
Epoch 26/100	It 360/391	loss 2.021728 (1.984036)	iter_time 0.004941 (0.008478)


 27%|████████████████████████████████████▉                                                                                                    | 27/100 [02:45<07:25,  6.10s/it]

Epoch 26/100	val_acc 44.11%
Epoch 27/100	It 0/391	loss 1.757815 (1.757815)	iter_time 0.096744 (0.096744)
Epoch 27/100	It 40/391	loss 2.017128 (1.934400)	iter_time 0.005068 (0.010877)
Epoch 27/100	It 80/391	loss 1.873962 (1.955323)	iter_time 0.004700 (0.009359)
Epoch 27/100	It 120/391	loss 2.003651 (1.949499)	iter_time 0.004966 (0.009140)
Epoch 27/100	It 160/391	loss 2.085484 (1.949843)	iter_time 0.010226 (0.008772)
Epoch 27/100	It 200/391	loss 2.158766 (1.956470)	iter_time 0.005073 (0.008583)
Epoch 27/100	It 240/391	loss 1.886198 (1.956580)	iter_time 0.004673 (0.008588)
Epoch 27/100	It 280/391	loss 2.014489 (1.958677)	iter_time 0.004913 (0.008498)
Epoch 27/100	It 320/391	loss 2.008768 (1.963513)	iter_time 0.004888 (0.008521)
Epoch 27/100	It 360/391	loss 2.133363 (1.966709)	iter_time 0.004788 (0.008558)


 28%|██████████████████████████████████████▎                                                                                                  | 28/100 [02:51<07:20,  6.11s/it]

Epoch 27/100	val_acc 44.27%
Epoch 28/100	It 0/391	loss 1.946747 (1.946747)	iter_time 0.100507 (0.100507)
Epoch 28/100	It 40/391	loss 1.919493 (1.943701)	iter_time 0.004772 (0.011352)
Epoch 28/100	It 80/391	loss 1.994709 (1.920936)	iter_time 0.029113 (0.010431)
Epoch 28/100	It 120/391	loss 2.078103 (1.932227)	iter_time 0.004710 (0.009902)
Epoch 28/100	It 160/391	loss 2.033607 (1.936550)	iter_time 0.004949 (0.009766)
Epoch 28/100	It 200/391	loss 1.955731 (1.940194)	iter_time 0.014748 (0.009347)
Epoch 28/100	It 240/391	loss 1.801082 (1.946363)	iter_time 0.031328 (0.009334)
Epoch 28/100	It 280/391	loss 1.866891 (1.947288)	iter_time 0.004928 (0.009080)
Epoch 28/100	It 320/391	loss 2.204473 (1.947644)	iter_time 0.005008 (0.008883)
Epoch 28/100	It 360/391	loss 1.738435 (1.948230)	iter_time 0.005025 (0.008839)


 29%|███████████████████████████████████████▋                                                                                                 | 29/100 [02:57<07:15,  6.14s/it]

Epoch 28/100	val_acc 44.43%
Epoch 29/100	It 0/391	loss 2.126192 (2.126192)	iter_time 0.115666 (0.115666)
Epoch 29/100	It 40/391	loss 1.975475 (1.928340)	iter_time 0.004705 (0.011495)
Epoch 29/100	It 80/391	loss 1.741176 (1.941621)	iter_time 0.004739 (0.010027)
Epoch 29/100	It 120/391	loss 1.873244 (1.938348)	iter_time 0.004800 (0.009511)
Epoch 29/100	It 160/391	loss 1.814207 (1.936881)	iter_time 0.004916 (0.009453)
Epoch 29/100	It 200/391	loss 1.896852 (1.941984)	iter_time 0.005016 (0.009208)
Epoch 29/100	It 240/391	loss 1.940150 (1.946110)	iter_time 0.004571 (0.009130)
Epoch 29/100	It 280/391	loss 1.962295 (1.948553)	iter_time 0.022909 (0.009042)
Epoch 29/100	It 320/391	loss 2.103812 (1.951897)	iter_time 0.005095 (0.008866)
Epoch 29/100	It 360/391	loss 1.881297 (1.954400)	iter_time 0.021945 (0.008806)


 30%|█████████████████████████████████████████                                                                                                | 30/100 [03:04<07:12,  6.18s/it]

Epoch 29/100	val_acc 44.36%
Epoch 30/100	It 0/391	loss 1.751454 (1.751454)	iter_time 0.098674 (0.098674)
Epoch 30/100	It 40/391	loss 1.838170 (1.941674)	iter_time 0.005180 (0.010386)
Epoch 30/100	It 80/391	loss 1.801080 (1.937797)	iter_time 0.029260 (0.009314)
Epoch 30/100	It 120/391	loss 2.153855 (1.932095)	iter_time 0.005136 (0.008907)
Epoch 30/100	It 160/391	loss 2.143237 (1.931531)	iter_time 0.005234 (0.008790)
Epoch 30/100	It 200/391	loss 1.828396 (1.936322)	iter_time 0.023585 (0.008687)
Epoch 30/100	It 240/391	loss 2.061470 (1.938642)	iter_time 0.004861 (0.008546)
Epoch 30/100	It 280/391	loss 2.162436 (1.942860)	iter_time 0.004762 (0.008472)
Epoch 30/100	It 320/391	loss 2.043529 (1.944789)	iter_time 0.004944 (0.008526)
Epoch 30/100	It 360/391	loss 1.644864 (1.949497)	iter_time 0.004873 (0.008422)


 31%|██████████████████████████████████████████▍                                                                                              | 31/100 [03:10<07:03,  6.14s/it]

Epoch 30/100	val_acc 44.51%
Epoch 31/100	It 0/391	loss 2.013150 (2.013150)	iter_time 0.097544 (0.097544)
Epoch 31/100	It 40/391	loss 1.930167 (1.933091)	iter_time 0.004934 (0.010663)
Epoch 31/100	It 80/391	loss 2.090831 (1.929328)	iter_time 0.021753 (0.009661)
Epoch 31/100	It 120/391	loss 1.969158 (1.925523)	iter_time 0.004916 (0.009072)
Epoch 31/100	It 160/391	loss 1.836933 (1.924475)	iter_time 0.004773 (0.008901)
Epoch 31/100	It 200/391	loss 2.150196 (1.923587)	iter_time 0.009579 (0.008529)
Epoch 31/100	It 240/391	loss 1.981089 (1.929637)	iter_time 0.004933 (0.008407)
Epoch 31/100	It 280/391	loss 1.940797 (1.932035)	iter_time 0.004956 (0.008374)
Epoch 31/100	It 320/391	loss 2.215691 (1.934099)	iter_time 0.005011 (0.008344)
Epoch 31/100	It 360/391	loss 1.897701 (1.937276)	iter_time 0.004705 (0.008286)


 32%|███████████████████████████████████████████▊                                                                                             | 32/100 [03:16<06:54,  6.10s/it]

Epoch 31/100	val_acc 44.75%
Epoch 32/100	It 0/391	loss 1.894990 (1.894990)	iter_time 0.111091 (0.111091)
Epoch 32/100	It 40/391	loss 1.967560 (1.913900)	iter_time 0.004968 (0.010058)
Epoch 32/100	It 80/391	loss 1.825797 (1.898948)	iter_time 0.004766 (0.009196)
Epoch 32/100	It 120/391	loss 1.951224 (1.899328)	iter_time 0.005043 (0.009028)
Epoch 32/100	It 160/391	loss 1.694398 (1.908746)	iter_time 0.004968 (0.009021)
Epoch 32/100	It 200/391	loss 1.873558 (1.918236)	iter_time 0.004880 (0.008970)
Epoch 32/100	It 240/391	loss 1.831605 (1.918026)	iter_time 0.004977 (0.008815)
Epoch 32/100	It 280/391	loss 2.018616 (1.922214)	iter_time 0.004966 (0.008718)
Epoch 32/100	It 320/391	loss 1.816828 (1.927277)	iter_time 0.011866 (0.008607)
Epoch 32/100	It 360/391	loss 1.845373 (1.929022)	iter_time 0.004919 (0.008510)


 33%|█████████████████████████████████████████████▏                                                                                           | 33/100 [03:22<06:48,  6.09s/it]

Epoch 32/100	val_acc 44.84%
Epoch 33/100	It 0/391	loss 1.663393 (1.663393)	iter_time 0.097154 (0.097154)
Epoch 33/100	It 40/391	loss 1.954808 (1.872101)	iter_time 0.004992 (0.010373)
Epoch 33/100	It 80/391	loss 1.883932 (1.882331)	iter_time 0.023422 (0.009229)
Epoch 33/100	It 120/391	loss 1.879555 (1.889210)	iter_time 0.005272 (0.008714)
Epoch 33/100	It 160/391	loss 1.980459 (1.903781)	iter_time 0.004991 (0.008638)
Epoch 33/100	It 200/391	loss 1.859424 (1.902175)	iter_time 0.007645 (0.008554)
Epoch 33/100	It 240/391	loss 2.144001 (1.908006)	iter_time 0.005038 (0.008416)
Epoch 33/100	It 280/391	loss 1.795066 (1.912108)	iter_time 0.004927 (0.008369)
Epoch 33/100	It 320/391	loss 1.833565 (1.914044)	iter_time 0.022228 (0.008350)
Epoch 33/100	It 360/391	loss 1.921571 (1.918723)	iter_time 0.004970 (0.008282)


 34%|██████████████████████████████████████████████▌                                                                                          | 34/100 [03:28<06:41,  6.09s/it]

Epoch 33/100	val_acc 44.92%
Epoch 34/100	It 0/391	loss 1.900849 (1.900849)	iter_time 0.099410 (0.099410)
Epoch 34/100	It 40/391	loss 2.083513 (1.875046)	iter_time 0.005269 (0.010635)
Epoch 34/100	It 80/391	loss 1.962191 (1.878909)	iter_time 0.009675 (0.009330)
Epoch 34/100	It 120/391	loss 1.994236 (1.891186)	iter_time 0.004992 (0.009043)
Epoch 34/100	It 160/391	loss 1.836205 (1.894851)	iter_time 0.004942 (0.008696)
Epoch 34/100	It 200/391	loss 2.076928 (1.906137)	iter_time 0.005459 (0.008465)
Epoch 34/100	It 240/391	loss 1.927536 (1.908469)	iter_time 0.007986 (0.008517)
Epoch 34/100	It 280/391	loss 1.932761 (1.909311)	iter_time 0.004871 (0.008570)
Epoch 34/100	It 320/391	loss 1.806444 (1.912298)	iter_time 0.005068 (0.008622)
Epoch 34/100	It 360/391	loss 2.126656 (1.918396)	iter_time 0.004915 (0.008556)


 35%|███████████████████████████████████████████████▉                                                                                         | 35/100 [03:34<06:37,  6.12s/it]

Epoch 34/100	val_acc 44.95%
Epoch 35/100	It 0/391	loss 2.003034 (2.003034)	iter_time 0.106956 (0.106956)
Epoch 35/100	It 40/391	loss 1.977291 (1.896126)	iter_time 0.004962 (0.010445)
Epoch 35/100	It 80/391	loss 2.078332 (1.907102)	iter_time 0.005310 (0.009133)
Epoch 35/100	It 120/391	loss 1.934666 (1.905205)	iter_time 0.004937 (0.008707)
Epoch 35/100	It 160/391	loss 1.692035 (1.914177)	iter_time 0.004987 (0.008473)
Epoch 35/100	It 200/391	loss 1.981377 (1.910652)	iter_time 0.005139 (0.008390)
Epoch 35/100	It 240/391	loss 1.925541 (1.912387)	iter_time 0.004981 (0.008264)
Epoch 35/100	It 280/391	loss 1.973117 (1.915313)	iter_time 0.013421 (0.008241)
Epoch 35/100	It 320/391	loss 2.210727 (1.916737)	iter_time 0.005279 (0.008199)
Epoch 35/100	It 360/391	loss 2.031551 (1.914919)	iter_time 0.004953 (0.008167)


 36%|█████████████████████████████████████████████████▎                                                                                       | 36/100 [03:40<06:31,  6.12s/it]

Epoch 35/100	val_acc 45.13%
Epoch 36/100	It 0/391	loss 1.606046 (1.606046)	iter_time 0.097546 (0.097546)
Epoch 36/100	It 40/391	loss 2.169724 (1.897335)	iter_time 0.004712 (0.011158)
Epoch 36/100	It 80/391	loss 1.932891 (1.909147)	iter_time 0.004734 (0.010336)
Epoch 36/100	It 120/391	loss 1.951563 (1.909573)	iter_time 0.005343 (0.009537)
Epoch 36/100	It 160/391	loss 1.945943 (1.907845)	iter_time 0.004894 (0.009229)
Epoch 36/100	It 200/391	loss 1.741833 (1.903547)	iter_time 0.004840 (0.009092)
Epoch 36/100	It 240/391	loss 1.577301 (1.903590)	iter_time 0.004966 (0.009006)
Epoch 36/100	It 280/391	loss 1.735216 (1.906096)	iter_time 0.004967 (0.008700)
Epoch 36/100	It 320/391	loss 1.805380 (1.907637)	iter_time 0.004993 (0.008785)
Epoch 36/100	It 360/391	loss 1.774307 (1.903690)	iter_time 0.025802 (0.008787)


 37%|██████████████████████████████████████████████████▋                                                                                      | 37/100 [03:46<06:27,  6.16s/it]

Epoch 36/100	val_acc 45.15%
Epoch 37/100	It 0/391	loss 2.274345 (2.274345)	iter_time 0.100187 (0.100187)
Epoch 37/100	It 40/391	loss 2.040664 (1.863757)	iter_time 0.005136 (0.010064)
Epoch 37/100	It 80/391	loss 2.071751 (1.898525)	iter_time 0.021539 (0.009060)
Epoch 37/100	It 120/391	loss 1.743442 (1.894363)	iter_time 0.005009 (0.008622)
Epoch 37/100	It 160/391	loss 2.009196 (1.899461)	iter_time 0.005061 (0.008380)
Epoch 37/100	It 200/391	loss 1.755522 (1.897081)	iter_time 0.004973 (0.008322)
Epoch 37/100	It 240/391	loss 2.196033 (1.900231)	iter_time 0.004966 (0.008249)
Epoch 37/100	It 280/391	loss 1.872751 (1.904413)	iter_time 0.005025 (0.008278)
Epoch 37/100	It 320/391	loss 1.920772 (1.903970)	iter_time 0.005400 (0.008262)
Epoch 37/100	It 360/391	loss 1.906476 (1.908756)	iter_time 0.005208 (0.008219)


 38%|████████████████████████████████████████████████████                                                                                     | 38/100 [03:52<06:17,  6.09s/it]

Epoch 37/100	val_acc 45.5%
Epoch 38/100	It 0/391	loss 1.774555 (1.774555)	iter_time 0.099493 (0.099493)
Epoch 38/100	It 40/391	loss 1.733625 (1.868369)	iter_time 0.017145 (0.010299)
Epoch 38/100	It 80/391	loss 1.986690 (1.881427)	iter_time 0.005062 (0.009049)
Epoch 38/100	It 120/391	loss 1.765023 (1.887469)	iter_time 0.030454 (0.008715)
Epoch 38/100	It 160/391	loss 1.849008 (1.891778)	iter_time 0.004733 (0.008626)
Epoch 38/100	It 200/391	loss 1.840491 (1.889403)	iter_time 0.005021 (0.008587)
Epoch 38/100	It 240/391	loss 2.210243 (1.900920)	iter_time 0.026402 (0.008583)
Epoch 38/100	It 280/391	loss 1.640043 (1.895460)	iter_time 0.004954 (0.008458)
Epoch 38/100	It 320/391	loss 1.801848 (1.891653)	iter_time 0.005216 (0.008426)
Epoch 38/100	It 360/391	loss 1.990790 (1.890861)	iter_time 0.022229 (0.008387)


 39%|█████████████████████████████████████████████████████▍                                                                                   | 39/100 [03:58<06:10,  6.07s/it]

Epoch 38/100	val_acc 45.58%
Epoch 39/100	It 0/391	loss 1.724687 (1.724687)	iter_time 0.119412 (0.119412)
Epoch 39/100	It 40/391	loss 1.797673 (1.847259)	iter_time 0.004898 (0.011662)
Epoch 39/100	It 80/391	loss 1.963102 (1.857344)	iter_time 0.004909 (0.010147)
Epoch 39/100	It 120/391	loss 1.895382 (1.865441)	iter_time 0.020069 (0.009396)
Epoch 39/100	It 160/391	loss 2.028783 (1.879128)	iter_time 0.005016 (0.009008)
Epoch 39/100	It 200/391	loss 2.000746 (1.878376)	iter_time 0.004898 (0.008789)
Epoch 39/100	It 240/391	loss 1.844466 (1.878717)	iter_time 0.005051 (0.008572)
Epoch 39/100	It 280/391	loss 1.796200 (1.886564)	iter_time 0.005056 (0.008472)
Epoch 39/100	It 320/391	loss 2.091927 (1.894655)	iter_time 0.013629 (0.008436)
Epoch 39/100	It 360/391	loss 2.063523 (1.896188)	iter_time 0.004840 (0.008482)


 40%|██████████████████████████████████████████████████████▊                                                                                  | 40/100 [04:05<06:05,  6.09s/it]

Epoch 39/100	val_acc 45.55%
Epoch 40/100	It 0/391	loss 1.746866 (1.746866)	iter_time 0.118457 (0.118457)
Epoch 40/100	It 40/391	loss 2.003635 (1.869871)	iter_time 0.005095 (0.010288)
Epoch 40/100	It 80/391	loss 2.050479 (1.889801)	iter_time 0.007217 (0.009502)
Epoch 40/100	It 120/391	loss 1.958355 (1.884555)	iter_time 0.004953 (0.009377)
Epoch 40/100	It 160/391	loss 1.770414 (1.876531)	iter_time 0.004693 (0.009088)
Epoch 40/100	It 200/391	loss 2.057091 (1.874678)	iter_time 0.022122 (0.008887)
Epoch 40/100	It 240/391	loss 1.995616 (1.879567)	iter_time 0.005068 (0.008674)
Epoch 40/100	It 280/391	loss 1.817665 (1.885533)	iter_time 0.004985 (0.008554)
Epoch 40/100	It 320/391	loss 1.985475 (1.888853)	iter_time 0.005274 (0.008471)
Epoch 40/100	It 360/391	loss 1.746684 (1.891404)	iter_time 0.005109 (0.008429)


 41%|████████████████████████████████████████████████████████▏                                                                                | 41/100 [04:11<05:58,  6.08s/it]

Epoch 40/100	val_acc 45.68%
Epoch 41/100	It 0/391	loss 1.770959 (1.770959)	iter_time 0.119210 (0.119210)
Epoch 41/100	It 40/391	loss 1.671964 (1.831466)	iter_time 0.005217 (0.010335)
Epoch 41/100	It 80/391	loss 1.867620 (1.849193)	iter_time 0.004986 (0.009144)
Epoch 41/100	It 120/391	loss 2.084395 (1.859806)	iter_time 0.005053 (0.008873)
Epoch 41/100	It 160/391	loss 1.949496 (1.868704)	iter_time 0.027647 (0.008618)
Epoch 41/100	It 200/391	loss 1.817975 (1.875494)	iter_time 0.004933 (0.008348)
Epoch 41/100	It 240/391	loss 1.810173 (1.879980)	iter_time 0.004930 (0.008341)
Epoch 41/100	It 280/391	loss 2.035511 (1.881335)	iter_time 0.004918 (0.008308)
Epoch 41/100	It 320/391	loss 1.907581 (1.881680)	iter_time 0.005102 (0.008314)
Epoch 41/100	It 360/391	loss 1.950957 (1.884356)	iter_time 0.004714 (0.008244)


 42%|█████████████████████████████████████████████████████████▌                                                                               | 42/100 [04:16<05:49,  6.03s/it]

Epoch 41/100	val_acc 45.75%
Epoch 42/100	It 0/391	loss 1.851200 (1.851200)	iter_time 0.100261 (0.100261)
Epoch 42/100	It 40/391	loss 1.979127 (1.876306)	iter_time 0.010998 (0.010991)
Epoch 42/100	It 80/391	loss 2.047934 (1.871940)	iter_time 0.020064 (0.009641)
Epoch 42/100	It 120/391	loss 2.050918 (1.874524)	iter_time 0.006194 (0.009346)
Epoch 42/100	It 160/391	loss 1.871803 (1.880337)	iter_time 0.004937 (0.008994)
Epoch 42/100	It 200/391	loss 1.874759 (1.874644)	iter_time 0.005098 (0.008751)
Epoch 42/100	It 240/391	loss 1.833282 (1.875475)	iter_time 0.005075 (0.008812)
Epoch 42/100	It 280/391	loss 1.808516 (1.876633)	iter_time 0.004929 (0.008618)
Epoch 42/100	It 320/391	loss 1.703694 (1.878738)	iter_time 0.004982 (0.008589)
Epoch 42/100	It 360/391	loss 1.881510 (1.876178)	iter_time 0.005171 (0.008506)


 43%|██████████████████████████████████████████████████████████▉                                                                              | 43/100 [04:23<05:46,  6.07s/it]

Epoch 42/100	val_acc 45.72%
Epoch 43/100	It 0/391	loss 1.779359 (1.779359)	iter_time 0.097368 (0.097368)
Epoch 43/100	It 40/391	loss 1.949594 (1.877766)	iter_time 0.004970 (0.010301)
Epoch 43/100	It 80/391	loss 1.959974 (1.864332)	iter_time 0.004786 (0.009312)
Epoch 43/100	It 120/391	loss 1.869918 (1.863076)	iter_time 0.005360 (0.009083)
Epoch 43/100	It 160/391	loss 1.634579 (1.864431)	iter_time 0.014446 (0.008840)
Epoch 43/100	It 200/391	loss 2.081902 (1.867995)	iter_time 0.004728 (0.008745)
Epoch 43/100	It 240/391	loss 1.891291 (1.868571)	iter_time 0.004965 (0.008772)
Epoch 43/100	It 280/391	loss 1.794331 (1.867617)	iter_time 0.022295 (0.008697)
Epoch 43/100	It 320/391	loss 2.031013 (1.863519)	iter_time 0.004888 (0.008569)
Epoch 43/100	It 360/391	loss 1.989572 (1.863225)	iter_time 0.005110 (0.008526)


 44%|████████████████████████████████████████████████████████████▎                                                                            | 44/100 [04:29<05:41,  6.10s/it]

Epoch 43/100	val_acc 45.91%
Epoch 44/100	It 0/391	loss 1.886491 (1.886491)	iter_time 0.099013 (0.099013)
Epoch 44/100	It 40/391	loss 1.635990 (1.839928)	iter_time 0.005033 (0.010686)
Epoch 44/100	It 80/391	loss 2.057750 (1.848387)	iter_time 0.004733 (0.009606)
Epoch 44/100	It 120/391	loss 1.844370 (1.844722)	iter_time 0.011761 (0.008913)
Epoch 44/100	It 160/391	loss 1.751108 (1.853144)	iter_time 0.004792 (0.008786)
Epoch 44/100	It 200/391	loss 2.055668 (1.858647)	iter_time 0.021197 (0.008701)
Epoch 44/100	It 240/391	loss 1.468319 (1.859159)	iter_time 0.004960 (0.008569)
Epoch 44/100	It 280/391	loss 1.793771 (1.860034)	iter_time 0.004802 (0.008487)
Epoch 44/100	It 320/391	loss 1.837081 (1.861747)	iter_time 0.006458 (0.008395)
Epoch 44/100	It 360/391	loss 1.773946 (1.859320)	iter_time 0.005241 (0.008388)


 45%|█████████████████████████████████████████████████████████████▋                                                                           | 45/100 [04:35<05:34,  6.08s/it]

Epoch 44/100	val_acc 45.86%
Epoch 45/100	It 0/391	loss 2.019575 (2.019575)	iter_time 0.118457 (0.118457)
Epoch 45/100	It 40/391	loss 1.920431 (1.882934)	iter_time 0.005043 (0.010505)
Epoch 45/100	It 80/391	loss 1.781880 (1.836569)	iter_time 0.005036 (0.009619)
Epoch 45/100	It 120/391	loss 1.915912 (1.838677)	iter_time 0.005039 (0.008885)
Epoch 45/100	It 160/391	loss 1.788059 (1.841977)	iter_time 0.004994 (0.008756)
Epoch 45/100	It 200/391	loss 1.848480 (1.840907)	iter_time 0.004965 (0.008585)
Epoch 45/100	It 240/391	loss 1.735436 (1.848545)	iter_time 0.004938 (0.008371)
Epoch 45/100	It 280/391	loss 1.446943 (1.849677)	iter_time 0.004961 (0.008300)
Epoch 45/100	It 320/391	loss 1.785530 (1.855671)	iter_time 0.005382 (0.008323)
Epoch 45/100	It 360/391	loss 1.945515 (1.859124)	iter_time 0.005001 (0.008302)


 46%|███████████████████████████████████████████████████████████████                                                                          | 46/100 [04:41<05:27,  6.06s/it]

Epoch 45/100	val_acc 46.11%
Epoch 46/100	It 0/391	loss 1.883288 (1.883288)	iter_time 0.099113 (0.099113)
Epoch 46/100	It 40/391	loss 1.852424 (1.831275)	iter_time 0.005053 (0.011024)
Epoch 46/100	It 80/391	loss 1.663879 (1.828411)	iter_time 0.005101 (0.009357)
Epoch 46/100	It 120/391	loss 1.795899 (1.820559)	iter_time 0.005051 (0.008828)
Epoch 46/100	It 160/391	loss 1.776611 (1.833638)	iter_time 0.004959 (0.008683)
Epoch 46/100	It 200/391	loss 1.943781 (1.837756)	iter_time 0.004868 (0.008602)
Epoch 46/100	It 240/391	loss 1.780132 (1.842661)	iter_time 0.016990 (0.008476)
Epoch 46/100	It 280/391	loss 2.060698 (1.847835)	iter_time 0.004941 (0.008448)
Epoch 46/100	It 320/391	loss 1.608659 (1.847429)	iter_time 0.004918 (0.008363)
Epoch 46/100	It 360/391	loss 1.964764 (1.850628)	iter_time 0.005002 (0.008307)


 47%|████████████████████████████████████████████████████████████████▍                                                                        | 47/100 [04:47<05:21,  6.06s/it]

Epoch 46/100	val_acc 46.12%
Epoch 47/100	It 0/391	loss 1.767545 (1.767545)	iter_time 0.119506 (0.119506)
Epoch 47/100	It 40/391	loss 1.791432 (1.827247)	iter_time 0.004974 (0.011097)
Epoch 47/100	It 80/391	loss 1.864599 (1.832355)	iter_time 0.005247 (0.009633)
Epoch 47/100	It 120/391	loss 1.877443 (1.835947)	iter_time 0.004879 (0.009029)
Epoch 47/100	It 160/391	loss 2.010062 (1.844227)	iter_time 0.004703 (0.008759)
Epoch 47/100	It 200/391	loss 2.031415 (1.843600)	iter_time 0.005052 (0.008640)
Epoch 47/100	It 240/391	loss 1.866974 (1.845132)	iter_time 0.015388 (0.008584)
Epoch 47/100	It 280/391	loss 1.568604 (1.848623)	iter_time 0.005026 (0.008485)
Epoch 47/100	It 320/391	loss 1.690059 (1.851271)	iter_time 0.004963 (0.008504)
Epoch 47/100	It 360/391	loss 1.640350 (1.850834)	iter_time 0.020405 (0.008488)


 48%|█████████████████████████████████████████████████████████████████▊                                                                       | 48/100 [04:53<05:16,  6.08s/it]

Epoch 47/100	val_acc 46.06%
Epoch 48/100	It 0/391	loss 2.099349 (2.099349)	iter_time 0.100062 (0.100062)
Epoch 48/100	It 40/391	loss 1.731694 (1.819744)	iter_time 0.004699 (0.010848)
Epoch 48/100	It 80/391	loss 1.854413 (1.822116)	iter_time 0.004726 (0.010232)
Epoch 48/100	It 120/391	loss 1.919333 (1.836254)	iter_time 0.016005 (0.009638)
Epoch 48/100	It 160/391	loss 1.812513 (1.829069)	iter_time 0.004982 (0.009168)
Epoch 48/100	It 200/391	loss 2.064372 (1.835197)	iter_time 0.005030 (0.009018)
Epoch 48/100	It 240/391	loss 2.041222 (1.836437)	iter_time 0.005080 (0.008916)
Epoch 48/100	It 280/391	loss 1.788022 (1.842646)	iter_time 0.004958 (0.008783)
Epoch 48/100	It 320/391	loss 1.863853 (1.840037)	iter_time 0.005053 (0.008760)
Epoch 48/100	It 360/391	loss 1.973518 (1.844321)	iter_time 0.005106 (0.008716)


 49%|███████████████████████████████████████████████████████████████████▏                                                                     | 49/100 [04:59<05:12,  6.13s/it]

Epoch 48/100	val_acc 46.43%
Epoch 49/100	It 0/391	loss 1.779652 (1.779652)	iter_time 0.100362 (0.100362)
Epoch 49/100	It 40/391	loss 1.813184 (1.824904)	iter_time 0.004958 (0.011080)
Epoch 49/100	It 80/391	loss 1.783603 (1.820920)	iter_time 0.004933 (0.009656)
Epoch 49/100	It 120/391	loss 2.024935 (1.819414)	iter_time 0.004936 (0.009136)
Epoch 49/100	It 160/391	loss 1.919198 (1.829611)	iter_time 0.005086 (0.008817)
Epoch 49/100	It 200/391	loss 1.872248 (1.840345)	iter_time 0.005311 (0.008502)
Epoch 49/100	It 240/391	loss 2.012169 (1.843376)	iter_time 0.005064 (0.008397)
Epoch 49/100	It 280/391	loss 2.198581 (1.847922)	iter_time 0.025401 (0.008338)
Epoch 49/100	It 320/391	loss 1.780651 (1.845019)	iter_time 0.005005 (0.008268)
Epoch 49/100	It 360/391	loss 2.014597 (1.848274)	iter_time 0.004781 (0.008236)


 50%|████████████████████████████████████████████████████████████████████▌                                                                    | 50/100 [05:05<05:03,  6.08s/it]

Epoch 49/100	val_acc 46.32%
Epoch 50/100	It 0/391	loss 1.692702 (1.692702)	iter_time 0.116336 (0.116336)
Epoch 50/100	It 40/391	loss 1.743662 (1.824581)	iter_time 0.005261 (0.010679)
Epoch 50/100	It 80/391	loss 1.662584 (1.819268)	iter_time 0.004892 (0.009221)
Epoch 50/100	It 120/391	loss 1.699483 (1.826654)	iter_time 0.010671 (0.008751)
Epoch 50/100	It 160/391	loss 1.872967 (1.828844)	iter_time 0.004935 (0.008576)
Epoch 50/100	It 200/391	loss 1.711084 (1.831266)	iter_time 0.024384 (0.008526)
Epoch 50/100	It 240/391	loss 2.024964 (1.831832)	iter_time 0.005150 (0.008396)
Epoch 50/100	It 280/391	loss 1.703608 (1.830416)	iter_time 0.004928 (0.008346)
Epoch 50/100	It 320/391	loss 1.693646 (1.834045)	iter_time 0.005456 (0.008308)
Epoch 50/100	It 360/391	loss 1.811934 (1.828110)	iter_time 0.004772 (0.008372)


 51%|█████████████████████████████████████████████████████████████████████▊                                                                   | 51/100 [05:11<04:56,  6.04s/it]

Epoch 50/100	val_acc 46.61%
Epoch 51/100	It 0/391	loss 1.907531 (1.907531)	iter_time 0.117584 (0.117584)
Epoch 51/100	It 40/391	loss 1.791132 (1.858151)	iter_time 0.004765 (0.011158)
Epoch 51/100	It 80/391	loss 1.694652 (1.869809)	iter_time 0.018262 (0.010090)
Epoch 51/100	It 120/391	loss 1.891285 (1.856325)	iter_time 0.005123 (0.009214)
Epoch 51/100	It 160/391	loss 1.896857 (1.857220)	iter_time 0.005083 (0.008990)
Epoch 51/100	It 200/391	loss 2.083375 (1.853540)	iter_time 0.005439 (0.008948)
Epoch 51/100	It 240/391	loss 2.013798 (1.851770)	iter_time 0.005057 (0.008716)
Epoch 51/100	It 280/391	loss 1.847909 (1.848300)	iter_time 0.005136 (0.008666)
Epoch 51/100	It 320/391	loss 1.649237 (1.843805)	iter_time 0.004734 (0.008723)
Epoch 51/100	It 360/391	loss 1.849049 (1.844060)	iter_time 0.004686 (0.008566)


 52%|███████████████████████████████████████████████████████████████████████▏                                                                 | 52/100 [05:17<04:51,  6.07s/it]

Epoch 51/100	val_acc 46.63%
Epoch 52/100	It 0/391	loss 1.946375 (1.946375)	iter_time 0.099965 (0.099965)
Epoch 52/100	It 40/391	loss 1.777582 (1.850277)	iter_time 0.004880 (0.010794)
Epoch 52/100	It 80/391	loss 1.789916 (1.829123)	iter_time 0.004982 (0.010006)
Epoch 52/100	It 120/391	loss 1.844322 (1.819154)	iter_time 0.010481 (0.009049)
Epoch 52/100	It 160/391	loss 1.734530 (1.817794)	iter_time 0.005118 (0.008762)
Epoch 52/100	It 200/391	loss 1.604853 (1.820386)	iter_time 0.023805 (0.008671)
Epoch 52/100	It 240/391	loss 1.514610 (1.813538)	iter_time 0.004874 (0.008584)
Epoch 52/100	It 280/391	loss 1.871034 (1.814826)	iter_time 0.005112 (0.008549)
Epoch 52/100	It 320/391	loss 1.993520 (1.821535)	iter_time 0.023340 (0.008509)
Epoch 52/100	It 360/391	loss 2.051559 (1.823783)	iter_time 0.005208 (0.008361)


 53%|████████████████████████████████████████████████████████████████████████▌                                                                | 53/100 [05:23<04:45,  6.07s/it]

Epoch 52/100	val_acc 46.63%
Epoch 53/100	It 0/391	loss 1.787736 (1.787736)	iter_time 0.099431 (0.099431)
Epoch 53/100	It 40/391	loss 1.880849 (1.822422)	iter_time 0.004940 (0.010894)
Epoch 53/100	It 80/391	loss 1.777891 (1.811822)	iter_time 0.005275 (0.009285)
Epoch 53/100	It 120/391	loss 1.804060 (1.821228)	iter_time 0.005027 (0.008740)
Epoch 53/100	It 160/391	loss 1.688365 (1.823914)	iter_time 0.005046 (0.008730)
Epoch 53/100	It 200/391	loss 1.707112 (1.819804)	iter_time 0.005170 (0.008414)
Epoch 53/100	It 240/391	loss 2.135828 (1.823991)	iter_time 0.004918 (0.008376)
Epoch 53/100	It 280/391	loss 1.740788 (1.826435)	iter_time 0.004948 (0.008355)
Epoch 53/100	It 320/391	loss 1.480659 (1.823544)	iter_time 0.026077 (0.008345)
Epoch 53/100	It 360/391	loss 1.796116 (1.825034)	iter_time 0.005450 (0.008321)


 54%|█████████████████████████████████████████████████████████████████████████▉                                                               | 54/100 [05:29<04:38,  6.06s/it]

Epoch 53/100	val_acc 46.54%
Epoch 54/100	It 0/391	loss 1.628720 (1.628720)	iter_time 0.099610 (0.099610)
Epoch 54/100	It 40/391	loss 1.734874 (1.779030)	iter_time 0.004969 (0.010466)
Epoch 54/100	It 80/391	loss 1.602041 (1.787690)	iter_time 0.011668 (0.009225)
Epoch 54/100	It 120/391	loss 1.821259 (1.788305)	iter_time 0.005069 (0.008886)
Epoch 54/100	It 160/391	loss 1.897875 (1.801644)	iter_time 0.004959 (0.008692)
Epoch 54/100	It 200/391	loss 1.854619 (1.807465)	iter_time 0.028064 (0.008542)
Epoch 54/100	It 240/391	loss 2.062162 (1.810362)	iter_time 0.004774 (0.008437)
Epoch 54/100	It 280/391	loss 1.977943 (1.815453)	iter_time 0.004950 (0.008338)
Epoch 54/100	It 320/391	loss 1.927905 (1.821424)	iter_time 0.004806 (0.008288)
Epoch 54/100	It 360/391	loss 1.675367 (1.819581)	iter_time 0.004697 (0.008278)


 55%|███████████████████████████████████████████████████████████████████████████▎                                                             | 55/100 [05:36<04:32,  6.06s/it]

Epoch 54/100	val_acc 46.61%
Epoch 55/100	It 0/391	loss 1.875499 (1.875499)	iter_time 0.100407 (0.100407)
Epoch 55/100	It 40/391	loss 2.028631 (1.811985)	iter_time 0.004869 (0.011021)
Epoch 55/100	It 80/391	loss 1.629682 (1.794858)	iter_time 0.004630 (0.009695)
Epoch 55/100	It 120/391	loss 2.087911 (1.790707)	iter_time 0.004784 (0.009098)
Epoch 55/100	It 160/391	loss 1.557871 (1.795192)	iter_time 0.004913 (0.008986)
Epoch 55/100	It 200/391	loss 1.800359 (1.797371)	iter_time 0.004969 (0.008828)
Epoch 55/100	It 240/391	loss 1.991267 (1.804906)	iter_time 0.005059 (0.008735)
Epoch 55/100	It 280/391	loss 1.706105 (1.810128)	iter_time 0.005264 (0.008647)
Epoch 55/100	It 320/391	loss 1.782693 (1.816110)	iter_time 0.004857 (0.008588)
Epoch 55/100	It 360/391	loss 1.655822 (1.819187)	iter_time 0.024027 (0.008581)


 56%|████████████████████████████████████████████████████████████████████████████▋                                                            | 56/100 [05:42<04:29,  6.13s/it]

Epoch 55/100	val_acc 46.74%
Epoch 56/100	It 0/391	loss 1.887132 (1.887132)	iter_time 0.100446 (0.100446)
Epoch 56/100	It 40/391	loss 1.892051 (1.806762)	iter_time 0.005062 (0.010461)
Epoch 56/100	It 80/391	loss 1.694761 (1.800531)	iter_time 0.004765 (0.009260)
Epoch 56/100	It 120/391	loss 1.922372 (1.809747)	iter_time 0.010260 (0.009241)
Epoch 56/100	It 160/391	loss 1.585245 (1.806449)	iter_time 0.005102 (0.009086)
Epoch 56/100	It 200/391	loss 1.562497 (1.805376)	iter_time 0.005114 (0.008814)
Epoch 56/100	It 240/391	loss 1.928153 (1.813173)	iter_time 0.005220 (0.008608)
Epoch 56/100	It 280/391	loss 1.862714 (1.816376)	iter_time 0.004947 (0.008504)
Epoch 56/100	It 320/391	loss 1.838410 (1.814914)	iter_time 0.005137 (0.008437)
Epoch 56/100	It 360/391	loss 1.889056 (1.811218)	iter_time 0.004946 (0.008462)


 57%|██████████████████████████████████████████████████████████████████████████████                                                           | 57/100 [05:48<04:22,  6.11s/it]

Epoch 56/100	val_acc 46.83%
Epoch 57/100	It 0/391	loss 1.808322 (1.808322)	iter_time 0.100554 (0.100554)
Epoch 57/100	It 40/391	loss 1.827528 (1.779454)	iter_time 0.004942 (0.010849)
Epoch 57/100	It 80/391	loss 1.806043 (1.755613)	iter_time 0.005215 (0.009514)
Epoch 57/100	It 120/391	loss 2.111962 (1.768303)	iter_time 0.005203 (0.009073)
Epoch 57/100	It 160/391	loss 1.769012 (1.779294)	iter_time 0.004643 (0.009067)
Epoch 57/100	It 200/391	loss 1.724300 (1.780366)	iter_time 0.005630 (0.009002)
Epoch 57/100	It 240/391	loss 2.036187 (1.785188)	iter_time 0.004911 (0.008804)
Epoch 57/100	It 280/391	loss 1.731674 (1.793742)	iter_time 0.004980 (0.008681)
Epoch 57/100	It 320/391	loss 1.887993 (1.790714)	iter_time 0.005059 (0.008609)
Epoch 57/100	It 360/391	loss 1.695069 (1.792002)	iter_time 0.004953 (0.008541)


 58%|███████████████████████████████████████████████████████████████████████████████▍                                                         | 58/100 [05:54<04:16,  6.10s/it]

Epoch 57/100	val_acc 46.73%
Epoch 58/100	It 0/391	loss 1.802693 (1.802693)	iter_time 0.100618 (0.100618)
Epoch 58/100	It 40/391	loss 1.811342 (1.796992)	iter_time 0.004976 (0.010423)
Epoch 58/100	It 80/391	loss 1.968375 (1.806518)	iter_time 0.004990 (0.009269)
Epoch 58/100	It 120/391	loss 1.705678 (1.793289)	iter_time 0.004963 (0.008670)
Epoch 58/100	It 160/391	loss 1.876295 (1.800037)	iter_time 0.005098 (0.008528)
Epoch 58/100	It 200/391	loss 1.840159 (1.794976)	iter_time 0.018092 (0.008497)
Epoch 58/100	It 240/391	loss 1.467432 (1.796763)	iter_time 0.024386 (0.008448)
Epoch 58/100	It 280/391	loss 2.001454 (1.798078)	iter_time 0.005459 (0.008389)
Epoch 58/100	It 320/391	loss 1.982185 (1.799430)	iter_time 0.005066 (0.008314)
Epoch 58/100	It 360/391	loss 1.661116 (1.797379)	iter_time 0.005045 (0.008318)


 59%|████████████████████████████████████████████████████████████████████████████████▊                                                        | 59/100 [06:00<04:08,  6.07s/it]

Epoch 58/100	val_acc 46.67%
Epoch 59/100	It 0/391	loss 1.828000 (1.828000)	iter_time 0.101319 (0.101319)
Epoch 59/100	It 40/391	loss 1.733769 (1.740650)	iter_time 0.004870 (0.010467)
Epoch 59/100	It 80/391	loss 1.821586 (1.783390)	iter_time 0.004929 (0.009151)
Epoch 59/100	It 120/391	loss 1.855976 (1.783065)	iter_time 0.004949 (0.008902)
Epoch 59/100	It 160/391	loss 1.650560 (1.785904)	iter_time 0.005229 (0.008575)
Epoch 59/100	It 200/391	loss 1.452328 (1.790711)	iter_time 0.005353 (0.008320)
Epoch 59/100	It 240/391	loss 1.628664 (1.787492)	iter_time 0.004725 (0.008206)
Epoch 59/100	It 280/391	loss 2.010527 (1.787097)	iter_time 0.007086 (0.008213)
Epoch 59/100	It 320/391	loss 1.833138 (1.793175)	iter_time 0.004838 (0.008260)
Epoch 59/100	It 360/391	loss 1.850721 (1.799556)	iter_time 0.004953 (0.008248)


 60%|██████████████████████████████████████████████████████████████████████████████████▏                                                      | 60/100 [06:06<04:03,  6.10s/it]

Epoch 59/100	val_acc 46.91%
Epoch 60/100	It 0/391	loss 1.994521 (1.994521)	iter_time 0.118787 (0.118787)
Epoch 60/100	It 40/391	loss 1.722680 (1.770209)	iter_time 0.005061 (0.010846)
Epoch 60/100	It 80/391	loss 1.810429 (1.756887)	iter_time 0.005035 (0.009636)
Epoch 60/100	It 120/391	loss 1.956586 (1.782348)	iter_time 0.024132 (0.009014)
Epoch 60/100	It 160/391	loss 1.870727 (1.779125)	iter_time 0.005007 (0.008697)
Epoch 60/100	It 200/391	loss 2.066286 (1.776667)	iter_time 0.005393 (0.008524)
Epoch 60/100	It 240/391	loss 1.812310 (1.775979)	iter_time 0.004755 (0.008454)
Epoch 60/100	It 280/391	loss 2.010548 (1.775043)	iter_time 0.004820 (0.008593)
Epoch 60/100	It 320/391	loss 1.871224 (1.775384)	iter_time 0.004775 (0.008616)
Epoch 60/100	It 360/391	loss 1.816809 (1.773896)	iter_time 0.004575 (0.008707)


 61%|███████████████████████████████████████████████████████████████████████████████████▌                                                     | 61/100 [06:12<03:58,  6.11s/it]

Epoch 60/100	val_acc 47.29%
Epoch 61/100	It 0/391	loss 1.656641 (1.656641)	iter_time 0.104172 (0.104172)
Epoch 61/100	It 40/391	loss 1.847527 (1.793559)	iter_time 0.004897 (0.011196)
Epoch 61/100	It 80/391	loss 1.766315 (1.775465)	iter_time 0.005477 (0.009432)
Epoch 61/100	It 120/391	loss 2.018255 (1.771258)	iter_time 0.022805 (0.009056)
Epoch 61/100	It 160/391	loss 1.583462 (1.775844)	iter_time 0.005090 (0.008770)
Epoch 61/100	It 200/391	loss 1.678100 (1.776437)	iter_time 0.004992 (0.008575)
Epoch 61/100	It 240/391	loss 1.516686 (1.783106)	iter_time 0.005339 (0.008555)
Epoch 61/100	It 280/391	loss 1.675291 (1.779094)	iter_time 0.017300 (0.008408)
Epoch 61/100	It 320/391	loss 1.640298 (1.780848)	iter_time 0.004648 (0.008399)
Epoch 61/100	It 360/391	loss 1.667754 (1.778322)	iter_time 0.005000 (0.008395)


 62%|████████████████████████████████████████████████████████████████████████████████████▉                                                    | 62/100 [06:18<03:53,  6.14s/it]

Epoch 61/100	val_acc 47.35%
Epoch 62/100	It 0/391	loss 1.604336 (1.604336)	iter_time 0.118871 (0.118871)
Epoch 62/100	It 40/391	loss 1.521875 (1.735012)	iter_time 0.004901 (0.010837)
Epoch 62/100	It 80/391	loss 1.993508 (1.756421)	iter_time 0.004737 (0.009731)
Epoch 62/100	It 120/391	loss 1.596814 (1.764022)	iter_time 0.024999 (0.009272)
Epoch 62/100	It 160/391	loss 1.456852 (1.756123)	iter_time 0.005024 (0.008878)
Epoch 62/100	It 200/391	loss 1.833204 (1.765264)	iter_time 0.004590 (0.008789)
Epoch 62/100	It 240/391	loss 2.121809 (1.770579)	iter_time 0.015455 (0.008623)
Epoch 62/100	It 280/391	loss 1.879136 (1.777522)	iter_time 0.004997 (0.008527)
Epoch 62/100	It 320/391	loss 1.499715 (1.771694)	iter_time 0.024515 (0.008450)
Epoch 62/100	It 360/391	loss 1.736768 (1.769935)	iter_time 0.004931 (0.008360)


 63%|██████████████████████████████████████████████████████████████████████████████████████▎                                                  | 63/100 [06:25<03:46,  6.12s/it]

Epoch 62/100	val_acc 47.4%
Epoch 63/100	It 0/391	loss 1.833598 (1.833598)	iter_time 0.101080 (0.101080)
Epoch 63/100	It 40/391	loss 1.665804 (1.779181)	iter_time 0.004970 (0.010812)
Epoch 63/100	It 80/391	loss 2.083157 (1.781480)	iter_time 0.019539 (0.009686)
Epoch 63/100	It 120/391	loss 1.877378 (1.765772)	iter_time 0.005113 (0.008988)
Epoch 63/100	It 160/391	loss 1.608310 (1.771320)	iter_time 0.005043 (0.008660)
Epoch 63/100	It 200/391	loss 1.790243 (1.764125)	iter_time 0.019539 (0.008539)
Epoch 63/100	It 240/391	loss 1.518654 (1.769705)	iter_time 0.004962 (0.008446)
Epoch 63/100	It 280/391	loss 1.875137 (1.770205)	iter_time 0.004955 (0.008409)
Epoch 63/100	It 320/391	loss 1.952772 (1.772538)	iter_time 0.005160 (0.008339)
Epoch 63/100	It 360/391	loss 1.920574 (1.771398)	iter_time 0.005141 (0.008329)


 64%|███████████████████████████████████████████████████████████████████████████████████████▋                                                 | 64/100 [06:31<03:40,  6.14s/it]

Epoch 63/100	val_acc 47.4%
Epoch 64/100	It 0/391	loss 1.653319 (1.653319)	iter_time 0.100865 (0.100865)
Epoch 64/100	It 40/391	loss 1.932267 (1.706927)	iter_time 0.005051 (0.009972)
Epoch 64/100	It 80/391	loss 2.054377 (1.729276)	iter_time 0.004814 (0.009386)
Epoch 64/100	It 120/391	loss 1.966934 (1.740374)	iter_time 0.031243 (0.009150)
Epoch 64/100	It 160/391	loss 1.702542 (1.741799)	iter_time 0.005057 (0.008942)
Epoch 64/100	It 200/391	loss 1.982283 (1.747066)	iter_time 0.005150 (0.008773)
Epoch 64/100	It 240/391	loss 2.046949 (1.748673)	iter_time 0.024991 (0.008712)
Epoch 64/100	It 280/391	loss 1.645151 (1.748063)	iter_time 0.004985 (0.008581)
Epoch 64/100	It 320/391	loss 1.701149 (1.750693)	iter_time 0.004702 (0.008569)
Epoch 64/100	It 360/391	loss 1.818175 (1.753529)	iter_time 0.023388 (0.008556)


 65%|█████████████████████████████████████████████████████████████████████████████████████████                                                | 65/100 [06:37<03:35,  6.17s/it]

Epoch 64/100	val_acc 47.5%
Epoch 65/100	It 0/391	loss 1.942860 (1.942860)	iter_time 0.118269 (0.118269)
Epoch 65/100	It 40/391	loss 2.062432 (1.791176)	iter_time 0.004718 (0.011004)
Epoch 65/100	It 80/391	loss 1.770541 (1.780747)	iter_time 0.022732 (0.009930)
Epoch 65/100	It 120/391	loss 1.626028 (1.771363)	iter_time 0.004966 (0.009183)
Epoch 65/100	It 160/391	loss 1.773363 (1.761589)	iter_time 0.004955 (0.008872)
Epoch 65/100	It 200/391	loss 1.822244 (1.762466)	iter_time 0.009493 (0.008595)
Epoch 65/100	It 240/391	loss 1.568844 (1.762791)	iter_time 0.030894 (0.008711)
Epoch 65/100	It 280/391	loss 1.787842 (1.765360)	iter_time 0.005081 (0.008532)
Epoch 65/100	It 320/391	loss 1.654701 (1.768718)	iter_time 0.027734 (0.008638)
Epoch 65/100	It 360/391	loss 1.758773 (1.769100)	iter_time 0.004761 (0.008660)


 66%|██████████████████████████████████████████████████████████████████████████████████████████▍                                              | 66/100 [06:43<03:30,  6.18s/it]

Epoch 65/100	val_acc 47.5%
Epoch 66/100	It 0/391	loss 1.747837 (1.747837)	iter_time 0.119430 (0.119430)
Epoch 66/100	It 40/391	loss 1.665020 (1.741407)	iter_time 0.005239 (0.010216)
Epoch 66/100	It 80/391	loss 1.698803 (1.753032)	iter_time 0.024241 (0.009363)
Epoch 66/100	It 120/391	loss 1.749082 (1.764020)	iter_time 0.005077 (0.008714)
Epoch 66/100	It 160/391	loss 1.952106 (1.765389)	iter_time 0.005437 (0.008543)
Epoch 66/100	It 200/391	loss 1.901769 (1.765467)	iter_time 0.005344 (0.008272)
Epoch 66/100	It 240/391	loss 1.709179 (1.763191)	iter_time 0.005277 (0.008378)
Epoch 66/100	It 280/391	loss 1.771149 (1.762460)	iter_time 0.031083 (0.008425)
Epoch 66/100	It 320/391	loss 1.632040 (1.757878)	iter_time 0.005237 (0.008464)
Epoch 66/100	It 360/391	loss 1.669803 (1.760917)	iter_time 0.004935 (0.008371)


 67%|███████████████████████████████████████████████████████████████████████████████████████████▊                                             | 67/100 [06:49<03:23,  6.15s/it]

Epoch 66/100	val_acc 47.53%
Epoch 67/100	It 0/391	loss 2.008635 (2.008635)	iter_time 0.115771 (0.115771)
Epoch 67/100	It 40/391	loss 1.788597 (1.738011)	iter_time 0.004731 (0.011378)
Epoch 67/100	It 80/391	loss 1.802635 (1.751054)	iter_time 0.004847 (0.010380)
Epoch 67/100	It 120/391	loss 2.028998 (1.754220)	iter_time 0.005013 (0.009642)
Epoch 67/100	It 160/391	loss 1.690832 (1.761212)	iter_time 0.005032 (0.009242)
Epoch 67/100	It 200/391	loss 1.900356 (1.760961)	iter_time 0.004941 (0.008831)
Epoch 67/100	It 240/391	loss 1.413575 (1.756552)	iter_time 0.004913 (0.008720)
Epoch 67/100	It 280/391	loss 1.679632 (1.756164)	iter_time 0.004942 (0.008592)
Epoch 67/100	It 320/391	loss 1.804123 (1.755404)	iter_time 0.005229 (0.008496)
Epoch 67/100	It 360/391	loss 1.798922 (1.755174)	iter_time 0.025640 (0.008510)


 68%|█████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 68/100 [06:55<03:16,  6.15s/it]

Epoch 67/100	val_acc 47.42%
Epoch 68/100	It 0/391	loss 1.633999 (1.633999)	iter_time 0.120511 (0.120511)
Epoch 68/100	It 40/391	loss 1.676176 (1.787845)	iter_time 0.005021 (0.010755)
Epoch 68/100	It 80/391	loss 1.634218 (1.764080)	iter_time 0.005244 (0.009671)
Epoch 68/100	It 120/391	loss 1.710465 (1.752576)	iter_time 0.030489 (0.009524)
Epoch 68/100	It 160/391	loss 1.793255 (1.760625)	iter_time 0.004792 (0.009336)
Epoch 68/100	It 200/391	loss 1.922671 (1.761091)	iter_time 0.005531 (0.009068)
Epoch 68/100	It 240/391	loss 1.981645 (1.756479)	iter_time 0.005651 (0.008790)
Epoch 68/100	It 280/391	loss 1.639254 (1.757209)	iter_time 0.004987 (0.008736)
Epoch 68/100	It 320/391	loss 1.610013 (1.760567)	iter_time 0.004771 (0.008621)
Epoch 68/100	It 360/391	loss 1.922773 (1.761766)	iter_time 0.005013 (0.008649)


 69%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                          | 69/100 [07:02<03:10,  6.15s/it]

Epoch 68/100	val_acc 47.48%
Epoch 69/100	It 0/391	loss 1.741722 (1.741722)	iter_time 0.118985 (0.118985)
Epoch 69/100	It 40/391	loss 1.682523 (1.739829)	iter_time 0.005254 (0.011545)
Epoch 69/100	It 80/391	loss 1.790306 (1.754551)	iter_time 0.004981 (0.009750)
Epoch 69/100	It 120/391	loss 1.737219 (1.758750)	iter_time 0.020314 (0.009136)
Epoch 69/100	It 160/391	loss 1.630934 (1.762665)	iter_time 0.005203 (0.008940)
Epoch 69/100	It 200/391	loss 1.959212 (1.768585)	iter_time 0.019014 (0.008682)
Epoch 69/100	It 240/391	loss 1.970472 (1.765803)	iter_time 0.005168 (0.008600)
Epoch 69/100	It 280/391	loss 1.988034 (1.766940)	iter_time 0.004948 (0.008534)
Epoch 69/100	It 320/391	loss 1.655371 (1.765222)	iter_time 0.005296 (0.008410)
Epoch 69/100	It 360/391	loss 1.751995 (1.761980)	iter_time 0.004975 (0.008300)


 70%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                         | 70/100 [07:08<03:02,  6.10s/it]

Epoch 69/100	val_acc 47.57%
Epoch 70/100	It 0/391	loss 1.662210 (1.662210)	iter_time 0.118283 (0.118283)
Epoch 70/100	It 40/391	loss 2.010796 (1.742660)	iter_time 0.026212 (0.011044)
Epoch 70/100	It 80/391	loss 1.860893 (1.741390)	iter_time 0.004970 (0.009763)
Epoch 70/100	It 120/391	loss 1.526410 (1.746937)	iter_time 0.005178 (0.009071)
Epoch 70/100	It 160/391	loss 1.592347 (1.750266)	iter_time 0.005201 (0.008820)
Epoch 70/100	It 200/391	loss 1.687498 (1.751468)	iter_time 0.005263 (0.008700)
Epoch 70/100	It 240/391	loss 1.682490 (1.753669)	iter_time 0.004931 (0.008640)
Epoch 70/100	It 280/391	loss 1.887255 (1.765019)	iter_time 0.004975 (0.008377)
Epoch 70/100	It 320/391	loss 1.852861 (1.765577)	iter_time 0.004973 (0.008359)
Epoch 70/100	It 360/391	loss 1.826161 (1.760773)	iter_time 0.028089 (0.008367)


 71%|█████████████████████████████████████████████████████████████████████████████████████████████████▎                                       | 71/100 [07:14<02:56,  6.09s/it]

Epoch 70/100	val_acc 47.68%
Epoch 71/100	It 0/391	loss 1.646261 (1.646261)	iter_time 0.118251 (0.118251)
Epoch 71/100	It 40/391	loss 1.771917 (1.767257)	iter_time 0.005036 (0.011633)
Epoch 71/100	It 80/391	loss 1.653522 (1.763440)	iter_time 0.005348 (0.010368)
Epoch 71/100	It 120/391	loss 1.676184 (1.755301)	iter_time 0.004887 (0.009886)
Epoch 71/100	It 160/391	loss 1.801726 (1.767841)	iter_time 0.005013 (0.009655)
Epoch 71/100	It 200/391	loss 1.728998 (1.756218)	iter_time 0.004953 (0.009482)
Epoch 71/100	It 240/391	loss 1.807834 (1.755652)	iter_time 0.004978 (0.009349)
Epoch 71/100	It 280/391	loss 2.183727 (1.755620)	iter_time 0.004981 (0.009153)
Epoch 71/100	It 320/391	loss 1.925217 (1.758344)	iter_time 0.005182 (0.008944)
Epoch 71/100	It 360/391	loss 1.586238 (1.758891)	iter_time 0.005217 (0.008865)


 72%|██████████████████████████████████████████████████████████████████████████████████████████████████▋                                      | 72/100 [07:20<02:51,  6.14s/it]

Epoch 71/100	val_acc 47.65%
Epoch 72/100	It 0/391	loss 1.707600 (1.707600)	iter_time 0.098087 (0.098087)
Epoch 72/100	It 40/391	loss 1.652436 (1.796314)	iter_time 0.004966 (0.010304)
Epoch 72/100	It 80/391	loss 1.737201 (1.781376)	iter_time 0.017682 (0.009289)
Epoch 72/100	It 120/391	loss 1.534500 (1.791834)	iter_time 0.005027 (0.008985)
Epoch 72/100	It 160/391	loss 1.685851 (1.776123)	iter_time 0.005046 (0.008680)
Epoch 72/100	It 200/391	loss 1.838331 (1.773555)	iter_time 0.004951 (0.008631)
Epoch 72/100	It 240/391	loss 1.969464 (1.775980)	iter_time 0.004866 (0.008581)
Epoch 72/100	It 280/391	loss 1.776340 (1.771856)	iter_time 0.004991 (0.008344)
Epoch 72/100	It 320/391	loss 1.636571 (1.772550)	iter_time 0.004719 (0.008353)
Epoch 72/100	It 360/391	loss 1.605594 (1.768594)	iter_time 0.004907 (0.008290)


 73%|████████████████████████████████████████████████████████████████████████████████████████████████████                                     | 73/100 [07:26<02:46,  6.16s/it]

Epoch 72/100	val_acc 47.71%
Epoch 73/100	It 0/391	loss 1.873993 (1.873993)	iter_time 0.100717 (0.100717)
Epoch 73/100	It 40/391	loss 1.614183 (1.764499)	iter_time 0.004885 (0.011491)
Epoch 73/100	It 80/391	loss 1.681409 (1.753502)	iter_time 0.005063 (0.009833)
Epoch 73/100	It 120/391	loss 1.816304 (1.743152)	iter_time 0.005457 (0.009257)
Epoch 73/100	It 160/391	loss 1.760614 (1.752751)	iter_time 0.005056 (0.009070)
Epoch 73/100	It 200/391	loss 1.780163 (1.755651)	iter_time 0.004977 (0.008793)
Epoch 73/100	It 240/391	loss 1.626677 (1.757252)	iter_time 0.004873 (0.008778)
Epoch 73/100	It 280/391	loss 1.626497 (1.765478)	iter_time 0.004987 (0.008601)
Epoch 73/100	It 320/391	loss 1.671058 (1.765715)	iter_time 0.004721 (0.008647)
Epoch 73/100	It 360/391	loss 1.793278 (1.767233)	iter_time 0.005206 (0.008694)


 74%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍                                   | 74/100 [07:32<02:42,  6.24s/it]

Epoch 73/100	val_acc 47.63%
Epoch 74/100	It 0/391	loss 1.546511 (1.546511)	iter_time 0.100853 (0.100853)
Epoch 74/100	It 40/391	loss 1.778149 (1.764812)	iter_time 0.004814 (0.011188)
Epoch 74/100	It 80/391	loss 1.784391 (1.755943)	iter_time 0.004895 (0.010396)
Epoch 74/100	It 120/391	loss 1.940229 (1.759562)	iter_time 0.018692 (0.009565)
Epoch 74/100	It 160/391	loss 1.694430 (1.767398)	iter_time 0.005150 (0.009318)
Epoch 74/100	It 200/391	loss 1.680933 (1.768513)	iter_time 0.017475 (0.008979)
Epoch 74/100	It 240/391	loss 1.800048 (1.765702)	iter_time 0.004999 (0.008767)
Epoch 74/100	It 280/391	loss 1.675145 (1.764309)	iter_time 0.016916 (0.008701)
Epoch 74/100	It 320/391	loss 1.805209 (1.762234)	iter_time 0.005012 (0.008542)
Epoch 74/100	It 360/391	loss 1.746159 (1.762551)	iter_time 0.005024 (0.008533)


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                  | 75/100 [07:39<02:35,  6.21s/it]

Epoch 74/100	val_acc 47.68%
Epoch 75/100	It 0/391	loss 1.693752 (1.693752)	iter_time 0.120784 (0.120784)
Epoch 75/100	It 40/391	loss 1.754493 (1.761091)	iter_time 0.005146 (0.011023)
Epoch 75/100	It 80/391	loss 1.877434 (1.762740)	iter_time 0.004985 (0.009263)
Epoch 75/100	It 120/391	loss 1.910940 (1.759364)	iter_time 0.004889 (0.008943)
Epoch 75/100	It 160/391	loss 1.782964 (1.756176)	iter_time 0.023341 (0.008795)
Epoch 75/100	It 200/391	loss 1.755859 (1.757591)	iter_time 0.005018 (0.008513)
Epoch 75/100	It 240/391	loss 1.583685 (1.756692)	iter_time 0.010629 (0.008593)
Epoch 75/100	It 280/391	loss 1.897182 (1.749472)	iter_time 0.004943 (0.008443)
Epoch 75/100	It 320/391	loss 1.822829 (1.749717)	iter_time 0.005095 (0.008460)
Epoch 75/100	It 360/391	loss 1.742893 (1.750891)	iter_time 0.011387 (0.008435)


 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                 | 76/100 [07:45<02:28,  6.18s/it]

Epoch 75/100	val_acc 47.7%
Epoch 76/100	It 0/391	loss 1.681933 (1.681933)	iter_time 0.101603 (0.101603)
Epoch 76/100	It 40/391	loss 1.359322 (1.709716)	iter_time 0.005011 (0.011256)
Epoch 76/100	It 80/391	loss 1.563322 (1.723874)	iter_time 0.004971 (0.009548)
Epoch 76/100	It 120/391	loss 1.761490 (1.725204)	iter_time 0.005010 (0.008843)
Epoch 76/100	It 160/391	loss 1.790502 (1.728269)	iter_time 0.005074 (0.008710)
Epoch 76/100	It 200/391	loss 1.928641 (1.733402)	iter_time 0.005234 (0.008512)
Epoch 76/100	It 240/391	loss 2.004827 (1.742388)	iter_time 0.005141 (0.008468)
Epoch 76/100	It 280/391	loss 1.538126 (1.743147)	iter_time 0.013321 (0.008442)
Epoch 76/100	It 320/391	loss 1.527954 (1.747423)	iter_time 0.005027 (0.008350)
Epoch 76/100	It 360/391	loss 1.871932 (1.747257)	iter_time 0.004909 (0.008344)


 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 77/100 [07:51<02:21,  6.14s/it]

Epoch 76/100	val_acc 47.74%
Epoch 77/100	It 0/391	loss 1.676701 (1.676701)	iter_time 0.098769 (0.098769)
Epoch 77/100	It 40/391	loss 1.901186 (1.731404)	iter_time 0.004935 (0.011588)
Epoch 77/100	It 80/391	loss 1.967871 (1.754923)	iter_time 0.005214 (0.009665)
Epoch 77/100	It 120/391	loss 1.783393 (1.741721)	iter_time 0.005039 (0.009339)
Epoch 77/100	It 160/391	loss 1.937074 (1.752605)	iter_time 0.004989 (0.008874)
Epoch 77/100	It 200/391	loss 1.753069 (1.754472)	iter_time 0.020499 (0.008714)
Epoch 77/100	It 240/391	loss 1.656820 (1.756753)	iter_time 0.004897 (0.008497)
Epoch 77/100	It 280/391	loss 1.619925 (1.752550)	iter_time 0.005006 (0.008568)
Epoch 77/100	It 320/391	loss 1.584027 (1.751174)	iter_time 0.005027 (0.008593)
Epoch 77/100	It 360/391	loss 1.634152 (1.753719)	iter_time 0.005115 (0.008568)


 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 78/100 [07:57<02:14,  6.12s/it]

Epoch 77/100	val_acc 47.74%
Epoch 78/100	It 0/391	loss 1.656396 (1.656396)	iter_time 0.099487 (0.099487)
Epoch 78/100	It 40/391	loss 1.954149 (1.754213)	iter_time 0.004726 (0.011079)
Epoch 78/100	It 80/391	loss 1.906779 (1.741899)	iter_time 0.005082 (0.009343)
Epoch 78/100	It 120/391	loss 1.639441 (1.763806)	iter_time 0.005279 (0.009034)
Epoch 78/100	It 160/391	loss 1.597009 (1.758166)	iter_time 0.004735 (0.008818)
Epoch 78/100	It 200/391	loss 1.824498 (1.752259)	iter_time 0.005014 (0.008661)
Epoch 78/100	It 240/391	loss 2.035589 (1.757167)	iter_time 0.004917 (0.008622)
Epoch 78/100	It 280/391	loss 1.966007 (1.757630)	iter_time 0.004886 (0.008637)
Epoch 78/100	It 320/391	loss 1.821940 (1.758008)	iter_time 0.004595 (0.008727)
Epoch 78/100	It 360/391	loss 1.786096 (1.761260)	iter_time 0.005089 (0.008671)


 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                            | 79/100 [08:03<02:08,  6.13s/it]

Epoch 78/100	val_acc 47.73%
Epoch 79/100	It 0/391	loss 1.696205 (1.696205)	iter_time 0.100097 (0.100097)
Epoch 79/100	It 40/391	loss 1.751060 (1.755573)	iter_time 0.005295 (0.010416)
Epoch 79/100	It 80/391	loss 2.064159 (1.752922)	iter_time 0.004721 (0.009455)
Epoch 79/100	It 120/391	loss 1.934685 (1.753333)	iter_time 0.004868 (0.008861)
Epoch 79/100	It 160/391	loss 1.565010 (1.751821)	iter_time 0.004932 (0.008862)
Epoch 79/100	It 200/391	loss 1.769614 (1.750715)	iter_time 0.004950 (0.008640)
Epoch 79/100	It 240/391	loss 1.744398 (1.744851)	iter_time 0.005315 (0.008562)
Epoch 79/100	It 280/391	loss 2.095904 (1.743439)	iter_time 0.004757 (0.008535)
Epoch 79/100	It 320/391	loss 1.967068 (1.752487)	iter_time 0.005230 (0.008408)
Epoch 79/100	It 360/391	loss 1.694836 (1.753280)	iter_time 0.005111 (0.008453)


 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                           | 80/100 [08:09<02:02,  6.11s/it]

Epoch 79/100	val_acc 47.75%
Epoch 80/100	It 0/391	loss 1.830437 (1.830437)	iter_time 0.100051 (0.100051)
Epoch 80/100	It 40/391	loss 1.814490 (1.744032)	iter_time 0.005040 (0.010601)
Epoch 80/100	It 80/391	loss 1.879730 (1.735546)	iter_time 0.005008 (0.009210)
Epoch 80/100	It 120/391	loss 1.771593 (1.742542)	iter_time 0.004933 (0.008854)
Epoch 80/100	It 160/391	loss 1.707273 (1.754028)	iter_time 0.013244 (0.008537)
Epoch 80/100	It 200/391	loss 1.729086 (1.746948)	iter_time 0.005016 (0.008554)
Epoch 80/100	It 240/391	loss 1.822441 (1.748629)	iter_time 0.005158 (0.008329)
Epoch 80/100	It 280/391	loss 1.808683 (1.747061)	iter_time 0.005015 (0.008259)
Epoch 80/100	It 320/391	loss 1.575908 (1.745391)	iter_time 0.014040 (0.008263)
Epoch 80/100	It 360/391	loss 1.680839 (1.746538)	iter_time 0.006053 (0.008257)


 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                          | 81/100 [08:15<01:56,  6.13s/it]

Epoch 80/100	val_acc 47.78%
Epoch 81/100	It 0/391	loss 1.764440 (1.764440)	iter_time 0.100397 (0.100397)
Epoch 81/100	It 40/391	loss 1.596240 (1.740805)	iter_time 0.005089 (0.010484)
Epoch 81/100	It 80/391	loss 1.862607 (1.744583)	iter_time 0.024083 (0.009397)
Epoch 81/100	It 120/391	loss 1.572862 (1.735555)	iter_time 0.004913 (0.008908)
Epoch 81/100	It 160/391	loss 1.522071 (1.742383)	iter_time 0.005020 (0.008792)
Epoch 81/100	It 200/391	loss 1.865282 (1.742125)	iter_time 0.022084 (0.008673)
Epoch 81/100	It 240/391	loss 1.983284 (1.747221)	iter_time 0.004958 (0.008527)
Epoch 81/100	It 280/391	loss 1.680806 (1.747289)	iter_time 0.004949 (0.008504)
Epoch 81/100	It 320/391	loss 1.701261 (1.750311)	iter_time 0.022334 (0.008464)
Epoch 81/100	It 360/391	loss 1.812006 (1.749738)	iter_time 0.004873 (0.008381)


 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                        | 82/100 [08:21<01:50,  6.14s/it]

Epoch 81/100	val_acc 47.79%
Epoch 82/100	It 0/391	loss 1.819747 (1.819747)	iter_time 0.098746 (0.098746)
Epoch 82/100	It 40/391	loss 1.887782 (1.801486)	iter_time 0.005020 (0.010662)
Epoch 82/100	It 80/391	loss 1.789784 (1.773142)	iter_time 0.004969 (0.009342)
Epoch 82/100	It 120/391	loss 1.715843 (1.768437)	iter_time 0.004876 (0.008896)
Epoch 82/100	It 160/391	loss 1.769230 (1.768889)	iter_time 0.005256 (0.008556)
Epoch 82/100	It 200/391	loss 1.685600 (1.761009)	iter_time 0.005000 (0.008493)
Epoch 82/100	It 240/391	loss 1.669668 (1.760697)	iter_time 0.006648 (0.008368)
Epoch 82/100	It 280/391	loss 1.633539 (1.757915)	iter_time 0.004946 (0.008310)
Epoch 82/100	It 320/391	loss 1.652514 (1.755783)	iter_time 0.004583 (0.008327)
Epoch 82/100	It 360/391	loss 1.747307 (1.758435)	iter_time 0.014317 (0.008301)


 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 83/100 [08:27<01:43,  6.09s/it]

Epoch 82/100	val_acc 47.78%
Epoch 83/100	It 0/391	loss 1.703573 (1.703573)	iter_time 0.118358 (0.118358)
Epoch 83/100	It 40/391	loss 1.693951 (1.733440)	iter_time 0.005231 (0.010632)
Epoch 83/100	It 80/391	loss 1.952748 (1.760464)	iter_time 0.024723 (0.009790)
Epoch 83/100	It 120/391	loss 1.596171 (1.750581)	iter_time 0.005058 (0.009268)
Epoch 83/100	It 160/391	loss 1.666983 (1.751479)	iter_time 0.004898 (0.009158)
Epoch 83/100	It 200/391	loss 1.754751 (1.750855)	iter_time 0.030384 (0.009211)
Epoch 83/100	It 240/391	loss 1.914221 (1.748418)	iter_time 0.004979 (0.008963)
Epoch 83/100	It 280/391	loss 1.606160 (1.745065)	iter_time 0.005436 (0.008860)
Epoch 83/100	It 320/391	loss 1.653176 (1.746819)	iter_time 0.020841 (0.008684)
Epoch 83/100	It 360/391	loss 1.794048 (1.746322)	iter_time 0.005398 (0.008602)


 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 84/100 [08:33<01:37,  6.09s/it]

Epoch 83/100	val_acc 47.75%
Epoch 84/100	It 0/391	loss 1.831688 (1.831688)	iter_time 0.118477 (0.118477)
Epoch 84/100	It 40/391	loss 1.919400 (1.764597)	iter_time 0.004928 (0.010709)
Epoch 84/100	It 80/391	loss 2.053848 (1.754373)	iter_time 0.013811 (0.009284)
Epoch 84/100	It 120/391	loss 1.869560 (1.753298)	iter_time 0.004972 (0.008816)
Epoch 84/100	It 160/391	loss 1.747240 (1.747582)	iter_time 0.004845 (0.008687)
Epoch 84/100	It 200/391	loss 1.506332 (1.746240)	iter_time 0.004989 (0.008566)
Epoch 84/100	It 240/391	loss 1.788028 (1.750988)	iter_time 0.004967 (0.008469)
Epoch 84/100	It 280/391	loss 1.887140 (1.751265)	iter_time 0.015932 (0.008391)
Epoch 84/100	It 320/391	loss 1.724412 (1.753276)	iter_time 0.004882 (0.008342)
Epoch 84/100	It 360/391	loss 1.598081 (1.754420)	iter_time 0.004951 (0.008296)


 85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                    | 85/100 [08:40<01:31,  6.09s/it]

Epoch 84/100	val_acc 47.81%
Epoch 85/100	It 0/391	loss 1.814668 (1.814668)	iter_time 0.102720 (0.102720)
Epoch 85/100	It 40/391	loss 1.931342 (1.739076)	iter_time 0.012767 (0.011304)
Epoch 85/100	It 80/391	loss 1.881730 (1.753180)	iter_time 0.005298 (0.009791)
Epoch 85/100	It 120/391	loss 1.631312 (1.745885)	iter_time 0.018437 (0.009062)
Epoch 85/100	It 160/391	loss 1.761111 (1.751388)	iter_time 0.005132 (0.008895)
Epoch 85/100	It 200/391	loss 1.606887 (1.747916)	iter_time 0.005077 (0.008752)
Epoch 85/100	It 240/391	loss 1.566864 (1.745931)	iter_time 0.004988 (0.008601)
Epoch 85/100	It 280/391	loss 1.875002 (1.745323)	iter_time 0.021682 (0.008612)
Epoch 85/100	It 320/391	loss 1.588818 (1.746218)	iter_time 0.004842 (0.008501)
Epoch 85/100	It 360/391	loss 1.655307 (1.745878)	iter_time 0.014996 (0.008441)


 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                   | 86/100 [08:46<01:25,  6.11s/it]

Epoch 85/100	val_acc 47.81%
Epoch 86/100	It 0/391	loss 1.642542 (1.642542)	iter_time 0.099639 (0.099639)
Epoch 86/100	It 40/391	loss 1.467611 (1.766251)	iter_time 0.004987 (0.011000)
Epoch 86/100	It 80/391	loss 1.465467 (1.737634)	iter_time 0.005007 (0.009571)
Epoch 86/100	It 120/391	loss 1.688182 (1.737240)	iter_time 0.005174 (0.009091)
Epoch 86/100	It 160/391	loss 1.633053 (1.737352)	iter_time 0.026659 (0.008956)
Epoch 86/100	It 200/391	loss 1.999653 (1.739186)	iter_time 0.004872 (0.008695)
Epoch 86/100	It 240/391	loss 1.750374 (1.737980)	iter_time 0.004798 (0.008628)
Epoch 86/100	It 280/391	loss 1.764110 (1.741666)	iter_time 0.005019 (0.008633)
Epoch 86/100	It 320/391	loss 1.585968 (1.741894)	iter_time 0.004992 (0.008569)
Epoch 86/100	It 360/391	loss 1.320832 (1.740414)	iter_time 0.019240 (0.008500)


 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                 | 87/100 [08:52<01:19,  6.11s/it]

Epoch 86/100	val_acc 47.81%
Epoch 87/100	It 0/391	loss 2.102016 (2.102016)	iter_time 0.117318 (0.117318)
Epoch 87/100	It 40/391	loss 1.785812 (1.782853)	iter_time 0.005374 (0.011061)
Epoch 87/100	It 80/391	loss 2.039776 (1.764572)	iter_time 0.031062 (0.009882)
Epoch 87/100	It 120/391	loss 1.849432 (1.757467)	iter_time 0.005023 (0.009382)
Epoch 87/100	It 160/391	loss 1.749569 (1.760205)	iter_time 0.004984 (0.008991)
Epoch 87/100	It 200/391	loss 1.868784 (1.757543)	iter_time 0.021111 (0.008804)
Epoch 87/100	It 240/391	loss 1.762970 (1.753905)	iter_time 0.004958 (0.008628)
Epoch 87/100	It 280/391	loss 1.678382 (1.748544)	iter_time 0.005090 (0.008730)
Epoch 87/100	It 320/391	loss 1.969018 (1.751640)	iter_time 0.014950 (0.008627)
Epoch 87/100	It 360/391	loss 1.607926 (1.750736)	iter_time 0.005185 (0.008497)


 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                | 88/100 [08:58<01:13,  6.12s/it]

Epoch 87/100	val_acc 47.83%
Epoch 88/100	It 0/391	loss 1.880148 (1.880148)	iter_time 0.113441 (0.113441)
Epoch 88/100	It 40/391	loss 1.393523 (1.717215)	iter_time 0.004958 (0.011363)
Epoch 88/100	It 80/391	loss 1.875729 (1.761956)	iter_time 0.005032 (0.009540)
Epoch 88/100	It 120/391	loss 1.831377 (1.755502)	iter_time 0.004673 (0.009396)
Epoch 88/100	It 160/391	loss 1.403335 (1.749009)	iter_time 0.004618 (0.009174)
Epoch 88/100	It 200/391	loss 1.722147 (1.755588)	iter_time 0.005185 (0.009063)
Epoch 88/100	It 240/391	loss 1.801977 (1.752672)	iter_time 0.005002 (0.008904)
Epoch 88/100	It 280/391	loss 1.709504 (1.749240)	iter_time 0.005300 (0.008722)
Epoch 88/100	It 320/391	loss 1.492226 (1.745435)	iter_time 0.004926 (0.008600)
Epoch 88/100	It 360/391	loss 2.028992 (1.746081)	iter_time 0.005340 (0.008532)


 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 89/100 [09:04<01:07,  6.11s/it]

Epoch 88/100	val_acc 47.83%
Epoch 89/100	It 0/391	loss 1.838662 (1.838662)	iter_time 0.097649 (0.097649)
Epoch 89/100	It 40/391	loss 1.683396 (1.771283)	iter_time 0.005180 (0.010956)
Epoch 89/100	It 80/391	loss 1.773976 (1.735135)	iter_time 0.004973 (0.009610)
Epoch 89/100	It 120/391	loss 1.752469 (1.734968)	iter_time 0.005018 (0.008948)
Epoch 89/100	It 160/391	loss 1.857284 (1.732923)	iter_time 0.004989 (0.008819)
Epoch 89/100	It 200/391	loss 1.640663 (1.734116)	iter_time 0.015296 (0.008653)
Epoch 89/100	It 240/391	loss 1.806554 (1.741840)	iter_time 0.005065 (0.008419)
Epoch 89/100	It 280/391	loss 1.881753 (1.741761)	iter_time 0.022093 (0.008411)
Epoch 89/100	It 320/391	loss 1.652166 (1.742445)	iter_time 0.005111 (0.008324)
Epoch 89/100	It 360/391	loss 1.811185 (1.741009)	iter_time 0.007492 (0.008272)


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 90/100 [09:10<01:00,  6.10s/it]

Epoch 89/100	val_acc 47.82%
Epoch 90/100	It 0/391	loss 1.670792 (1.670792)	iter_time 0.117021 (0.117021)
Epoch 90/100	It 40/391	loss 1.424950 (1.717128)	iter_time 0.005090 (0.010629)
Epoch 90/100	It 80/391	loss 1.721587 (1.745282)	iter_time 0.010720 (0.009596)
Epoch 90/100	It 120/391	loss 1.886237 (1.750584)	iter_time 0.005075 (0.009141)
Epoch 90/100	It 160/391	loss 1.843877 (1.733727)	iter_time 0.004790 (0.009011)
Epoch 90/100	It 200/391	loss 1.865360 (1.737547)	iter_time 0.022158 (0.008815)
Epoch 90/100	It 240/391	loss 1.859219 (1.738685)	iter_time 0.005026 (0.008701)
Epoch 90/100	It 280/391	loss 1.866531 (1.748539)	iter_time 0.005145 (0.008613)
Epoch 90/100	It 320/391	loss 1.707254 (1.742990)	iter_time 0.025836 (0.008580)
Epoch 90/100	It 360/391	loss 2.122715 (1.742901)	iter_time 0.004983 (0.008499)


 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 91/100 [09:16<00:55,  6.13s/it]

Epoch 90/100	val_acc 47.81%
Epoch 91/100	It 0/391	loss 1.555631 (1.555631)	iter_time 0.099833 (0.099833)
Epoch 91/100	It 40/391	loss 1.785741 (1.704379)	iter_time 0.005336 (0.010107)
Epoch 91/100	It 80/391	loss 1.600272 (1.734841)	iter_time 0.005379 (0.009202)
Epoch 91/100	It 120/391	loss 1.904362 (1.744716)	iter_time 0.004956 (0.008785)
Epoch 91/100	It 160/391	loss 2.075423 (1.746723)	iter_time 0.004952 (0.008657)
Epoch 91/100	It 200/391	loss 1.727503 (1.746742)	iter_time 0.026529 (0.008659)
Epoch 91/100	It 240/391	loss 1.503870 (1.743287)	iter_time 0.004950 (0.008454)
Epoch 91/100	It 280/391	loss 1.889093 (1.742674)	iter_time 0.004879 (0.008405)
Epoch 91/100	It 320/391	loss 1.801178 (1.744246)	iter_time 0.005263 (0.008379)
Epoch 91/100	It 360/391	loss 1.563684 (1.744279)	iter_time 0.004986 (0.008311)


 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████           | 92/100 [09:22<00:48,  6.08s/it]

Epoch 91/100	val_acc 47.82%
Epoch 92/100	It 0/391	loss 1.501301 (1.501301)	iter_time 0.098836 (0.098836)
Epoch 92/100	It 40/391	loss 1.487405 (1.774033)	iter_time 0.008923 (0.010323)
Epoch 92/100	It 80/391	loss 1.768227 (1.768980)	iter_time 0.004979 (0.009430)
Epoch 92/100	It 120/391	loss 1.631948 (1.763509)	iter_time 0.005231 (0.008932)
Epoch 92/100	It 160/391	loss 1.653540 (1.765141)	iter_time 0.005209 (0.008626)
Epoch 92/100	It 200/391	loss 1.733730 (1.758185)	iter_time 0.021108 (0.008563)
Epoch 92/100	It 240/391	loss 2.042485 (1.752150)	iter_time 0.005179 (0.008413)
Epoch 92/100	It 280/391	loss 1.826892 (1.751338)	iter_time 0.005018 (0.008387)
Epoch 92/100	It 320/391	loss 1.618888 (1.750490)	iter_time 0.010876 (0.008362)
Epoch 92/100	It 360/391	loss 1.541025 (1.750007)	iter_time 0.024644 (0.008360)


 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍         | 93/100 [09:28<00:42,  6.07s/it]

Epoch 92/100	val_acc 47.83%
Epoch 93/100	It 0/391	loss 1.588773 (1.588773)	iter_time 0.118630 (0.118630)
Epoch 93/100	It 40/391	loss 1.620669 (1.714670)	iter_time 0.004966 (0.010443)
Epoch 93/100	It 80/391	loss 1.535031 (1.732240)	iter_time 0.005187 (0.009215)
Epoch 93/100	It 120/391	loss 1.617685 (1.730861)	iter_time 0.016861 (0.008813)
Epoch 93/100	It 160/391	loss 1.892108 (1.739982)	iter_time 0.004913 (0.008613)
Epoch 93/100	It 200/391	loss 1.721577 (1.736419)	iter_time 0.017177 (0.008394)
Epoch 93/100	It 240/391	loss 1.858511 (1.736608)	iter_time 0.005067 (0.008432)
Epoch 93/100	It 280/391	loss 1.882612 (1.744783)	iter_time 0.015805 (0.008321)
Epoch 93/100	It 320/391	loss 1.795012 (1.749050)	iter_time 0.017429 (0.008318)
Epoch 93/100	It 360/391	loss 1.933176 (1.744370)	iter_time 0.026670 (0.008226)


 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 94/100 [09:34<00:36,  6.06s/it]

Epoch 93/100	val_acc 47.84%
Epoch 94/100	It 0/391	loss 1.772318 (1.772318)	iter_time 0.100950 (0.100950)
Epoch 94/100	It 40/391	loss 1.505851 (1.697029)	iter_time 0.004908 (0.011049)
Epoch 94/100	It 80/391	loss 1.842722 (1.714012)	iter_time 0.004855 (0.009527)
Epoch 94/100	It 120/391	loss 1.796767 (1.731426)	iter_time 0.005432 (0.009237)
Epoch 94/100	It 160/391	loss 1.689925 (1.733718)	iter_time 0.005007 (0.008888)
Epoch 94/100	It 200/391	loss 1.825818 (1.743474)	iter_time 0.005181 (0.008770)
Epoch 94/100	It 240/391	loss 1.638471 (1.739934)	iter_time 0.004817 (0.008694)
Epoch 94/100	It 280/391	loss 1.641376 (1.744506)	iter_time 0.004952 (0.008578)
Epoch 94/100	It 320/391	loss 1.982261 (1.746743)	iter_time 0.004909 (0.008531)
Epoch 94/100	It 360/391	loss 1.809827 (1.745319)	iter_time 0.004960 (0.008485)


 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 95/100 [09:40<00:30,  6.06s/it]

Epoch 94/100	val_acc 47.84%
Epoch 95/100	It 0/391	loss 1.474926 (1.474926)	iter_time 0.117714 (0.117714)
Epoch 95/100	It 40/391	loss 1.758602 (1.739017)	iter_time 0.005220 (0.010741)
Epoch 95/100	It 80/391	loss 1.818313 (1.732742)	iter_time 0.004980 (0.009416)
Epoch 95/100	It 120/391	loss 1.893392 (1.738710)	iter_time 0.004988 (0.009038)
Epoch 95/100	It 160/391	loss 1.833065 (1.742828)	iter_time 0.005362 (0.008877)
Epoch 95/100	It 200/391	loss 1.824673 (1.746767)	iter_time 0.004703 (0.008662)
Epoch 95/100	It 240/391	loss 1.627605 (1.749151)	iter_time 0.004736 (0.008513)
Epoch 95/100	It 280/391	loss 1.698728 (1.746487)	iter_time 0.004795 (0.008647)
Epoch 95/100	It 320/391	loss 1.767904 (1.747028)	iter_time 0.004940 (0.008488)
Epoch 95/100	It 360/391	loss 1.787534 (1.750145)	iter_time 0.004683 (0.008546)


 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 96/100 [09:47<00:24,  6.07s/it]

Epoch 95/100	val_acc 47.85%
Epoch 96/100	It 0/391	loss 1.556823 (1.556823)	iter_time 0.118380 (0.118380)
Epoch 96/100	It 40/391	loss 2.074423 (1.742876)	iter_time 0.005163 (0.010840)
Epoch 96/100	It 80/391	loss 1.516570 (1.739358)	iter_time 0.004953 (0.009151)
Epoch 96/100	It 120/391	loss 1.864884 (1.742655)	iter_time 0.004971 (0.008866)
Epoch 96/100	It 160/391	loss 1.662130 (1.745172)	iter_time 0.004988 (0.008596)
Epoch 96/100	It 200/391	loss 1.711417 (1.743056)	iter_time 0.005265 (0.008358)
Epoch 96/100	It 240/391	loss 1.599575 (1.744266)	iter_time 0.004987 (0.008263)
Epoch 96/100	It 280/391	loss 1.753150 (1.742803)	iter_time 0.005316 (0.008195)
Epoch 96/100	It 320/391	loss 1.663641 (1.746537)	iter_time 0.004882 (0.008264)
Epoch 96/100	It 360/391	loss 1.783911 (1.744973)	iter_time 0.005362 (0.008231)


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉    | 97/100 [09:53<00:18,  6.08s/it]

Epoch 96/100	val_acc 47.86%
Epoch 97/100	It 0/391	loss 1.422248 (1.422248)	iter_time 0.100762 (0.100762)
Epoch 97/100	It 40/391	loss 1.817909 (1.733642)	iter_time 0.004953 (0.010588)
Epoch 97/100	It 80/391	loss 1.926550 (1.736220)	iter_time 0.005285 (0.009481)
Epoch 97/100	It 120/391	loss 1.894310 (1.737776)	iter_time 0.004933 (0.008958)
Epoch 97/100	It 160/391	loss 1.808139 (1.732332)	iter_time 0.012165 (0.008642)
Epoch 97/100	It 200/391	loss 1.784652 (1.738060)	iter_time 0.005053 (0.008393)
Epoch 97/100	It 240/391	loss 1.804639 (1.745110)	iter_time 0.005108 (0.008307)
Epoch 97/100	It 280/391	loss 1.617455 (1.743530)	iter_time 0.005246 (0.008361)
Epoch 97/100	It 320/391	loss 1.629455 (1.738956)	iter_time 0.004946 (0.008288)
Epoch 97/100	It 360/391	loss 1.613332 (1.732348)	iter_time 0.025172 (0.008251)


 98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎  | 98/100 [09:59<00:12,  6.05s/it]

Epoch 97/100	val_acc 47.84%
Epoch 98/100	It 0/391	loss 1.692616 (1.692616)	iter_time 0.100038 (0.100038)
Epoch 98/100	It 40/391	loss 2.076674 (1.748595)	iter_time 0.005013 (0.010117)
Epoch 98/100	It 80/391	loss 1.790367 (1.760427)	iter_time 0.005008 (0.009295)
Epoch 98/100	It 120/391	loss 1.577076 (1.750340)	iter_time 0.005146 (0.008710)
Epoch 98/100	It 160/391	loss 1.778699 (1.739570)	iter_time 0.005056 (0.008423)
Epoch 98/100	It 200/391	loss 1.690460 (1.741975)	iter_time 0.004978 (0.008404)
Epoch 98/100	It 240/391	loss 1.739845 (1.736997)	iter_time 0.004817 (0.008309)
Epoch 98/100	It 280/391	loss 1.721685 (1.739955)	iter_time 0.004850 (0.008433)
Epoch 98/100	It 320/391	loss 1.666591 (1.744621)	iter_time 0.004955 (0.008484)
Epoch 98/100	It 360/391	loss 1.931059 (1.744200)	iter_time 0.032913 (0.008608)


 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 99/100 [10:05<00:06,  6.08s/it]

Epoch 98/100	val_acc 47.84%
Epoch 99/100	It 0/391	loss 1.605502 (1.605502)	iter_time 0.118343 (0.118343)
Epoch 99/100	It 40/391	loss 1.685499 (1.738668)	iter_time 0.005188 (0.011644)
Epoch 99/100	It 80/391	loss 1.598729 (1.760959)	iter_time 0.004999 (0.010268)
Epoch 99/100	It 120/391	loss 1.553457 (1.752099)	iter_time 0.020096 (0.009532)
Epoch 99/100	It 160/391	loss 1.728272 (1.758151)	iter_time 0.004851 (0.009234)
Epoch 99/100	It 200/391	loss 1.560333 (1.753113)	iter_time 0.004815 (0.008991)
Epoch 99/100	It 240/391	loss 1.898610 (1.750765)	iter_time 0.004934 (0.008867)
Epoch 99/100	It 280/391	loss 1.786513 (1.746032)	iter_time 0.004962 (0.008789)
Epoch 99/100	It 320/391	loss 1.479509 (1.745728)	iter_time 0.004622 (0.008773)
Epoch 99/100	It 360/391	loss 1.597241 (1.748891)	iter_time 0.004755 (0.008712)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [10:11<00:00,  6.11s/it]

Epoch 99/100	val_acc 47.83%
Best validation accuracy 0.47864



