## Imports

In [1]:
import os
import torch
import torch.nn as nn
import wandb
import numpy as np

from dataset.dataset import MultiModalDataset
from mmcv_model.mmcv_csn import ResNet3dCSN
from mmcv_model.scheduler import GradualWarmupScheduler

from model.multimodal_neck import MultiModalNeck
from model.simple_head import SimpleHead
from model.flow_autoencoder import FlowAutoencoder



## Training functions

In [2]:

def top_k_accuracy(scores, labels, topk=(1, )):
    """Compute the top-k accuracy for every requested ``k``.

    Args:
        scores (list[np.ndarray]): Prediction scores, indexed as
            (sample, class) once stacked.
        labels (list[int]): Ground-truth class index of each sample.
        topk (tuple[int]): The ``k`` values to evaluate. Default: (1, ).

    Returns:
        np.ndarray: One accuracy (fraction of samples whose label is among
        the ``k`` highest-scoring classes) per entry of ``topk``, in the
        same order.
    """
    # Column vector so the comparison below broadcasts against the k
    # candidate classes of every sample.
    label_column = np.array(labels)[:, np.newaxis]
    accuracies = np.zeros(len(topk))

    for slot, k in enumerate(topk):
        # argsort is ascending: the last k columns are the k best classes,
        # reversed so the best one comes first.
        ranked = np.argsort(scores, axis=1)
        best_k = ranked[:, -k:][:, ::-1]
        hits = np.any(best_k == label_column, axis=1)
        accuracies[slot] = hits.sum() / hits.shape[0]

    return accuracies


def train_one_epoch(epoch_index, interval=5):
    """Run one training epoch over the module-level ``train_loader``.

    Uses these module-level names, which must exist before the call:
    ``train_loader``, ``model``, ``optimizer``, ``scheduler``, ``loss_fn``
    and ``device``.

    Args:
        epoch_index (int): Current epoch number; only used in the log line.
        interval (int): Number of batches per logging window. The printed
            loss is the mean over one window.

    Returns:
        tuple[float, float]: ``(last_loss, lr)``. ``last_loss`` is the mean
        loss of the most recently completed logging window, or the mean
        over all batches when the epoch is shorter than one window (0.0 if
        the loader is empty). ``lr`` is ``scheduler.get_last_lr()[0]``.
    """
    running_loss = 0.
    last_loss = 0.
    num_batches = 0

    # enumerate() gives the batch index needed for intra-epoch reporting.
    for i, results in enumerate(train_loader):
        rgb = results['rgb']
        flow = results['flow']
        targets = results['label']
        # Flatten the labels to 1-D before handing them to the loss.
        targets = targets.reshape(-1, )

        rgb, flow, targets = rgb.to(device), flow.to(device), targets.to(device)

        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(rgb=rgb,
                        flow=flow)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, targets)
        loss.backward()

        # Clip the global L2 gradient norm to 40 before the update.
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), max_norm=40, norm_type=2.0)

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        num_batches = i + 1
        if i % interval == interval-1:
            last_loss = running_loss / interval  # loss per batch
            print(
                f'Epoch [{epoch_index}][{i+1}/{len(train_loader)}], lr: {scheduler.get_last_lr()[0]:.5e}, loss: {last_loss:.5}')
            running_loss = 0.

    # An epoch with fewer than `interval` batches never completes a logging
    # window; report the mean of what was seen instead of a misleading 0.0.
    if 0 < num_batches < interval:
        last_loss = running_loss / num_batches

    return last_loss, scheduler.get_last_lr()[0]


def validate():
    """Evaluate the module-level ``model`` on the module-level ``test_loader``.

    Uses these module-level names, which must exist before the call:
    ``test_loader``, ``model``, ``loss_fn``, ``device`` and
    ``top_k_accuracy``. Puts ``model`` into eval mode and leaves it there.

    Returns:
        tuple[float, float, float]: ``(avg_vloss, top1_acc, top5_acc)``.
        ``avg_vloss`` is the mean of the per-batch losses as a Python
        float; the accuracies are the means of the per-batch top-1 / top-5
        accuracies, as fractions in [0, 1].

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    running_vloss = 0.0
    running_vacc = np.zeros(2)
    num_batches = 0

    print('Evaluating top_k_accuracy...')

    model.eval()
    with torch.inference_mode():
        for results in test_loader:
            rgb = results['rgb']
            flow = results['flow']
            vtargets = results['label']

            # Flatten the labels to 1-D before handing them to the loss.
            vtargets = vtargets.reshape(-1, )

            rgb, flow, vtargets = rgb.to(device), flow.to(device), vtargets.to(device)

            voutputs = model(rgb=rgb,
                             flow=flow)

            vloss = loss_fn(voutputs, vtargets)
            # .item() keeps the accumulator a plain float rather than a
            # tensor living on `device`.
            running_vloss += vloss.item()

            running_vacc += top_k_accuracy(voutputs.detach().cpu().numpy(),
                                           vtargets.detach().cpu().numpy(), topk=(1, 5))
            num_batches += 1

    if num_batches == 0:
        raise ValueError('test_loader yielded no batches; nothing to validate.')

    # Loss and accuracy share one denominator: the number of batches seen.
    # NOTE: averaging per-batch accuracies equals the dataset accuracy only
    # when every batch has the same size.
    avg_vloss = running_vloss / num_batches

    acc = running_vacc / num_batches
    top1_acc = acc[0].item()
    top5_acc = acc[1].item()

    return (avg_vloss, top1_acc, top5_acc)

## Get last layer of CSN

In [3]:
class CSNBottleneck(nn.Module):
    """Residual 3-D bottleneck with a depthwise 3x3x3 middle stage.

    The block is: 1x1x1 conv -> BN -> ReLU -> middle stage -> BN -> ReLU ->
    1x1x1 conv (to ``channels * expansion``) -> BN, added to a shortcut and
    passed through a final ReLU.

    Args:
        in_channels (int): Channels of the input tensor.
        channels (int): Bottleneck width; the block outputs
            ``channels * expansion`` channels.
        stride (int): Stride of the depthwise conv (and of the projection
            shortcut, when one is needed). Default: 1.
        mode (str): ``'ip'`` puts an extra 1x1x1 conv in front of the
            depthwise conv; ``'ir'`` uses the depthwise conv alone.
            Default: ``'ip'``.
    """
    expansion = 4

    def __init__(self, in_channels, channels, stride=1, mode='ip'):
        super().__init__()

        assert mode in ('ip', 'ir')
        self.mode = mode
        out_channels = channels * self.expansion

        # Sub-modules are created and registered in a fixed order so that
        # seeded initialisation and state_dict keys stay stable.
        self.conv1 = nn.Conv3d(in_channels, channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm3d(channels)
        self.relu = nn.ReLU(inplace=True)

        # Middle stage: optional pointwise conv, then the depthwise conv
        # (groups == channels).
        pointwise = (
            [nn.Conv3d(channels, channels, kernel_size=1, stride=1, bias=False)]
            if self.mode == 'ip' else []
        )
        depthwise = nn.Conv3d(channels, channels, kernel_size=3, stride=stride,
                              padding=1, bias=False, groups=channels)
        self.conv2 = nn.Sequential(*pointwise, depthwise)
        self.bn2 = nn.BatchNorm3d(channels)

        self.conv3 = nn.Conv3d(channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(out_channels)

        # The shortcut is the identity (an empty Sequential) unless the
        # spatial/temporal size or the channel count changes.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.downsample = nn.Sequential(
                nn.Conv3d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm3d(out_channels)
            )
        else:
            self.downsample = nn.Sequential()

    def forward(self, x):
        """Apply the bottleneck to ``x`` and return the activated sum."""
        residual = self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += residual
        return self.relu(y)
    
class Fusion(nn.Module):
    """Fuse RGB and/or flow feature maps with a stack of CSN bottlenecks.

    The second-to-last element of each provided feature sequence is
    concatenated along the channel dimension, passed through ``n_blocks``
    ``CSNBottleneck`` blocks in ``'ir'`` mode, globally average-pooled and
    flattened.

    Args:
        in_channels (int): Channels of the concatenated input.
        channels (int): Bottleneck width; the output has
            ``channels * CSNBottleneck.expansion`` features per sample.
        n_blocks (int): Number of bottleneck blocks. Default: 3.
        stride (int): Stride of the first block. Default: 2.
        device (str): Device the bottleneck stack is moved to at
            construction time. Default: ``'cuda'``.
    """

    def __init__(self,
                 in_channels,
                 channels,
                 n_blocks=3,
                 stride=2,
                 device='cuda'):

        super().__init__()
        block = CSNBottleneck

        # Only the first block changes stride / channel count; the others
        # map channels * expansion -> channels * expansion.
        layers = [block(in_channels, channels, stride, mode='ir')]
        in_channels = channels * block.expansion
        for _ in range(1, n_blocks):
            layers.append(block(in_channels, channels, mode='ir'))

        self.fusion = nn.Sequential(*layers).to(device)
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        # Kept for backward compatibility; forward() no longer reads it.
        self.device = device

    def forward(self,
                rgb=None,
                flow=None):
        """Fuse the given streams.

        Args:
            rgb: Indexable sequence of RGB feature maps, or ``None`` to
                leave the stream out. Only ``rgb[-2]`` is used.
            flow: Indexable sequence of flow feature maps, or ``None`` to
                leave the stream out. Only ``flow[-2]`` is used.

        Returns:
            torch.Tensor: Pooled, fused features flattened from dim 1.

        Raises:
            ValueError: If both ``rgb`` and ``flow`` are ``None``.
        """
        # Collect the inputs first and concatenate once. This avoids seeding
        # the concat with an empty tensor pinned to ``self.device``.
        features = [stream[-2] for stream in (rgb, flow) if stream is not None]
        if not features:
            raise ValueError('Fusion.forward needs at least one of rgb or flow.')

        fused = self.fusion(torch.cat(features, dim=1))
        fused_flattened = torch.flatten(self.avg_pool(fused), start_dim=1)

        return fused_flattened

## Seven-Sees-Net-V2

In [4]:
class SevenSeesNetV2(nn.Module):
    """Two-stream model: RGB backbone + flow backbone -> fusion -> head.

    Args:
        rgb_backbone (nn.Module): Called on the ``rgb`` input.
        flow_backbone (nn.Module): Called on the ``flow`` input.
        fusion (nn.Module): Called as ``fusion(rgb=..., flow=...)`` with the
            two backbone outputs.
        head (nn.Module): Called on the fusion output; its result is the
            model output.
    """

    def __init__(self, rgb_backbone, flow_backbone, fusion, head):
        super().__init__()
        self.rgb_backbone = rgb_backbone
        self.flow_backbone = flow_backbone
        self.fusion = fusion
        self.head = head

    def forward(self, rgb, flow):
        """Run both streams, fuse them and return the head's output."""
        rgb_features = self.rgb_backbone(rgb)
        flow_features = self.flow_backbone(flow)
        fused = self.fusion(rgb=rgb_features, flow=flow_features)
        return self.head(fused)

## Model Assembly

In [5]:
# Assemble the two-stream model, then train and evaluate it epoch by epoch.
#
# NOTE(review): the checkpoints are read relative to the current working
# directory *before* the chdir below, while every later path (data, work
# dir) is relative to the directory three levels up. Re-running this cell
# in the same kernel would chdir a second time.
print('Loading rgb backbone checkpoint...')
# map_location='cpu' keeps loading independent of the device the
# checkpoint was saved from; load_state_dict copies into the model anyway.
rgb_checkpoint = torch.load('rgb_backbone.pth', map_location='cpu')
print('Loading flow backbone checkpoint...')
flow_checkpoint = torch.load('flow_backbone.pth', map_location='cpu')

os.chdir('../../..')

wandb.init(entity="cares", project="jack-slr",
           group="fusion", name="7sees-v2")

# Target device (hard-coded: this script expects a CUDA GPU).
device = 'cuda'

# Configs
work_dir = 'work_dirs/7sees-v2/'
batch_size = 1

os.makedirs(work_dir, exist_ok=True)

train_dataset = MultiModalDataset(ann_file='data/wlasl/train_annotations.txt',
                                  root_dir='data/wlasl/rawframes',
                                  clip_len=32,
                                  modalities=('rgb', 'flow'),
                                  resolution=224,
                                  frame_interval=1,
                                  input_resolution=256,
                                  num_clips=1
                                  )

test_dataset = MultiModalDataset(ann_file='data/wlasl/test_annotations.txt',
                                 root_dir='data/wlasl/rawframes',
                                 clip_len=32,
                                 resolution=224,
                                 modalities=('rgb', 'flow'),
                                 test_mode=True,
                                 frame_interval=1,
                                 input_resolution=256,
                                 num_clips=1
                                 )


# Setting up dataloaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4,
                                           pin_memory=True)

# Evaluation averages over the whole set, so shuffling buys nothing; a
# fixed order keeps evaluation runs reproducible.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=4,
                                          pin_memory=True)

# Custom multimodal model
rgb_backbone = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True,
    out_indices=(0,1,2,3,)
)

# rgb_backbone.init_weights()

rgb_backbone.load_state_dict(rgb_checkpoint)
del rgb_checkpoint  # free the host copy once the weights are in the model

flow_backbone = ResNet3dCSN(
    pretrained2d=False,
    # pretrained=None,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True,
    out_indices=(0,1,2,3,)
)

# flow_backbone.init_weights()

flow_backbone.load_state_dict(flow_checkpoint)
del flow_checkpoint

print('Backbones loaded successfully.')

# Freeze the backbones: only the fusion blocks and the head are trained.
for name, para in rgb_backbone.named_parameters():
    para.requires_grad = False

for name, para in flow_backbone.named_parameters():
    para.requires_grad = False


# in_channels=2048 is presumably 2 x 1024 channels from the second-to-last
# stage of each backbone -- TODO confirm against ResNet3dCSN.
fusion = Fusion(in_channels=2048,
                n_blocks=3,
                channels=1024,
                device=device)

# 4096 == Fusion channels (1024) * CSNBottleneck.expansion (4).
head = SimpleHead(num_classes=400,
                      in_channels=4096,
                      dropout_ratio=0.5,
                      init_std=0.01)

head.init_weights()

model = SevenSeesNetV2(rgb_backbone=rgb_backbone,
                       flow_backbone=flow_backbone,
                       fusion=fusion,
                       head=head
                      )

# # Load model checkpoint
# checkpoint = torch.load(work_dir+'latest.pth')
# model.load_state_dict(checkpoint)

# Specify optimizer
optimizer = torch.optim.SGD(
    model.parameters(), lr=0.000125, momentum=0.9, weight_decay=0.00001)

# Specify total epochs
# NOTE(review): effectively "train until interrupted"; a checkpoint is
# written every epoch, so disk usage grows without bound.
epochs = 1000000

# Specify learning rate scheduler: warm-up for 16 epochs, then step decay
# at epochs 34 and 84.
# NOTE(review): the run log shows lr == 0 for the whole first epoch, so
# epoch 1 performs no weight update -- confirm this is intended for
# GradualWarmupScheduler with multiplier=1.
scheduler_steplr = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[34, 84], gamma=0.1)
scheduler = GradualWarmupScheduler(
    optimizer, multiplier=1, total_epoch=16, after_scheduler=scheduler_steplr)

# Specify Loss
loss_fn = nn.CrossEntropyLoss()

# Setup wandb
wandb.watch(model, log_freq=10)

# Train Loop

# Transfer model to device
model.to(device)

for epoch in range(epochs):
    # Switch to training mode and run one pass over the training set
    model.train(True)
    avg_loss, learning_rate = train_one_epoch(epoch+1)

    # Switch to evaluation mode for reporting
    model.train(False)

    avg_vloss, top1_acc, top5_acc = validate()

    print(
        f'top1_acc: {top1_acc:.4}, top5_acc: {top5_acc:.4}, train_loss: {avg_loss:.5}, val_loss: {avg_vloss:.5}')

    # Save the model's state after every epoch
    model_path = os.path.join(work_dir, f'epoch_{epoch+1}.pth')
    print(f'Saving checkpoint at {epoch+1} epochs...')
    torch.save(model.state_dict(), model_path)

    # Adjust learning rate
    scheduler.step()

    # Track wandb
    wandb.log({'train/loss': avg_loss,
               'train/learning_rate': learning_rate,
               'val/loss': avg_vloss,
               'val/top1_accuracy': top1_acc,
               'val/top5_accuracy': top5_acc})

Loading rgb backbone checkpoint...
Loading flow backbone checkpoint...


[34m[1mwandb[0m: Currently logged in as: [33msttaseen[0m ([33mcares[0m). Use [1m`wandb login --relogin`[0m to force relogin


Backbones loaded successfully.
Epoch [1][5/1780], lr: 0.00000e+00, loss: 6.5499
Epoch [1][10/1780], lr: 0.00000e+00, loss: 6.3253
Epoch [1][15/1780], lr: 0.00000e+00, loss: 6.4714
Epoch [1][20/1780], lr: 0.00000e+00, loss: 6.1576
Epoch [1][25/1780], lr: 0.00000e+00, loss: 6.3232
Epoch [1][30/1780], lr: 0.00000e+00, loss: 5.5537
Epoch [1][35/1780], lr: 0.00000e+00, loss: 6.3892
Epoch [1][40/1780], lr: 0.00000e+00, loss: 6.4837
Epoch [1][45/1780], lr: 0.00000e+00, loss: 6.0692
Epoch [1][50/1780], lr: 0.00000e+00, loss: 5.9663
Epoch [1][55/1780], lr: 0.00000e+00, loss: 6.9095
Epoch [1][60/1780], lr: 0.00000e+00, loss: 5.8866
Epoch [1][65/1780], lr: 0.00000e+00, loss: 6.1869
Epoch [1][70/1780], lr: 0.00000e+00, loss: 5.8705
Epoch [1][75/1780], lr: 0.00000e+00, loss: 6.5528
Epoch [1][80/1780], lr: 0.00000e+00, loss: 6.1477
Epoch [1][85/1780], lr: 0.00000e+00, loss: 6.6303
Epoch [1][90/1780], lr: 0.00000e+00, loss: 6.7406
Epoch [1][95/1780], lr: 0.00000e+00, loss: 7.2196
Epoch [1][100/1780],

Epoch [1][810/1780], lr: 0.00000e+00, loss: 6.6972
Epoch [1][815/1780], lr: 0.00000e+00, loss: 6.1227
Epoch [1][820/1780], lr: 0.00000e+00, loss: 6.0791
Epoch [1][825/1780], lr: 0.00000e+00, loss: 6.3471
Epoch [1][830/1780], lr: 0.00000e+00, loss: 6.1136
Epoch [1][835/1780], lr: 0.00000e+00, loss: 6.1614
Epoch [1][840/1780], lr: 0.00000e+00, loss: 5.8893
Epoch [1][845/1780], lr: 0.00000e+00, loss: 6.1573
Epoch [1][850/1780], lr: 0.00000e+00, loss: 6.8555
Epoch [1][855/1780], lr: 0.00000e+00, loss: 6.214
Epoch [1][860/1780], lr: 0.00000e+00, loss: 6.2795
Epoch [1][865/1780], lr: 0.00000e+00, loss: 6.7227
Epoch [1][870/1780], lr: 0.00000e+00, loss: 5.6967
Epoch [1][875/1780], lr: 0.00000e+00, loss: 7.1259
Epoch [1][880/1780], lr: 0.00000e+00, loss: 6.8425
Epoch [1][885/1780], lr: 0.00000e+00, loss: 6.4727
Epoch [1][890/1780], lr: 0.00000e+00, loss: 6.2777
Epoch [1][895/1780], lr: 0.00000e+00, loss: 5.8119
Epoch [1][900/1780], lr: 0.00000e+00, loss: 6.5597
Epoch [1][905/1780], lr: 0.00000

Epoch [1][1605/1780], lr: 0.00000e+00, loss: 6.5838
Epoch [1][1610/1780], lr: 0.00000e+00, loss: 6.2479
Epoch [1][1615/1780], lr: 0.00000e+00, loss: 6.4374
Epoch [1][1620/1780], lr: 0.00000e+00, loss: 6.5475
Epoch [1][1625/1780], lr: 0.00000e+00, loss: 5.5391
Epoch [1][1630/1780], lr: 0.00000e+00, loss: 6.389
Epoch [1][1635/1780], lr: 0.00000e+00, loss: 6.3914
Epoch [1][1640/1780], lr: 0.00000e+00, loss: 6.5619
Epoch [1][1645/1780], lr: 0.00000e+00, loss: 6.6096
Epoch [1][1650/1780], lr: 0.00000e+00, loss: 7.7509
Epoch [1][1655/1780], lr: 0.00000e+00, loss: 5.7503
Epoch [1][1660/1780], lr: 0.00000e+00, loss: 6.5255
Epoch [1][1665/1780], lr: 0.00000e+00, loss: 6.2155
Epoch [1][1670/1780], lr: 0.00000e+00, loss: 6.6386
Epoch [1][1675/1780], lr: 0.00000e+00, loss: 5.8171
Epoch [1][1680/1780], lr: 0.00000e+00, loss: 6.5461
Epoch [1][1685/1780], lr: 0.00000e+00, loss: 6.2346
Epoch [1][1690/1780], lr: 0.00000e+00, loss: 6.0761
Epoch [1][1695/1780], lr: 0.00000e+00, loss: 6.7767
Epoch [1][170

Epoch [2][620/1780], lr: 7.81250e-06, loss: 5.8688
Epoch [2][625/1780], lr: 7.81250e-06, loss: 6.5075
Epoch [2][630/1780], lr: 7.81250e-06, loss: 5.1973
Epoch [2][635/1780], lr: 7.81250e-06, loss: 5.3725
Epoch [2][640/1780], lr: 7.81250e-06, loss: 6.3239
Epoch [2][645/1780], lr: 7.81250e-06, loss: 4.9549
Epoch [2][650/1780], lr: 7.81250e-06, loss: 6.1423
Epoch [2][655/1780], lr: 7.81250e-06, loss: 6.2888
Epoch [2][660/1780], lr: 7.81250e-06, loss: 5.3005
Epoch [2][665/1780], lr: 7.81250e-06, loss: 5.6807
Epoch [2][670/1780], lr: 7.81250e-06, loss: 5.7509
Epoch [2][675/1780], lr: 7.81250e-06, loss: 5.9233
Epoch [2][680/1780], lr: 7.81250e-06, loss: 6.116
Epoch [2][685/1780], lr: 7.81250e-06, loss: 5.8243
Epoch [2][690/1780], lr: 7.81250e-06, loss: 5.6899
Epoch [2][695/1780], lr: 7.81250e-06, loss: 5.8341
Epoch [2][700/1780], lr: 7.81250e-06, loss: 5.2889
Epoch [2][705/1780], lr: 7.81250e-06, loss: 5.5589
Epoch [2][710/1780], lr: 7.81250e-06, loss: 6.6404
Epoch [2][715/1780], lr: 7.81250

Epoch [2][1420/1780], lr: 7.81250e-06, loss: 5.6579
Epoch [2][1425/1780], lr: 7.81250e-06, loss: 5.1729
Epoch [2][1430/1780], lr: 7.81250e-06, loss: 5.0824
Epoch [2][1435/1780], lr: 7.81250e-06, loss: 5.9609
Epoch [2][1440/1780], lr: 7.81250e-06, loss: 5.0958
Epoch [2][1445/1780], lr: 7.81250e-06, loss: 5.3359
Epoch [2][1450/1780], lr: 7.81250e-06, loss: 6.0337
Epoch [2][1455/1780], lr: 7.81250e-06, loss: 5.2842
Epoch [2][1460/1780], lr: 7.81250e-06, loss: 5.6281
Epoch [2][1465/1780], lr: 7.81250e-06, loss: 5.0554
Epoch [2][1470/1780], lr: 7.81250e-06, loss: 5.579
Epoch [2][1475/1780], lr: 7.81250e-06, loss: 5.4778
Epoch [2][1480/1780], lr: 7.81250e-06, loss: 5.2069
Epoch [2][1485/1780], lr: 7.81250e-06, loss: 5.2171
Epoch [2][1490/1780], lr: 7.81250e-06, loss: 4.6821
Epoch [2][1495/1780], lr: 7.81250e-06, loss: 5.6429
Epoch [2][1500/1780], lr: 7.81250e-06, loss: 5.0076
Epoch [2][1505/1780], lr: 7.81250e-06, loss: 5.7662
Epoch [2][1510/1780], lr: 7.81250e-06, loss: 5.7057
Epoch [2][151

Epoch [3][430/1780], lr: 1.56250e-05, loss: 5.754
Epoch [3][435/1780], lr: 1.56250e-05, loss: 5.7456
Epoch [3][440/1780], lr: 1.56250e-05, loss: 5.8657
Epoch [3][445/1780], lr: 1.56250e-05, loss: 4.6006
Epoch [3][450/1780], lr: 1.56250e-05, loss: 4.6123
Epoch [3][455/1780], lr: 1.56250e-05, loss: 4.958
Epoch [3][460/1780], lr: 1.56250e-05, loss: 5.0285
Epoch [3][465/1780], lr: 1.56250e-05, loss: 5.621
Epoch [3][470/1780], lr: 1.56250e-05, loss: 4.7644
Epoch [3][475/1780], lr: 1.56250e-05, loss: 4.6812
Epoch [3][480/1780], lr: 1.56250e-05, loss: 5.4275
Epoch [3][485/1780], lr: 1.56250e-05, loss: 5.1821
Epoch [3][490/1780], lr: 1.56250e-05, loss: 5.7031
Epoch [3][495/1780], lr: 1.56250e-05, loss: 5.3572
Epoch [3][500/1780], lr: 1.56250e-05, loss: 5.1889
Epoch [3][505/1780], lr: 1.56250e-05, loss: 4.5542
Epoch [3][510/1780], lr: 1.56250e-05, loss: 5.1919
Epoch [3][515/1780], lr: 1.56250e-05, loss: 5.2765
Epoch [3][520/1780], lr: 1.56250e-05, loss: 4.7574
Epoch [3][525/1780], lr: 1.56250e-

Epoch [3][1235/1780], lr: 1.56250e-05, loss: 5.5428
Epoch [3][1240/1780], lr: 1.56250e-05, loss: 4.9239
Epoch [3][1245/1780], lr: 1.56250e-05, loss: 5.2593
Epoch [3][1250/1780], lr: 1.56250e-05, loss: 4.8113
Epoch [3][1255/1780], lr: 1.56250e-05, loss: 5.324
Epoch [3][1260/1780], lr: 1.56250e-05, loss: 5.2851
Epoch [3][1265/1780], lr: 1.56250e-05, loss: 5.2432
Epoch [3][1270/1780], lr: 1.56250e-05, loss: 4.8647
Epoch [3][1275/1780], lr: 1.56250e-05, loss: 5.2787
Epoch [3][1280/1780], lr: 1.56250e-05, loss: 5.3194
Epoch [3][1285/1780], lr: 1.56250e-05, loss: 4.1739
Epoch [3][1290/1780], lr: 1.56250e-05, loss: 5.0539
Epoch [3][1295/1780], lr: 1.56250e-05, loss: 4.8355
Epoch [3][1300/1780], lr: 1.56250e-05, loss: 5.5295
Epoch [3][1305/1780], lr: 1.56250e-05, loss: 4.7968
Epoch [3][1310/1780], lr: 1.56250e-05, loss: 4.6473
Epoch [3][1315/1780], lr: 1.56250e-05, loss: 5.4428
Epoch [3][1320/1780], lr: 1.56250e-05, loss: 4.7757
Epoch [3][1325/1780], lr: 1.56250e-05, loss: 4.9832
Epoch [3][133

Epoch [4][240/1780], lr: 2.34375e-05, loss: 4.7857
Epoch [4][245/1780], lr: 2.34375e-05, loss: 4.2019
Epoch [4][250/1780], lr: 2.34375e-05, loss: 5.0609
Epoch [4][255/1780], lr: 2.34375e-05, loss: 4.5834
Epoch [4][260/1780], lr: 2.34375e-05, loss: 4.3236
Epoch [4][265/1780], lr: 2.34375e-05, loss: 4.8812
Epoch [4][270/1780], lr: 2.34375e-05, loss: 5.4626
Epoch [4][275/1780], lr: 2.34375e-05, loss: 5.2154
Epoch [4][280/1780], lr: 2.34375e-05, loss: 5.0962
Epoch [4][285/1780], lr: 2.34375e-05, loss: 5.2014
Epoch [4][290/1780], lr: 2.34375e-05, loss: 4.9221
Epoch [4][295/1780], lr: 2.34375e-05, loss: 4.7959
Epoch [4][300/1780], lr: 2.34375e-05, loss: 4.6533
Epoch [4][305/1780], lr: 2.34375e-05, loss: 4.8241
Epoch [4][310/1780], lr: 2.34375e-05, loss: 4.9124
Epoch [4][315/1780], lr: 2.34375e-05, loss: 4.6512
Epoch [4][320/1780], lr: 2.34375e-05, loss: 4.596
Epoch [4][325/1780], lr: 2.34375e-05, loss: 5.0231
Epoch [4][330/1780], lr: 2.34375e-05, loss: 5.053
Epoch [4][335/1780], lr: 2.34375e

Epoch [4][1045/1780], lr: 2.34375e-05, loss: 5.2479
Epoch [4][1050/1780], lr: 2.34375e-05, loss: 4.8969
Epoch [4][1055/1780], lr: 2.34375e-05, loss: 4.9647
Epoch [4][1060/1780], lr: 2.34375e-05, loss: 5.1492
Epoch [4][1065/1780], lr: 2.34375e-05, loss: 4.6376
Epoch [4][1070/1780], lr: 2.34375e-05, loss: 4.5652
Epoch [4][1075/1780], lr: 2.34375e-05, loss: 4.9192
Epoch [4][1080/1780], lr: 2.34375e-05, loss: 5.1398
Epoch [4][1085/1780], lr: 2.34375e-05, loss: 5.0389
Epoch [4][1090/1780], lr: 2.34375e-05, loss: 5.0794
Epoch [4][1095/1780], lr: 2.34375e-05, loss: 4.9174
Epoch [4][1100/1780], lr: 2.34375e-05, loss: 4.9206
Epoch [4][1105/1780], lr: 2.34375e-05, loss: 5.0251
Epoch [4][1110/1780], lr: 2.34375e-05, loss: 4.7265
Epoch [4][1115/1780], lr: 2.34375e-05, loss: 3.6664
Epoch [4][1120/1780], lr: 2.34375e-05, loss: 4.8983
Epoch [4][1125/1780], lr: 2.34375e-05, loss: 5.0733
Epoch [4][1130/1780], lr: 2.34375e-05, loss: 5.0114
Epoch [4][1135/1780], lr: 2.34375e-05, loss: 4.2852
Epoch [4][11

Epoch [5][45/1780], lr: 3.12500e-05, loss: 4.7686
Epoch [5][50/1780], lr: 3.12500e-05, loss: 5.2002
Epoch [5][55/1780], lr: 3.12500e-05, loss: 5.7211
Epoch [5][60/1780], lr: 3.12500e-05, loss: 5.0064
Epoch [5][65/1780], lr: 3.12500e-05, loss: 4.6387
Epoch [5][70/1780], lr: 3.12500e-05, loss: 4.883
Epoch [5][75/1780], lr: 3.12500e-05, loss: 4.3521
Epoch [5][80/1780], lr: 3.12500e-05, loss: 5.2272
Epoch [5][85/1780], lr: 3.12500e-05, loss: 5.137
Epoch [5][90/1780], lr: 3.12500e-05, loss: 5.2452
Epoch [5][95/1780], lr: 3.12500e-05, loss: 5.6758
Epoch [5][100/1780], lr: 3.12500e-05, loss: 5.0873
Epoch [5][105/1780], lr: 3.12500e-05, loss: 4.7279
Epoch [5][110/1780], lr: 3.12500e-05, loss: 5.1795
Epoch [5][115/1780], lr: 3.12500e-05, loss: 5.1725
Epoch [5][120/1780], lr: 3.12500e-05, loss: 5.17
Epoch [5][125/1780], lr: 3.12500e-05, loss: 4.6732
Epoch [5][130/1780], lr: 3.12500e-05, loss: 4.4412
Epoch [5][135/1780], lr: 3.12500e-05, loss: 5.3076
Epoch [5][140/1780], lr: 3.12500e-05, loss: 5.

Epoch [5][855/1780], lr: 3.12500e-05, loss: 5.3569
Epoch [5][860/1780], lr: 3.12500e-05, loss: 4.6633
Epoch [5][865/1780], lr: 3.12500e-05, loss: 5.2508
Epoch [5][870/1780], lr: 3.12500e-05, loss: 4.7668
Epoch [5][875/1780], lr: 3.12500e-05, loss: 4.8347
Epoch [5][880/1780], lr: 3.12500e-05, loss: 5.2937
Epoch [5][885/1780], lr: 3.12500e-05, loss: 5.0624
Epoch [5][890/1780], lr: 3.12500e-05, loss: 5.9639
Epoch [5][895/1780], lr: 3.12500e-05, loss: 4.7046
Epoch [5][900/1780], lr: 3.12500e-05, loss: 4.8637
Epoch [5][905/1780], lr: 3.12500e-05, loss: 5.3462
Epoch [5][910/1780], lr: 3.12500e-05, loss: 4.5983
Epoch [5][915/1780], lr: 3.12500e-05, loss: 5.222
Epoch [5][920/1780], lr: 3.12500e-05, loss: 4.5924
Epoch [5][925/1780], lr: 3.12500e-05, loss: 5.7442
Epoch [5][930/1780], lr: 3.12500e-05, loss: 5.1854
Epoch [5][935/1780], lr: 3.12500e-05, loss: 5.1464
Epoch [5][940/1780], lr: 3.12500e-05, loss: 4.9753
Epoch [5][945/1780], lr: 3.12500e-05, loss: 5.1116
Epoch [5][950/1780], lr: 3.12500

Epoch [5][1650/1780], lr: 3.12500e-05, loss: 4.5589
Epoch [5][1655/1780], lr: 3.12500e-05, loss: 4.3268
Epoch [5][1660/1780], lr: 3.12500e-05, loss: 5.3606
Epoch [5][1665/1780], lr: 3.12500e-05, loss: 5.4335
Epoch [5][1670/1780], lr: 3.12500e-05, loss: 4.8794
Epoch [5][1675/1780], lr: 3.12500e-05, loss: 5.4919
Epoch [5][1680/1780], lr: 3.12500e-05, loss: 4.8308
Epoch [5][1685/1780], lr: 3.12500e-05, loss: 5.2303
Epoch [5][1690/1780], lr: 3.12500e-05, loss: 5.0467
Epoch [5][1695/1780], lr: 3.12500e-05, loss: 4.4325
Epoch [5][1700/1780], lr: 3.12500e-05, loss: 5.0912
Epoch [5][1705/1780], lr: 3.12500e-05, loss: 5.0754
Epoch [5][1710/1780], lr: 3.12500e-05, loss: 4.6015
Epoch [5][1715/1780], lr: 3.12500e-05, loss: 4.4544
Epoch [5][1720/1780], lr: 3.12500e-05, loss: 4.4684
Epoch [5][1725/1780], lr: 3.12500e-05, loss: 5.2074
Epoch [5][1730/1780], lr: 3.12500e-05, loss: 4.9826
Epoch [5][1735/1780], lr: 3.12500e-05, loss: 4.9705
Epoch [5][1740/1780], lr: 3.12500e-05, loss: 4.9414
Epoch [5][17

Epoch [6][665/1780], lr: 3.90625e-05, loss: 4.6199
Epoch [6][670/1780], lr: 3.90625e-05, loss: 4.996
Epoch [6][675/1780], lr: 3.90625e-05, loss: 5.572
Epoch [6][680/1780], lr: 3.90625e-05, loss: 4.9772
Epoch [6][685/1780], lr: 3.90625e-05, loss: 5.3672
Epoch [6][690/1780], lr: 3.90625e-05, loss: 4.9425
Epoch [6][695/1780], lr: 3.90625e-05, loss: 4.8465
Epoch [6][700/1780], lr: 3.90625e-05, loss: 5.0359
Epoch [6][705/1780], lr: 3.90625e-05, loss: 4.9924
Epoch [6][710/1780], lr: 3.90625e-05, loss: 5.4956
Epoch [6][715/1780], lr: 3.90625e-05, loss: 4.5269
Epoch [6][720/1780], lr: 3.90625e-05, loss: 5.3315
Epoch [6][725/1780], lr: 3.90625e-05, loss: 5.1512
Epoch [6][730/1780], lr: 3.90625e-05, loss: 4.9304
Epoch [6][735/1780], lr: 3.90625e-05, loss: 4.498
Epoch [6][740/1780], lr: 3.90625e-05, loss: 5.8557
Epoch [6][745/1780], lr: 3.90625e-05, loss: 5.3726
Epoch [6][750/1780], lr: 3.90625e-05, loss: 5.2209
Epoch [6][755/1780], lr: 3.90625e-05, loss: 4.6559
Epoch [6][760/1780], lr: 3.90625e-

Epoch [6][1465/1780], lr: 3.90625e-05, loss: 4.7841
Epoch [6][1470/1780], lr: 3.90625e-05, loss: 5.3653
Epoch [6][1475/1780], lr: 3.90625e-05, loss: 4.274
Epoch [6][1480/1780], lr: 3.90625e-05, loss: 5.2765
Epoch [6][1485/1780], lr: 3.90625e-05, loss: 5.1963
Epoch [6][1490/1780], lr: 3.90625e-05, loss: 4.155
Epoch [6][1495/1780], lr: 3.90625e-05, loss: 5.0584
Epoch [6][1500/1780], lr: 3.90625e-05, loss: 3.5997
Epoch [6][1505/1780], lr: 3.90625e-05, loss: 5.0865
Epoch [6][1510/1780], lr: 3.90625e-05, loss: 4.7533
Epoch [6][1515/1780], lr: 3.90625e-05, loss: 5.3896
Epoch [6][1520/1780], lr: 3.90625e-05, loss: 5.2587
Epoch [6][1525/1780], lr: 3.90625e-05, loss: 5.0202
Epoch [6][1530/1780], lr: 3.90625e-05, loss: 5.7102
Epoch [6][1535/1780], lr: 3.90625e-05, loss: 4.3959
Epoch [6][1540/1780], lr: 3.90625e-05, loss: 5.3129
Epoch [6][1545/1780], lr: 3.90625e-05, loss: 5.1161
Epoch [6][1550/1780], lr: 3.90625e-05, loss: 5.8077
Epoch [6][1555/1780], lr: 3.90625e-05, loss: 5.1445
Epoch [6][1560

Epoch [7][475/1780], lr: 4.68750e-05, loss: 5.2546
Epoch [7][480/1780], lr: 4.68750e-05, loss: 5.9442
Epoch [7][485/1780], lr: 4.68750e-05, loss: 4.6016
Epoch [7][490/1780], lr: 4.68750e-05, loss: 4.9457
Epoch [7][495/1780], lr: 4.68750e-05, loss: 4.9495
Epoch [7][500/1780], lr: 4.68750e-05, loss: 4.8173
Epoch [7][505/1780], lr: 4.68750e-05, loss: 5.0947
Epoch [7][510/1780], lr: 4.68750e-05, loss: 4.4233
Epoch [7][515/1780], lr: 4.68750e-05, loss: 4.569
Epoch [7][520/1780], lr: 4.68750e-05, loss: 5.5865
Epoch [7][525/1780], lr: 4.68750e-05, loss: 5.9294
Epoch [7][530/1780], lr: 4.68750e-05, loss: 4.3652
Epoch [7][535/1780], lr: 4.68750e-05, loss: 4.2431
Epoch [7][540/1780], lr: 4.68750e-05, loss: 5.2625
Epoch [7][545/1780], lr: 4.68750e-05, loss: 4.8298
Epoch [7][550/1780], lr: 4.68750e-05, loss: 4.313
Epoch [7][555/1780], lr: 4.68750e-05, loss: 5.0843
Epoch [7][560/1780], lr: 4.68750e-05, loss: 5.2948
Epoch [7][565/1780], lr: 4.68750e-05, loss: 4.7892
Epoch [7][570/1780], lr: 4.68750e

Epoch [7][1275/1780], lr: 4.68750e-05, loss: 5.7784
Epoch [7][1280/1780], lr: 4.68750e-05, loss: 5.5504
Epoch [7][1285/1780], lr: 4.68750e-05, loss: 5.3805
Epoch [7][1290/1780], lr: 4.68750e-05, loss: 5.152
Epoch [7][1295/1780], lr: 4.68750e-05, loss: 5.2216
Epoch [7][1300/1780], lr: 4.68750e-05, loss: 5.3427
Epoch [7][1305/1780], lr: 4.68750e-05, loss: 5.3988
Epoch [7][1310/1780], lr: 4.68750e-05, loss: 4.8734
Epoch [7][1315/1780], lr: 4.68750e-05, loss: 4.8976
Epoch [7][1320/1780], lr: 4.68750e-05, loss: 5.6867
Epoch [7][1325/1780], lr: 4.68750e-05, loss: 5.3712
Epoch [7][1330/1780], lr: 4.68750e-05, loss: 4.4025
Epoch [7][1335/1780], lr: 4.68750e-05, loss: 5.3232
Epoch [7][1340/1780], lr: 4.68750e-05, loss: 4.9376
Epoch [7][1345/1780], lr: 4.68750e-05, loss: 4.8275
Epoch [7][1350/1780], lr: 4.68750e-05, loss: 4.724
Epoch [7][1355/1780], lr: 4.68750e-05, loss: 4.9486
Epoch [7][1360/1780], lr: 4.68750e-05, loss: 4.8686
Epoch [7][1365/1780], lr: 4.68750e-05, loss: 5.1371
Epoch [7][1370

Epoch [8][280/1780], lr: 5.46875e-05, loss: 4.7567
Epoch [8][285/1780], lr: 5.46875e-05, loss: 5.1453
Epoch [8][290/1780], lr: 5.46875e-05, loss: 5.7519
Epoch [8][295/1780], lr: 5.46875e-05, loss: 4.7103
Epoch [8][300/1780], lr: 5.46875e-05, loss: 4.8631
Epoch [8][305/1780], lr: 5.46875e-05, loss: 4.3019
Epoch [8][310/1780], lr: 5.46875e-05, loss: 5.1728
Epoch [8][315/1780], lr: 5.46875e-05, loss: 5.1892
Epoch [8][320/1780], lr: 5.46875e-05, loss: 5.2544
Epoch [8][325/1780], lr: 5.46875e-05, loss: 5.0188
Epoch [8][330/1780], lr: 5.46875e-05, loss: 4.9562
Epoch [8][335/1780], lr: 5.46875e-05, loss: 5.4055
Epoch [8][340/1780], lr: 5.46875e-05, loss: 4.6858
Epoch [8][345/1780], lr: 5.46875e-05, loss: 5.3394
Epoch [8][350/1780], lr: 5.46875e-05, loss: 5.1421
Epoch [8][355/1780], lr: 5.46875e-05, loss: 5.3989
Epoch [8][360/1780], lr: 5.46875e-05, loss: 5.0991
Epoch [8][365/1780], lr: 5.46875e-05, loss: 4.7112
Epoch [8][370/1780], lr: 5.46875e-05, loss: 5.6719
Epoch [8][375/1780], lr: 5.4687

Epoch [8][1085/1780], lr: 5.46875e-05, loss: 5.5387
Epoch [8][1090/1780], lr: 5.46875e-05, loss: 4.871
Epoch [8][1095/1780], lr: 5.46875e-05, loss: 4.685
Epoch [8][1100/1780], lr: 5.46875e-05, loss: 5.6846
Epoch [8][1105/1780], lr: 5.46875e-05, loss: 6.422
Epoch [8][1110/1780], lr: 5.46875e-05, loss: 5.7015
Epoch [8][1115/1780], lr: 5.46875e-05, loss: 5.2969
Epoch [8][1120/1780], lr: 5.46875e-05, loss: 5.1274
Epoch [8][1125/1780], lr: 5.46875e-05, loss: 5.2454
Epoch [8][1130/1780], lr: 5.46875e-05, loss: 5.9523
Epoch [8][1135/1780], lr: 5.46875e-05, loss: 5.1632
Epoch [8][1140/1780], lr: 5.46875e-05, loss: 4.3078
Epoch [8][1145/1780], lr: 5.46875e-05, loss: 4.9669
Epoch [8][1150/1780], lr: 5.46875e-05, loss: 5.2603
Epoch [8][1155/1780], lr: 5.46875e-05, loss: 4.9587
Epoch [8][1160/1780], lr: 5.46875e-05, loss: 5.0279
Epoch [8][1165/1780], lr: 5.46875e-05, loss: 4.9716
Epoch [8][1170/1780], lr: 5.46875e-05, loss: 5.1957
Epoch [8][1175/1780], lr: 5.46875e-05, loss: 5.6348
Epoch [8][1180/

Epoch [9][85/1780], lr: 6.25000e-05, loss: 5.4583
Epoch [9][90/1780], lr: 6.25000e-05, loss: 5.4224
Epoch [9][95/1780], lr: 6.25000e-05, loss: 5.2059
Epoch [9][100/1780], lr: 6.25000e-05, loss: 5.0785
Epoch [9][105/1780], lr: 6.25000e-05, loss: 4.7583
Epoch [9][110/1780], lr: 6.25000e-05, loss: 5.2831
Epoch [9][115/1780], lr: 6.25000e-05, loss: 5.3928
Epoch [9][120/1780], lr: 6.25000e-05, loss: 4.7858
Epoch [9][125/1780], lr: 6.25000e-05, loss: 5.0725
Epoch [9][130/1780], lr: 6.25000e-05, loss: 5.3255
Epoch [9][135/1780], lr: 6.25000e-05, loss: 5.3822
Epoch [9][140/1780], lr: 6.25000e-05, loss: 5.2383
Epoch [9][145/1780], lr: 6.25000e-05, loss: 5.1186
Epoch [9][150/1780], lr: 6.25000e-05, loss: 5.436
Epoch [9][155/1780], lr: 6.25000e-05, loss: 5.3781
Epoch [9][160/1780], lr: 6.25000e-05, loss: 5.0822
Epoch [9][165/1780], lr: 6.25000e-05, loss: 5.2635
Epoch [9][170/1780], lr: 6.25000e-05, loss: 5.0987
Epoch [9][175/1780], lr: 6.25000e-05, loss: 5.008
Epoch [9][180/1780], lr: 6.25000e-05

Epoch [9][895/1780], lr: 6.25000e-05, loss: 5.2953
Epoch [9][900/1780], lr: 6.25000e-05, loss: 5.1696
Epoch [9][905/1780], lr: 6.25000e-05, loss: 5.3111
Epoch [9][910/1780], lr: 6.25000e-05, loss: 5.2321
Epoch [9][915/1780], lr: 6.25000e-05, loss: 5.6606
Epoch [9][920/1780], lr: 6.25000e-05, loss: 5.5597
Epoch [9][925/1780], lr: 6.25000e-05, loss: 5.2196
Epoch [9][930/1780], lr: 6.25000e-05, loss: 5.1882
Epoch [9][935/1780], lr: 6.25000e-05, loss: 4.3756
Epoch [9][940/1780], lr: 6.25000e-05, loss: 5.4649
Epoch [9][945/1780], lr: 6.25000e-05, loss: 4.1357
Epoch [9][950/1780], lr: 6.25000e-05, loss: 5.4927
Epoch [9][955/1780], lr: 6.25000e-05, loss: 5.305
Epoch [9][960/1780], lr: 6.25000e-05, loss: 5.0352
Epoch [9][965/1780], lr: 6.25000e-05, loss: 5.0288
Epoch [9][970/1780], lr: 6.25000e-05, loss: 5.2494
Epoch [9][975/1780], lr: 6.25000e-05, loss: 5.3493
Epoch [9][980/1780], lr: 6.25000e-05, loss: 4.5472
Epoch [9][985/1780], lr: 6.25000e-05, loss: 4.8482
Epoch [9][990/1780], lr: 6.25000

Epoch [9][1690/1780], lr: 6.25000e-05, loss: 5.2282
Epoch [9][1695/1780], lr: 6.25000e-05, loss: 5.2707
Epoch [9][1700/1780], lr: 6.25000e-05, loss: 4.4949
Epoch [9][1705/1780], lr: 6.25000e-05, loss: 5.2045
Epoch [9][1710/1780], lr: 6.25000e-05, loss: 5.0682
Epoch [9][1715/1780], lr: 6.25000e-05, loss: 4.8109
Epoch [9][1720/1780], lr: 6.25000e-05, loss: 4.6607
Epoch [9][1725/1780], lr: 6.25000e-05, loss: 5.1611
Epoch [9][1730/1780], lr: 6.25000e-05, loss: 4.7845
Epoch [9][1735/1780], lr: 6.25000e-05, loss: 5.1156
Epoch [9][1740/1780], lr: 6.25000e-05, loss: 6.3096
Epoch [9][1745/1780], lr: 6.25000e-05, loss: 4.5676
Epoch [9][1750/1780], lr: 6.25000e-05, loss: 5.4886
Epoch [9][1755/1780], lr: 6.25000e-05, loss: 4.6708
Epoch [9][1760/1780], lr: 6.25000e-05, loss: 4.7828
Epoch [9][1765/1780], lr: 6.25000e-05, loss: 5.4298
Epoch [9][1770/1780], lr: 6.25000e-05, loss: 4.8248
Epoch [9][1775/1780], lr: 6.25000e-05, loss: 5.7108
Epoch [9][1780/1780], lr: 6.25000e-05, loss: 5.1419
Evaluating t

Epoch [10][690/1780], lr: 7.03125e-05, loss: 4.842
Epoch [10][695/1780], lr: 7.03125e-05, loss: 5.8724
Epoch [10][700/1780], lr: 7.03125e-05, loss: 5.5486
Epoch [10][705/1780], lr: 7.03125e-05, loss: 4.8642
Epoch [10][710/1780], lr: 7.03125e-05, loss: 5.5185
Epoch [10][715/1780], lr: 7.03125e-05, loss: 3.9228
Epoch [10][720/1780], lr: 7.03125e-05, loss: 5.0296
Epoch [10][725/1780], lr: 7.03125e-05, loss: 5.3965
Epoch [10][730/1780], lr: 7.03125e-05, loss: 4.8667
Epoch [10][735/1780], lr: 7.03125e-05, loss: 4.9319
Epoch [10][740/1780], lr: 7.03125e-05, loss: 6.0724
Epoch [10][745/1780], lr: 7.03125e-05, loss: 5.5874
Epoch [10][750/1780], lr: 7.03125e-05, loss: 4.5751
Epoch [10][755/1780], lr: 7.03125e-05, loss: 5.3455
Epoch [10][760/1780], lr: 7.03125e-05, loss: 5.2616
Epoch [10][765/1780], lr: 7.03125e-05, loss: 5.11
Epoch [10][770/1780], lr: 7.03125e-05, loss: 5.1627
Epoch [10][775/1780], lr: 7.03125e-05, loss: 5.7077
Epoch [10][780/1780], lr: 7.03125e-05, loss: 4.9075
Epoch [10][785/

Epoch [10][1475/1780], lr: 7.03125e-05, loss: 5.4216
Epoch [10][1480/1780], lr: 7.03125e-05, loss: 4.914
Epoch [10][1485/1780], lr: 7.03125e-05, loss: 5.5289
Epoch [10][1490/1780], lr: 7.03125e-05, loss: 4.1129
Epoch [10][1495/1780], lr: 7.03125e-05, loss: 5.1727
Epoch [10][1500/1780], lr: 7.03125e-05, loss: 4.6934
Epoch [10][1505/1780], lr: 7.03125e-05, loss: 5.2774
Epoch [10][1510/1780], lr: 7.03125e-05, loss: 5.6082
Epoch [10][1515/1780], lr: 7.03125e-05, loss: 5.6297
Epoch [10][1520/1780], lr: 7.03125e-05, loss: 5.1563
Epoch [10][1525/1780], lr: 7.03125e-05, loss: 5.3734
Epoch [10][1530/1780], lr: 7.03125e-05, loss: 5.6625
Epoch [10][1535/1780], lr: 7.03125e-05, loss: 5.5526
Epoch [10][1540/1780], lr: 7.03125e-05, loss: 4.6563
Epoch [10][1545/1780], lr: 7.03125e-05, loss: 4.962
Epoch [10][1550/1780], lr: 7.03125e-05, loss: 4.5994
Epoch [10][1555/1780], lr: 7.03125e-05, loss: 5.9489
Epoch [10][1560/1780], lr: 7.03125e-05, loss: 4.6538
Epoch [10][1565/1780], lr: 7.03125e-05, loss: 5.

Epoch [11][470/1780], lr: 7.81250e-05, loss: 5.5749
Epoch [11][475/1780], lr: 7.81250e-05, loss: 4.7631
Epoch [11][480/1780], lr: 7.81250e-05, loss: 5.6427
Epoch [11][485/1780], lr: 7.81250e-05, loss: 4.6482
Epoch [11][490/1780], lr: 7.81250e-05, loss: 4.4299
Epoch [11][495/1780], lr: 7.81250e-05, loss: 5.4761
Epoch [11][500/1780], lr: 7.81250e-05, loss: 4.8023
Epoch [11][505/1780], lr: 7.81250e-05, loss: 5.1647
Epoch [11][510/1780], lr: 7.81250e-05, loss: 5.5018
Epoch [11][515/1780], lr: 7.81250e-05, loss: 5.6283
Epoch [11][520/1780], lr: 7.81250e-05, loss: 4.9622
Epoch [11][525/1780], lr: 7.81250e-05, loss: 4.8145
Epoch [11][530/1780], lr: 7.81250e-05, loss: 5.101
Epoch [11][535/1780], lr: 7.81250e-05, loss: 4.8355
Epoch [11][540/1780], lr: 7.81250e-05, loss: 5.0957
Epoch [11][545/1780], lr: 7.81250e-05, loss: 5.4294
Epoch [11][550/1780], lr: 7.81250e-05, loss: 4.5553
Epoch [11][555/1780], lr: 7.81250e-05, loss: 5.8131
Epoch [11][560/1780], lr: 7.81250e-05, loss: 5.2568
Epoch [11][56

Epoch [11][1255/1780], lr: 7.81250e-05, loss: 5.3206
Epoch [11][1260/1780], lr: 7.81250e-05, loss: 5.3632
Epoch [11][1265/1780], lr: 7.81250e-05, loss: 5.5242
Epoch [11][1270/1780], lr: 7.81250e-05, loss: 5.7104
Epoch [11][1275/1780], lr: 7.81250e-05, loss: 4.7682
Epoch [11][1280/1780], lr: 7.81250e-05, loss: 5.9104
Epoch [11][1285/1780], lr: 7.81250e-05, loss: 5.3488
Epoch [11][1290/1780], lr: 7.81250e-05, loss: 4.7219
Epoch [11][1295/1780], lr: 7.81250e-05, loss: 4.5239
Epoch [11][1300/1780], lr: 7.81250e-05, loss: 5.659
Epoch [11][1305/1780], lr: 7.81250e-05, loss: 5.3154
Epoch [11][1310/1780], lr: 7.81250e-05, loss: 5.5621
Epoch [11][1315/1780], lr: 7.81250e-05, loss: 5.0158
Epoch [11][1320/1780], lr: 7.81250e-05, loss: 5.6206
Epoch [11][1325/1780], lr: 7.81250e-05, loss: 5.1975
Epoch [11][1330/1780], lr: 7.81250e-05, loss: 4.9672
Epoch [11][1335/1780], lr: 7.81250e-05, loss: 5.3399
Epoch [11][1340/1780], lr: 7.81250e-05, loss: 4.7394
Epoch [11][1345/1780], lr: 7.81250e-05, loss: 5

Epoch [12][245/1780], lr: 8.59375e-05, loss: 4.7265
Epoch [12][250/1780], lr: 8.59375e-05, loss: 5.4684
Epoch [12][255/1780], lr: 8.59375e-05, loss: 5.9231
Epoch [12][260/1780], lr: 8.59375e-05, loss: 5.0896
Epoch [12][265/1780], lr: 8.59375e-05, loss: 5.1933
Epoch [12][270/1780], lr: 8.59375e-05, loss: 5.6691
Epoch [12][275/1780], lr: 8.59375e-05, loss: 4.6086
Epoch [12][280/1780], lr: 8.59375e-05, loss: 5.6224
Epoch [12][285/1780], lr: 8.59375e-05, loss: 4.5603
Epoch [12][290/1780], lr: 8.59375e-05, loss: 4.6297
Epoch [12][295/1780], lr: 8.59375e-05, loss: 5.8583
Epoch [12][300/1780], lr: 8.59375e-05, loss: 4.7094
Epoch [12][305/1780], lr: 8.59375e-05, loss: 5.1033
Epoch [12][310/1780], lr: 8.59375e-05, loss: 5.6829
Epoch [12][315/1780], lr: 8.59375e-05, loss: 4.9997
Epoch [12][320/1780], lr: 8.59375e-05, loss: 4.4955
Epoch [12][325/1780], lr: 8.59375e-05, loss: 4.5456
Epoch [12][330/1780], lr: 8.59375e-05, loss: 5.9455
Epoch [12][335/1780], lr: 8.59375e-05, loss: 4.5111
Epoch [12][3

Epoch [12][1035/1780], lr: 8.59375e-05, loss: 4.4965
Epoch [12][1040/1780], lr: 8.59375e-05, loss: 4.8134
Epoch [12][1045/1780], lr: 8.59375e-05, loss: 5.0081
Epoch [12][1050/1780], lr: 8.59375e-05, loss: 5.6473
Epoch [12][1055/1780], lr: 8.59375e-05, loss: 5.9685
Epoch [12][1060/1780], lr: 8.59375e-05, loss: 5.2965
Epoch [12][1065/1780], lr: 8.59375e-05, loss: 5.5435
Epoch [12][1070/1780], lr: 8.59375e-05, loss: 4.6944
Epoch [12][1075/1780], lr: 8.59375e-05, loss: 5.444
Epoch [12][1080/1780], lr: 8.59375e-05, loss: 5.4111
Epoch [12][1085/1780], lr: 8.59375e-05, loss: 5.0801
Epoch [12][1090/1780], lr: 8.59375e-05, loss: 5.8792
Epoch [12][1095/1780], lr: 8.59375e-05, loss: 5.0846
Epoch [12][1100/1780], lr: 8.59375e-05, loss: 6.3379
Epoch [12][1105/1780], lr: 8.59375e-05, loss: 5.7411
Epoch [12][1110/1780], lr: 8.59375e-05, loss: 4.6552
Epoch [12][1115/1780], lr: 8.59375e-05, loss: 4.5354
Epoch [12][1120/1780], lr: 8.59375e-05, loss: 5.3325
Epoch [12][1125/1780], lr: 8.59375e-05, loss: 5

Epoch [13][20/1780], lr: 9.37500e-05, loss: 5.4406
Epoch [13][25/1780], lr: 9.37500e-05, loss: 5.6229
Epoch [13][30/1780], lr: 9.37500e-05, loss: 5.912
Epoch [13][35/1780], lr: 9.37500e-05, loss: 5.4276
Epoch [13][40/1780], lr: 9.37500e-05, loss: 5.3849
Epoch [13][45/1780], lr: 9.37500e-05, loss: 4.8934
Epoch [13][50/1780], lr: 9.37500e-05, loss: 5.4947
Epoch [13][55/1780], lr: 9.37500e-05, loss: 5.0158
Epoch [13][60/1780], lr: 9.37500e-05, loss: 4.9329
Epoch [13][65/1780], lr: 9.37500e-05, loss: 4.7414
Epoch [13][70/1780], lr: 9.37500e-05, loss: 5.0835
Epoch [13][75/1780], lr: 9.37500e-05, loss: 4.9391
Epoch [13][80/1780], lr: 9.37500e-05, loss: 6.0218
Epoch [13][85/1780], lr: 9.37500e-05, loss: 4.2953
Epoch [13][90/1780], lr: 9.37500e-05, loss: 5.8594
Epoch [13][95/1780], lr: 9.37500e-05, loss: 5.0834
Epoch [13][100/1780], lr: 9.37500e-05, loss: 4.5925
Epoch [13][105/1780], lr: 9.37500e-05, loss: 5.688
Epoch [13][110/1780], lr: 9.37500e-05, loss: 5.3461
Epoch [13][115/1780], lr: 9.37

Epoch [13][815/1780], lr: 9.37500e-05, loss: 4.963
Epoch [13][820/1780], lr: 9.37500e-05, loss: 5.0198
Epoch [13][825/1780], lr: 9.37500e-05, loss: 4.934
Epoch [13][830/1780], lr: 9.37500e-05, loss: 5.3751
Epoch [13][835/1780], lr: 9.37500e-05, loss: 4.9508
Epoch [13][840/1780], lr: 9.37500e-05, loss: 5.753
Epoch [13][845/1780], lr: 9.37500e-05, loss: 5.0504
Epoch [13][850/1780], lr: 9.37500e-05, loss: 5.5687
Epoch [13][855/1780], lr: 9.37500e-05, loss: 6.0619
Epoch [13][860/1780], lr: 9.37500e-05, loss: 5.2878
Epoch [13][865/1780], lr: 9.37500e-05, loss: 4.999
Epoch [13][870/1780], lr: 9.37500e-05, loss: 5.6448
Epoch [13][875/1780], lr: 9.37500e-05, loss: 4.7213
Epoch [13][880/1780], lr: 9.37500e-05, loss: 4.7911
Epoch [13][885/1780], lr: 9.37500e-05, loss: 4.3578
Epoch [13][890/1780], lr: 9.37500e-05, loss: 4.5511
Epoch [13][895/1780], lr: 9.37500e-05, loss: 5.3865
Epoch [13][900/1780], lr: 9.37500e-05, loss: 5.5345
Epoch [13][905/1780], lr: 9.37500e-05, loss: 4.6352
Epoch [13][910/1

Epoch [13][1595/1780], lr: 9.37500e-05, loss: 4.7534
Epoch [13][1600/1780], lr: 9.37500e-05, loss: 4.993
Epoch [13][1605/1780], lr: 9.37500e-05, loss: 5.3547
Epoch [13][1610/1780], lr: 9.37500e-05, loss: 5.3317
Epoch [13][1615/1780], lr: 9.37500e-05, loss: 5.0874
Epoch [13][1620/1780], lr: 9.37500e-05, loss: 5.0089
Epoch [13][1625/1780], lr: 9.37500e-05, loss: 5.3198
Epoch [13][1630/1780], lr: 9.37500e-05, loss: 4.5789
Epoch [13][1635/1780], lr: 9.37500e-05, loss: 5.3462
Epoch [13][1640/1780], lr: 9.37500e-05, loss: 5.3228
Epoch [13][1645/1780], lr: 9.37500e-05, loss: 4.4647
Epoch [13][1650/1780], lr: 9.37500e-05, loss: 5.0924
Epoch [13][1655/1780], lr: 9.37500e-05, loss: 5.2807
Epoch [13][1660/1780], lr: 9.37500e-05, loss: 4.2372
Epoch [13][1665/1780], lr: 9.37500e-05, loss: 6.1292
Epoch [13][1670/1780], lr: 9.37500e-05, loss: 5.6131
Epoch [13][1675/1780], lr: 9.37500e-05, loss: 5.3082
Epoch [13][1680/1780], lr: 9.37500e-05, loss: 4.4941
Epoch [13][1685/1780], lr: 9.37500e-05, loss: 4

Epoch [14][590/1780], lr: 1.01563e-04, loss: 5.0857
Epoch [14][595/1780], lr: 1.01563e-04, loss: 5.74
Epoch [14][600/1780], lr: 1.01563e-04, loss: 4.9331
Epoch [14][605/1780], lr: 1.01563e-04, loss: 4.2685
Epoch [14][610/1780], lr: 1.01563e-04, loss: 5.1659
Epoch [14][615/1780], lr: 1.01563e-04, loss: 5.6719
Epoch [14][620/1780], lr: 1.01563e-04, loss: 6.0138
Epoch [14][625/1780], lr: 1.01563e-04, loss: 4.5758
Epoch [14][630/1780], lr: 1.01563e-04, loss: 5.4866
Epoch [14][635/1780], lr: 1.01563e-04, loss: 4.4699
Epoch [14][640/1780], lr: 1.01563e-04, loss: 4.8266
Epoch [14][645/1780], lr: 1.01563e-04, loss: 5.2501
Epoch [14][650/1780], lr: 1.01563e-04, loss: 5.1492
Epoch [14][655/1780], lr: 1.01563e-04, loss: 5.6121
Epoch [14][660/1780], lr: 1.01563e-04, loss: 5.2896
Epoch [14][665/1780], lr: 1.01563e-04, loss: 5.8637
Epoch [14][670/1780], lr: 1.01563e-04, loss: 5.8288
Epoch [14][675/1780], lr: 1.01563e-04, loss: 5.2098
Epoch [14][680/1780], lr: 1.01563e-04, loss: 4.9551
Epoch [14][685

Epoch [14][1375/1780], lr: 1.01563e-04, loss: 5.7939
Epoch [14][1380/1780], lr: 1.01563e-04, loss: 5.7257
Epoch [14][1385/1780], lr: 1.01563e-04, loss: 5.6005
Epoch [14][1390/1780], lr: 1.01563e-04, loss: 5.0641
Epoch [14][1395/1780], lr: 1.01563e-04, loss: 4.9921
Epoch [14][1400/1780], lr: 1.01563e-04, loss: 5.129
Epoch [14][1405/1780], lr: 1.01563e-04, loss: 5.808
Epoch [14][1410/1780], lr: 1.01563e-04, loss: 5.1528
Epoch [14][1415/1780], lr: 1.01563e-04, loss: 5.4952
Epoch [14][1420/1780], lr: 1.01563e-04, loss: 4.1069
Epoch [14][1425/1780], lr: 1.01563e-04, loss: 4.9964
Epoch [14][1430/1780], lr: 1.01563e-04, loss: 4.8713
Epoch [14][1435/1780], lr: 1.01563e-04, loss: 4.2312
Epoch [14][1440/1780], lr: 1.01563e-04, loss: 5.1276
Epoch [14][1445/1780], lr: 1.01563e-04, loss: 4.6826
Epoch [14][1450/1780], lr: 1.01563e-04, loss: 5.4619
Epoch [14][1455/1780], lr: 1.01563e-04, loss: 4.7534
Epoch [14][1460/1780], lr: 1.01563e-04, loss: 5.1737
Epoch [14][1465/1780], lr: 1.01563e-04, loss: 5.

Epoch [15][365/1780], lr: 1.09375e-04, loss: 5.1643
Epoch [15][370/1780], lr: 1.09375e-04, loss: 4.9461
Epoch [15][375/1780], lr: 1.09375e-04, loss: 4.4862
Epoch [15][380/1780], lr: 1.09375e-04, loss: 5.989
Epoch [15][385/1780], lr: 1.09375e-04, loss: 4.9449
Epoch [15][390/1780], lr: 1.09375e-04, loss: 4.9162
Epoch [15][395/1780], lr: 1.09375e-04, loss: 5.2094
Epoch [15][400/1780], lr: 1.09375e-04, loss: 5.321
Epoch [15][405/1780], lr: 1.09375e-04, loss: 5.5402
Epoch [15][410/1780], lr: 1.09375e-04, loss: 5.5651
Epoch [15][415/1780], lr: 1.09375e-04, loss: 5.5706
Epoch [15][420/1780], lr: 1.09375e-04, loss: 5.447
Epoch [15][425/1780], lr: 1.09375e-04, loss: 5.2943
Epoch [15][430/1780], lr: 1.09375e-04, loss: 4.7936
Epoch [15][435/1780], lr: 1.09375e-04, loss: 4.6597
Epoch [15][440/1780], lr: 1.09375e-04, loss: 4.5073
Epoch [15][445/1780], lr: 1.09375e-04, loss: 5.3337
Epoch [15][450/1780], lr: 1.09375e-04, loss: 5.5133
Epoch [15][455/1780], lr: 1.09375e-04, loss: 5.343
Epoch [15][460/1

Epoch [15][1155/1780], lr: 1.09375e-04, loss: 4.8972
Epoch [15][1160/1780], lr: 1.09375e-04, loss: 5.1682
Epoch [15][1165/1780], lr: 1.09375e-04, loss: 5.5757
Epoch [15][1170/1780], lr: 1.09375e-04, loss: 5.1703
Epoch [15][1175/1780], lr: 1.09375e-04, loss: 4.3188
Epoch [15][1180/1780], lr: 1.09375e-04, loss: 5.6416
Epoch [15][1185/1780], lr: 1.09375e-04, loss: 5.5136
Epoch [15][1190/1780], lr: 1.09375e-04, loss: 5.9576
Epoch [15][1195/1780], lr: 1.09375e-04, loss: 4.5039
Epoch [15][1200/1780], lr: 1.09375e-04, loss: 5.7113
Epoch [15][1205/1780], lr: 1.09375e-04, loss: 5.3945
Epoch [15][1210/1780], lr: 1.09375e-04, loss: 5.7121
Epoch [15][1215/1780], lr: 1.09375e-04, loss: 4.9403
Epoch [15][1220/1780], lr: 1.09375e-04, loss: 5.7898
Epoch [15][1225/1780], lr: 1.09375e-04, loss: 5.6567
Epoch [15][1230/1780], lr: 1.09375e-04, loss: 4.7291
Epoch [15][1235/1780], lr: 1.09375e-04, loss: 5.1193
Epoch [15][1240/1780], lr: 1.09375e-04, loss: 4.4938
Epoch [15][1245/1780], lr: 1.09375e-04, loss: 

Epoch [16][145/1780], lr: 1.17187e-04, loss: 5.7669
Epoch [16][150/1780], lr: 1.17187e-04, loss: 4.5996
Epoch [16][155/1780], lr: 1.17187e-04, loss: 5.1466
Epoch [16][160/1780], lr: 1.17187e-04, loss: 5.1152
Epoch [16][165/1780], lr: 1.17187e-04, loss: 5.543
Epoch [16][170/1780], lr: 1.17187e-04, loss: 5.5198
Epoch [16][175/1780], lr: 1.17187e-04, loss: 5.3211
Epoch [16][180/1780], lr: 1.17187e-04, loss: 4.7915
Epoch [16][185/1780], lr: 1.17187e-04, loss: 4.6175
Epoch [16][190/1780], lr: 1.17187e-04, loss: 5.1874
Epoch [16][195/1780], lr: 1.17187e-04, loss: 5.7923
Epoch [16][200/1780], lr: 1.17187e-04, loss: 4.8133
Epoch [16][205/1780], lr: 1.17187e-04, loss: 5.353
Epoch [16][210/1780], lr: 1.17187e-04, loss: 6.8534
Epoch [16][215/1780], lr: 1.17187e-04, loss: 5.2131
Epoch [16][220/1780], lr: 1.17187e-04, loss: 4.5701
Epoch [16][225/1780], lr: 1.17187e-04, loss: 4.5699
Epoch [16][230/1780], lr: 1.17187e-04, loss: 5.3524
Epoch [16][235/1780], lr: 1.17187e-04, loss: 5.6949
Epoch [16][240

Epoch [16][940/1780], lr: 1.17187e-04, loss: 5.4813
Epoch [16][945/1780], lr: 1.17187e-04, loss: 5.9668
Epoch [16][950/1780], lr: 1.17187e-04, loss: 5.741
Epoch [16][955/1780], lr: 1.17187e-04, loss: 5.8434
Epoch [16][960/1780], lr: 1.17187e-04, loss: 6.2549
Epoch [16][965/1780], lr: 1.17187e-04, loss: 5.4459
Epoch [16][970/1780], lr: 1.17187e-04, loss: 5.3103
Epoch [16][975/1780], lr: 1.17187e-04, loss: 6.0936
Epoch [16][980/1780], lr: 1.17187e-04, loss: 6.1186
Epoch [16][985/1780], lr: 1.17187e-04, loss: 5.3029
Epoch [16][990/1780], lr: 1.17187e-04, loss: 4.9181
Epoch [16][995/1780], lr: 1.17187e-04, loss: 5.3802
Epoch [16][1000/1780], lr: 1.17187e-04, loss: 5.7027
Epoch [16][1005/1780], lr: 1.17187e-04, loss: 5.4556
Epoch [16][1010/1780], lr: 1.17187e-04, loss: 5.9287
Epoch [16][1015/1780], lr: 1.17187e-04, loss: 5.7091
Epoch [16][1020/1780], lr: 1.17187e-04, loss: 5.4255
Epoch [16][1025/1780], lr: 1.17187e-04, loss: 4.8447
Epoch [16][1030/1780], lr: 1.17187e-04, loss: 5.3831
Epoch 

Epoch [16][1715/1780], lr: 1.17187e-04, loss: 6.0484
Epoch [16][1720/1780], lr: 1.17187e-04, loss: 4.7123
Epoch [16][1725/1780], lr: 1.17187e-04, loss: 4.9973
Epoch [16][1730/1780], lr: 1.17187e-04, loss: 5.378
Epoch [16][1735/1780], lr: 1.17187e-04, loss: 4.8572
Epoch [16][1740/1780], lr: 1.17187e-04, loss: 4.573
Epoch [16][1745/1780], lr: 1.17187e-04, loss: 5.1419
Epoch [16][1750/1780], lr: 1.17187e-04, loss: 5.0176
Epoch [16][1755/1780], lr: 1.17187e-04, loss: 6.315
Epoch [16][1760/1780], lr: 1.17187e-04, loss: 4.6257
Epoch [16][1765/1780], lr: 1.17187e-04, loss: 4.9208
Epoch [16][1770/1780], lr: 1.17187e-04, loss: 5.1097
Epoch [16][1775/1780], lr: 1.17187e-04, loss: 4.5441
Epoch [16][1780/1780], lr: 1.17187e-04, loss: 5.1957
Evaluating top_k_accuracy...
top1_acc: 0.007752, top5_acc: 0.06977, train_loss: 5.1957, val_loss: 5.1214
Saving checkpoint at 16 epochs...
Epoch [17][5/1780], lr: 1.25000e-04, loss: 4.9873
Epoch [17][10/1780], lr: 1.25000e-04, loss: 5.7413
Epoch [17][15/1780], 

Epoch [17][715/1780], lr: 1.25000e-04, loss: 4.8509
Epoch [17][720/1780], lr: 1.25000e-04, loss: 4.8439
Epoch [17][725/1780], lr: 1.25000e-04, loss: 5.5142
Epoch [17][730/1780], lr: 1.25000e-04, loss: 6.1447
Epoch [17][735/1780], lr: 1.25000e-04, loss: 5.5197
Epoch [17][740/1780], lr: 1.25000e-04, loss: 5.364
Epoch [17][745/1780], lr: 1.25000e-04, loss: 5.2143
Epoch [17][750/1780], lr: 1.25000e-04, loss: 5.7071
Epoch [17][755/1780], lr: 1.25000e-04, loss: 4.6617
Epoch [17][760/1780], lr: 1.25000e-04, loss: 5.466
Epoch [17][765/1780], lr: 1.25000e-04, loss: 5.7583
Epoch [17][770/1780], lr: 1.25000e-04, loss: 5.362
Epoch [17][775/1780], lr: 1.25000e-04, loss: 6.0541
Epoch [17][780/1780], lr: 1.25000e-04, loss: 5.3921
Epoch [17][785/1780], lr: 1.25000e-04, loss: 5.3547
Epoch [17][790/1780], lr: 1.25000e-04, loss: 5.8874
Epoch [17][795/1780], lr: 1.25000e-04, loss: 6.5301
Epoch [17][800/1780], lr: 1.25000e-04, loss: 4.3796
Epoch [17][805/1780], lr: 1.25000e-04, loss: 5.3164
Epoch [17][810/

Epoch [17][1495/1780], lr: 1.25000e-04, loss: 4.794
Epoch [17][1500/1780], lr: 1.25000e-04, loss: 5.1092
Epoch [17][1505/1780], lr: 1.25000e-04, loss: 4.8103
Epoch [17][1510/1780], lr: 1.25000e-04, loss: 5.25
Epoch [17][1515/1780], lr: 1.25000e-04, loss: 5.3303
Epoch [17][1520/1780], lr: 1.25000e-04, loss: 5.7943
Epoch [17][1525/1780], lr: 1.25000e-04, loss: 5.5746
Epoch [17][1530/1780], lr: 1.25000e-04, loss: 4.7649
Epoch [17][1535/1780], lr: 1.25000e-04, loss: 4.5099
Epoch [17][1540/1780], lr: 1.25000e-04, loss: 5.5214
Epoch [17][1545/1780], lr: 1.25000e-04, loss: 6.0162
Epoch [17][1550/1780], lr: 1.25000e-04, loss: 5.0736
Epoch [17][1555/1780], lr: 1.25000e-04, loss: 4.638
Epoch [17][1560/1780], lr: 1.25000e-04, loss: 4.8951
Epoch [17][1565/1780], lr: 1.25000e-04, loss: 5.6458
Epoch [17][1570/1780], lr: 1.25000e-04, loss: 5.715
Epoch [17][1575/1780], lr: 1.25000e-04, loss: 5.3257
Epoch [17][1580/1780], lr: 1.25000e-04, loss: 5.3381
Epoch [17][1585/1780], lr: 1.25000e-04, loss: 5.289

Epoch [18][490/1780], lr: 1.25000e-04, loss: 5.4786
Epoch [18][495/1780], lr: 1.25000e-04, loss: 4.9702
Epoch [18][500/1780], lr: 1.25000e-04, loss: 5.5691
Epoch [18][505/1780], lr: 1.25000e-04, loss: 5.0027
Epoch [18][510/1780], lr: 1.25000e-04, loss: 6.0181
Epoch [18][515/1780], lr: 1.25000e-04, loss: 5.1476
Epoch [18][520/1780], lr: 1.25000e-04, loss: 5.3839
Epoch [18][525/1780], lr: 1.25000e-04, loss: 5.2598
Epoch [18][530/1780], lr: 1.25000e-04, loss: 4.6032
Epoch [18][535/1780], lr: 1.25000e-04, loss: 6.3662
Epoch [18][540/1780], lr: 1.25000e-04, loss: 4.8631
Epoch [18][545/1780], lr: 1.25000e-04, loss: 5.083
Epoch [18][550/1780], lr: 1.25000e-04, loss: 4.9933
Epoch [18][555/1780], lr: 1.25000e-04, loss: 4.9774
Epoch [18][560/1780], lr: 1.25000e-04, loss: 5.4036
Epoch [18][565/1780], lr: 1.25000e-04, loss: 5.7852
Epoch [18][570/1780], lr: 1.25000e-04, loss: 5.3258
Epoch [18][575/1780], lr: 1.25000e-04, loss: 5.2299
Epoch [18][580/1780], lr: 1.25000e-04, loss: 5.2954
Epoch [18][58

Epoch [18][1275/1780], lr: 1.25000e-04, loss: 5.0305
Epoch [18][1280/1780], lr: 1.25000e-04, loss: 4.7235
Epoch [18][1285/1780], lr: 1.25000e-04, loss: 5.9981
Epoch [18][1290/1780], lr: 1.25000e-04, loss: 5.1795
Epoch [18][1295/1780], lr: 1.25000e-04, loss: 4.1316
Epoch [18][1300/1780], lr: 1.25000e-04, loss: 5.0513
Epoch [18][1305/1780], lr: 1.25000e-04, loss: 4.7603
Epoch [18][1310/1780], lr: 1.25000e-04, loss: 4.9133
Epoch [18][1315/1780], lr: 1.25000e-04, loss: 4.9209
Epoch [18][1320/1780], lr: 1.25000e-04, loss: 5.2538
Epoch [18][1325/1780], lr: 1.25000e-04, loss: 5.7745
Epoch [18][1330/1780], lr: 1.25000e-04, loss: 5.4328
Epoch [18][1335/1780], lr: 1.25000e-04, loss: 5.7257
Epoch [18][1340/1780], lr: 1.25000e-04, loss: 5.2311
Epoch [18][1345/1780], lr: 1.25000e-04, loss: 5.6015
Epoch [18][1350/1780], lr: 1.25000e-04, loss: 4.9116
Epoch [18][1355/1780], lr: 1.25000e-04, loss: 5.5353
Epoch [18][1360/1780], lr: 1.25000e-04, loss: 5.7031
Epoch [18][1365/1780], lr: 1.25000e-04, loss: 

Epoch [19][265/1780], lr: 1.25000e-04, loss: 5.2727
Epoch [19][270/1780], lr: 1.25000e-04, loss: 4.9173
Epoch [19][275/1780], lr: 1.25000e-04, loss: 4.2645
Epoch [19][280/1780], lr: 1.25000e-04, loss: 5.1301
Epoch [19][285/1780], lr: 1.25000e-04, loss: 5.0994
Epoch [19][290/1780], lr: 1.25000e-04, loss: 5.4347
Epoch [19][295/1780], lr: 1.25000e-04, loss: 5.3437
Epoch [19][300/1780], lr: 1.25000e-04, loss: 4.4852
Epoch [19][305/1780], lr: 1.25000e-04, loss: 5.1312
Epoch [19][310/1780], lr: 1.25000e-04, loss: 5.1221
Epoch [19][315/1780], lr: 1.25000e-04, loss: 5.1069
Epoch [19][320/1780], lr: 1.25000e-04, loss: 6.4118
Epoch [19][325/1780], lr: 1.25000e-04, loss: 5.237
Epoch [19][330/1780], lr: 1.25000e-04, loss: 5.3032
Epoch [19][335/1780], lr: 1.25000e-04, loss: 5.2186
Epoch [19][340/1780], lr: 1.25000e-04, loss: 5.8009
Epoch [19][345/1780], lr: 1.25000e-04, loss: 4.9087
Epoch [19][350/1780], lr: 1.25000e-04, loss: 5.1699
Epoch [19][355/1780], lr: 1.25000e-04, loss: 4.9471
Epoch [19][36

Epoch [19][1055/1780], lr: 1.25000e-04, loss: 5.4588
Epoch [19][1060/1780], lr: 1.25000e-04, loss: 4.9324
Epoch [19][1065/1780], lr: 1.25000e-04, loss: 5.3498
Epoch [19][1070/1780], lr: 1.25000e-04, loss: 5.6088
Epoch [19][1075/1780], lr: 1.25000e-04, loss: 4.7905
Epoch [19][1080/1780], lr: 1.25000e-04, loss: 5.1181
Epoch [19][1085/1780], lr: 1.25000e-04, loss: 5.0408
Epoch [19][1090/1780], lr: 1.25000e-04, loss: 4.9422
Epoch [19][1095/1780], lr: 1.25000e-04, loss: 4.802
Epoch [19][1100/1780], lr: 1.25000e-04, loss: 4.5885
Epoch [19][1105/1780], lr: 1.25000e-04, loss: 5.5761
Epoch [19][1110/1780], lr: 1.25000e-04, loss: 3.9235
Epoch [19][1115/1780], lr: 1.25000e-04, loss: 5.5235
Epoch [19][1120/1780], lr: 1.25000e-04, loss: 5.2908
Epoch [19][1125/1780], lr: 1.25000e-04, loss: 4.813
Epoch [19][1130/1780], lr: 1.25000e-04, loss: 5.7351
Epoch [19][1135/1780], lr: 1.25000e-04, loss: 5.2484
Epoch [19][1140/1780], lr: 1.25000e-04, loss: 5.7781
Epoch [19][1145/1780], lr: 1.25000e-04, loss: 4.

Epoch [20][40/1780], lr: 1.25000e-04, loss: 4.8781
Epoch [20][45/1780], lr: 1.25000e-04, loss: 6.2462
Epoch [20][50/1780], lr: 1.25000e-04, loss: 4.4002
Epoch [20][55/1780], lr: 1.25000e-04, loss: 4.7579
Epoch [20][60/1780], lr: 1.25000e-04, loss: 5.1095
Epoch [20][65/1780], lr: 1.25000e-04, loss: 5.7172
Epoch [20][70/1780], lr: 1.25000e-04, loss: 4.7945
Epoch [20][75/1780], lr: 1.25000e-04, loss: 5.2367
Epoch [20][80/1780], lr: 1.25000e-04, loss: 4.8428
Epoch [20][85/1780], lr: 1.25000e-04, loss: 5.417
Epoch [20][90/1780], lr: 1.25000e-04, loss: 5.2074
Epoch [20][95/1780], lr: 1.25000e-04, loss: 4.7405
Epoch [20][100/1780], lr: 1.25000e-04, loss: 4.9844
Epoch [20][105/1780], lr: 1.25000e-04, loss: 5.4527
Epoch [20][110/1780], lr: 1.25000e-04, loss: 4.7347
Epoch [20][115/1780], lr: 1.25000e-04, loss: 5.3642
Epoch [20][120/1780], lr: 1.25000e-04, loss: 4.7961
Epoch [20][125/1780], lr: 1.25000e-04, loss: 5.3627
Epoch [20][130/1780], lr: 1.25000e-04, loss: 5.4061
Epoch [20][135/1780], lr:

Epoch [20][830/1780], lr: 1.25000e-04, loss: 5.0642
Epoch [20][835/1780], lr: 1.25000e-04, loss: 6.5499
Epoch [20][840/1780], lr: 1.25000e-04, loss: 4.6419
Epoch [20][845/1780], lr: 1.25000e-04, loss: 5.677
Epoch [20][850/1780], lr: 1.25000e-04, loss: 4.9262
Epoch [20][855/1780], lr: 1.25000e-04, loss: 5.6121
Epoch [20][860/1780], lr: 1.25000e-04, loss: 5.415
Epoch [20][865/1780], lr: 1.25000e-04, loss: 5.0569
Epoch [20][870/1780], lr: 1.25000e-04, loss: 5.066
Epoch [20][875/1780], lr: 1.25000e-04, loss: 4.4587
Epoch [20][880/1780], lr: 1.25000e-04, loss: 5.3661
Epoch [20][885/1780], lr: 1.25000e-04, loss: 5.4598
Epoch [20][890/1780], lr: 1.25000e-04, loss: 5.4097
Epoch [20][895/1780], lr: 1.25000e-04, loss: 5.7265
Epoch [20][900/1780], lr: 1.25000e-04, loss: 4.5676
Epoch [20][905/1780], lr: 1.25000e-04, loss: 5.2098
Epoch [20][910/1780], lr: 1.25000e-04, loss: 4.9356
Epoch [20][915/1780], lr: 1.25000e-04, loss: 5.019
Epoch [20][920/1780], lr: 1.25000e-04, loss: 5.3219
Epoch [20][925/1

Epoch [20][1610/1780], lr: 1.25000e-04, loss: 4.9575
Epoch [20][1615/1780], lr: 1.25000e-04, loss: 5.652
Epoch [20][1620/1780], lr: 1.25000e-04, loss: 5.1393
Epoch [20][1625/1780], lr: 1.25000e-04, loss: 5.9891
Epoch [20][1630/1780], lr: 1.25000e-04, loss: 4.4958
Epoch [20][1635/1780], lr: 1.25000e-04, loss: 6.0175
Epoch [20][1640/1780], lr: 1.25000e-04, loss: 5.7982
Epoch [20][1645/1780], lr: 1.25000e-04, loss: 4.9185
Epoch [20][1650/1780], lr: 1.25000e-04, loss: 6.4159
Epoch [20][1655/1780], lr: 1.25000e-04, loss: 4.9964
Epoch [20][1660/1780], lr: 1.25000e-04, loss: 5.523
Epoch [20][1665/1780], lr: 1.25000e-04, loss: 5.5459
Epoch [20][1670/1780], lr: 1.25000e-04, loss: 5.2655
Epoch [20][1675/1780], lr: 1.25000e-04, loss: 4.8484
Epoch [20][1680/1780], lr: 1.25000e-04, loss: 4.8845
Epoch [20][1685/1780], lr: 1.25000e-04, loss: 5.4347
Epoch [20][1690/1780], lr: 1.25000e-04, loss: 5.5092
Epoch [20][1695/1780], lr: 1.25000e-04, loss: 5.224
Epoch [20][1700/1780], lr: 1.25000e-04, loss: 6.0

Epoch [21][605/1780], lr: 1.25000e-04, loss: 5.7321
Epoch [21][610/1780], lr: 1.25000e-04, loss: 5.9879
Epoch [21][615/1780], lr: 1.25000e-04, loss: 4.7114
Epoch [21][620/1780], lr: 1.25000e-04, loss: 5.0684
Epoch [21][625/1780], lr: 1.25000e-04, loss: 5.4078
Epoch [21][630/1780], lr: 1.25000e-04, loss: 4.4498
Epoch [21][635/1780], lr: 1.25000e-04, loss: 4.7276
Epoch [21][640/1780], lr: 1.25000e-04, loss: 4.2
Epoch [21][645/1780], lr: 1.25000e-04, loss: 5.5107
Epoch [21][650/1780], lr: 1.25000e-04, loss: 4.8319
Epoch [21][655/1780], lr: 1.25000e-04, loss: 5.8612
Epoch [21][660/1780], lr: 1.25000e-04, loss: 5.1639
Epoch [21][665/1780], lr: 1.25000e-04, loss: 5.0725
Epoch [21][670/1780], lr: 1.25000e-04, loss: 5.5992
Epoch [21][675/1780], lr: 1.25000e-04, loss: 5.0363
Epoch [21][680/1780], lr: 1.25000e-04, loss: 4.9089
Epoch [21][685/1780], lr: 1.25000e-04, loss: 4.7216
Epoch [21][690/1780], lr: 1.25000e-04, loss: 4.2187
Epoch [21][695/1780], lr: 1.25000e-04, loss: 5.6324
Epoch [21][700/

Epoch [21][1390/1780], lr: 1.25000e-04, loss: 5.1308
Epoch [21][1395/1780], lr: 1.25000e-04, loss: 5.7343
Epoch [21][1400/1780], lr: 1.25000e-04, loss: 4.91
Epoch [21][1405/1780], lr: 1.25000e-04, loss: 5.8873
Epoch [21][1410/1780], lr: 1.25000e-04, loss: 5.5528
Epoch [21][1415/1780], lr: 1.25000e-04, loss: 5.3933
Epoch [21][1420/1780], lr: 1.25000e-04, loss: 4.7572
Epoch [21][1425/1780], lr: 1.25000e-04, loss: 5.439
Epoch [21][1430/1780], lr: 1.25000e-04, loss: 5.7624
Epoch [21][1435/1780], lr: 1.25000e-04, loss: 5.4194
Epoch [21][1440/1780], lr: 1.25000e-04, loss: 4.4074
Epoch [21][1445/1780], lr: 1.25000e-04, loss: 5.2832
Epoch [21][1450/1780], lr: 1.25000e-04, loss: 5.4425
Epoch [21][1455/1780], lr: 1.25000e-04, loss: 5.316
Epoch [21][1460/1780], lr: 1.25000e-04, loss: 5.6043
Epoch [21][1465/1780], lr: 1.25000e-04, loss: 5.0757
Epoch [21][1470/1780], lr: 1.25000e-04, loss: 5.3277
Epoch [21][1475/1780], lr: 1.25000e-04, loss: 5.2783
Epoch [21][1480/1780], lr: 1.25000e-04, loss: 4.48

Epoch [22][385/1780], lr: 1.25000e-04, loss: 5.6473
Epoch [22][390/1780], lr: 1.25000e-04, loss: 5.3509
Epoch [22][395/1780], lr: 1.25000e-04, loss: 5.635
Epoch [22][400/1780], lr: 1.25000e-04, loss: 4.4104
Epoch [22][405/1780], lr: 1.25000e-04, loss: 5.395
Epoch [22][410/1780], lr: 1.25000e-04, loss: 4.9901
Epoch [22][415/1780], lr: 1.25000e-04, loss: 5.3149
Epoch [22][420/1780], lr: 1.25000e-04, loss: 5.4104
Epoch [22][425/1780], lr: 1.25000e-04, loss: 5.2484
Epoch [22][430/1780], lr: 1.25000e-04, loss: 5.8818
Epoch [22][435/1780], lr: 1.25000e-04, loss: 4.4178
Epoch [22][440/1780], lr: 1.25000e-04, loss: 4.9842
Epoch [22][445/1780], lr: 1.25000e-04, loss: 5.5132
Epoch [22][450/1780], lr: 1.25000e-04, loss: 5.4778
Epoch [22][455/1780], lr: 1.25000e-04, loss: 5.0222
Epoch [22][460/1780], lr: 1.25000e-04, loss: 4.5972
Epoch [22][465/1780], lr: 1.25000e-04, loss: 5.4314
Epoch [22][470/1780], lr: 1.25000e-04, loss: 4.5704
Epoch [22][475/1780], lr: 1.25000e-04, loss: 4.8902
Epoch [22][480

Epoch [22][1175/1780], lr: 1.25000e-04, loss: 4.9378
Epoch [22][1180/1780], lr: 1.25000e-04, loss: 5.2533
Epoch [22][1185/1780], lr: 1.25000e-04, loss: 4.9121
Epoch [22][1190/1780], lr: 1.25000e-04, loss: 4.9955
Epoch [22][1195/1780], lr: 1.25000e-04, loss: 4.9611
Epoch [22][1200/1780], lr: 1.25000e-04, loss: 5.697
Epoch [22][1205/1780], lr: 1.25000e-04, loss: 4.9791
Epoch [22][1210/1780], lr: 1.25000e-04, loss: 5.5499
Epoch [22][1215/1780], lr: 1.25000e-04, loss: 4.3259
Epoch [22][1220/1780], lr: 1.25000e-04, loss: 5.1053
Epoch [22][1225/1780], lr: 1.25000e-04, loss: 4.7237
Epoch [22][1230/1780], lr: 1.25000e-04, loss: 5.6247
Epoch [22][1235/1780], lr: 1.25000e-04, loss: 5.033
Epoch [22][1240/1780], lr: 1.25000e-04, loss: 5.0145
Epoch [22][1245/1780], lr: 1.25000e-04, loss: 4.6619
Epoch [22][1250/1780], lr: 1.25000e-04, loss: 5.0856
Epoch [22][1255/1780], lr: 1.25000e-04, loss: 5.3422
Epoch [22][1260/1780], lr: 1.25000e-04, loss: 5.3914
Epoch [22][1265/1780], lr: 1.25000e-04, loss: 6.

Epoch [23][165/1780], lr: 1.25000e-04, loss: 5.9243
Epoch [23][170/1780], lr: 1.25000e-04, loss: 4.52
Epoch [23][175/1780], lr: 1.25000e-04, loss: 5.6802
Epoch [23][180/1780], lr: 1.25000e-04, loss: 3.9539
Epoch [23][185/1780], lr: 1.25000e-04, loss: 5.6863
Epoch [23][190/1780], lr: 1.25000e-04, loss: 5.6618
Epoch [23][195/1780], lr: 1.25000e-04, loss: 5.302
Epoch [23][200/1780], lr: 1.25000e-04, loss: 4.6818
Epoch [23][205/1780], lr: 1.25000e-04, loss: 4.4095
Epoch [23][210/1780], lr: 1.25000e-04, loss: 5.6745
Epoch [23][215/1780], lr: 1.25000e-04, loss: 5.941
Epoch [23][220/1780], lr: 1.25000e-04, loss: 4.814
Epoch [23][225/1780], lr: 1.25000e-04, loss: 4.5611
Epoch [23][230/1780], lr: 1.25000e-04, loss: 5.5899
Epoch [23][235/1780], lr: 1.25000e-04, loss: 5.6767
Epoch [23][240/1780], lr: 1.25000e-04, loss: 5.6092
Epoch [23][245/1780], lr: 1.25000e-04, loss: 4.4701
Epoch [23][250/1780], lr: 1.25000e-04, loss: 5.791
Epoch [23][255/1780], lr: 1.25000e-04, loss: 4.629
Epoch [23][260/1780

Epoch [23][960/1780], lr: 1.25000e-04, loss: 5.8175
Epoch [23][965/1780], lr: 1.25000e-04, loss: 4.8464
Epoch [23][970/1780], lr: 1.25000e-04, loss: 5.0029
Epoch [23][975/1780], lr: 1.25000e-04, loss: 3.993
Epoch [23][980/1780], lr: 1.25000e-04, loss: 5.6095
Epoch [23][985/1780], lr: 1.25000e-04, loss: 4.9488
Epoch [23][990/1780], lr: 1.25000e-04, loss: 5.4937
Epoch [23][995/1780], lr: 1.25000e-04, loss: 5.1257
Epoch [23][1000/1780], lr: 1.25000e-04, loss: 5.0097
Epoch [23][1005/1780], lr: 1.25000e-04, loss: 5.847
Epoch [23][1010/1780], lr: 1.25000e-04, loss: 5.1071
Epoch [23][1015/1780], lr: 1.25000e-04, loss: 5.6261
Epoch [23][1020/1780], lr: 1.25000e-04, loss: 6.3204
Epoch [23][1025/1780], lr: 1.25000e-04, loss: 5.4306
Epoch [23][1030/1780], lr: 1.25000e-04, loss: 5.1396
Epoch [23][1035/1780], lr: 1.25000e-04, loss: 5.5307
Epoch [23][1040/1780], lr: 1.25000e-04, loss: 4.8154
Epoch [23][1045/1780], lr: 1.25000e-04, loss: 4.9999
Epoch [23][1050/1780], lr: 1.25000e-04, loss: 4.9563
Epo

Epoch [23][1735/1780], lr: 1.25000e-04, loss: 4.9147
Epoch [23][1740/1780], lr: 1.25000e-04, loss: 4.7223
Epoch [23][1745/1780], lr: 1.25000e-04, loss: 5.1093
Epoch [23][1750/1780], lr: 1.25000e-04, loss: 5.1844
Epoch [23][1755/1780], lr: 1.25000e-04, loss: 4.8648
Epoch [23][1760/1780], lr: 1.25000e-04, loss: 5.0629
Epoch [23][1765/1780], lr: 1.25000e-04, loss: 4.4843
Epoch [23][1770/1780], lr: 1.25000e-04, loss: 5.8195
Epoch [23][1775/1780], lr: 1.25000e-04, loss: 5.4502
Epoch [23][1780/1780], lr: 1.25000e-04, loss: 5.0485
Evaluating top_k_accuracy...
top1_acc: 0.0155, top5_acc: 0.0814, train_loss: 5.0485, val_loss: 4.909
Saving checkpoint at 23 epochs...
Epoch [24][5/1780], lr: 1.25000e-04, loss: 4.6636
Epoch [24][10/1780], lr: 1.25000e-04, loss: 4.5049
Epoch [24][15/1780], lr: 1.25000e-04, loss: 4.6651
Epoch [24][20/1780], lr: 1.25000e-04, loss: 4.6263
Epoch [24][25/1780], lr: 1.25000e-04, loss: 5.1634
Epoch [24][30/1780], lr: 1.25000e-04, loss: 4.5696
Epoch [24][35/1780], lr: 1.250

Epoch [24][735/1780], lr: 1.25000e-04, loss: 4.7626
Epoch [24][740/1780], lr: 1.25000e-04, loss: 5.3684
Epoch [24][745/1780], lr: 1.25000e-04, loss: 5.3429
Epoch [24][750/1780], lr: 1.25000e-04, loss: 5.6413
Epoch [24][755/1780], lr: 1.25000e-04, loss: 4.6493
Epoch [24][760/1780], lr: 1.25000e-04, loss: 5.6497
Epoch [24][765/1780], lr: 1.25000e-04, loss: 5.7523
Epoch [24][770/1780], lr: 1.25000e-04, loss: 5.2601
Epoch [24][775/1780], lr: 1.25000e-04, loss: 5.5676
Epoch [24][780/1780], lr: 1.25000e-04, loss: 5.0597
Epoch [24][785/1780], lr: 1.25000e-04, loss: 4.4551
Epoch [24][790/1780], lr: 1.25000e-04, loss: 5.0255
Epoch [24][795/1780], lr: 1.25000e-04, loss: 4.934
Epoch [24][800/1780], lr: 1.25000e-04, loss: 5.8237
Epoch [24][805/1780], lr: 1.25000e-04, loss: 4.8025
Epoch [24][810/1780], lr: 1.25000e-04, loss: 5.6388
Epoch [24][815/1780], lr: 1.25000e-04, loss: 4.9811
Epoch [24][820/1780], lr: 1.25000e-04, loss: 4.8589
Epoch [24][825/1780], lr: 1.25000e-04, loss: 4.3295
Epoch [24][83

Epoch [24][1515/1780], lr: 1.25000e-04, loss: 5.0846
Epoch [24][1520/1780], lr: 1.25000e-04, loss: 4.4575
Epoch [24][1525/1780], lr: 1.25000e-04, loss: 5.0411
Epoch [24][1530/1780], lr: 1.25000e-04, loss: 5.5961
Epoch [24][1535/1780], lr: 1.25000e-04, loss: 5.3166
Epoch [24][1540/1780], lr: 1.25000e-04, loss: 5.7855
Epoch [24][1545/1780], lr: 1.25000e-04, loss: 4.9894
Epoch [24][1550/1780], lr: 1.25000e-04, loss: 5.4638
Epoch [24][1555/1780], lr: 1.25000e-04, loss: 4.8749
Epoch [24][1560/1780], lr: 1.25000e-04, loss: 5.508
Epoch [24][1565/1780], lr: 1.25000e-04, loss: 5.124
Epoch [24][1570/1780], lr: 1.25000e-04, loss: 5.8112
Epoch [24][1575/1780], lr: 1.25000e-04, loss: 5.4184
Epoch [24][1580/1780], lr: 1.25000e-04, loss: 5.4875
Epoch [24][1585/1780], lr: 1.25000e-04, loss: 4.6634
Epoch [24][1590/1780], lr: 1.25000e-04, loss: 5.8544
Epoch [24][1595/1780], lr: 1.25000e-04, loss: 5.149
Epoch [24][1600/1780], lr: 1.25000e-04, loss: 5.2283
Epoch [24][1605/1780], lr: 1.25000e-04, loss: 5.3

Epoch [25][510/1780], lr: 1.25000e-04, loss: 5.0355
Epoch [25][515/1780], lr: 1.25000e-04, loss: 5.2962
Epoch [25][520/1780], lr: 1.25000e-04, loss: 5.0377
Epoch [25][525/1780], lr: 1.25000e-04, loss: 4.4872
Epoch [25][530/1780], lr: 1.25000e-04, loss: 3.855
Epoch [25][535/1780], lr: 1.25000e-04, loss: 4.2309
Epoch [25][540/1780], lr: 1.25000e-04, loss: 5.9489
Epoch [25][545/1780], lr: 1.25000e-04, loss: 4.6177
Epoch [25][550/1780], lr: 1.25000e-04, loss: 5.4783
Epoch [25][555/1780], lr: 1.25000e-04, loss: 4.9086
Epoch [25][560/1780], lr: 1.25000e-04, loss: 5.3587
Epoch [25][565/1780], lr: 1.25000e-04, loss: 5.7928
Epoch [25][570/1780], lr: 1.25000e-04, loss: 5.2781
Epoch [25][575/1780], lr: 1.25000e-04, loss: 4.8714
Epoch [25][580/1780], lr: 1.25000e-04, loss: 4.3992
Epoch [25][585/1780], lr: 1.25000e-04, loss: 4.9235
Epoch [25][590/1780], lr: 1.25000e-04, loss: 4.9195
Epoch [25][595/1780], lr: 1.25000e-04, loss: 6.0257
Epoch [25][600/1780], lr: 1.25000e-04, loss: 5.3879
Epoch [25][60

Epoch [25][1295/1780], lr: 1.25000e-04, loss: 5.3363
Epoch [25][1300/1780], lr: 1.25000e-04, loss: 4.5923
Epoch [25][1305/1780], lr: 1.25000e-04, loss: 5.2769
Epoch [25][1310/1780], lr: 1.25000e-04, loss: 4.7598
Epoch [25][1315/1780], lr: 1.25000e-04, loss: 4.7652
Epoch [25][1320/1780], lr: 1.25000e-04, loss: 5.5107
Epoch [25][1325/1780], lr: 1.25000e-04, loss: 4.8247
Epoch [25][1330/1780], lr: 1.25000e-04, loss: 5.5027
Epoch [25][1335/1780], lr: 1.25000e-04, loss: 5.5066
Epoch [25][1340/1780], lr: 1.25000e-04, loss: 5.4446
Epoch [25][1345/1780], lr: 1.25000e-04, loss: 4.2455
Epoch [25][1350/1780], lr: 1.25000e-04, loss: 5.0885
Epoch [25][1355/1780], lr: 1.25000e-04, loss: 4.182
Epoch [25][1360/1780], lr: 1.25000e-04, loss: 5.3434
Epoch [25][1365/1780], lr: 1.25000e-04, loss: 5.1277
Epoch [25][1370/1780], lr: 1.25000e-04, loss: 4.5225
Epoch [25][1375/1780], lr: 1.25000e-04, loss: 4.4881
Epoch [25][1380/1780], lr: 1.25000e-04, loss: 5.6941
Epoch [25][1385/1780], lr: 1.25000e-04, loss: 4

Epoch [26][285/1780], lr: 1.25000e-04, loss: 5.3327
Epoch [26][290/1780], lr: 1.25000e-04, loss: 5.7522
Epoch [26][295/1780], lr: 1.25000e-04, loss: 4.9306
Epoch [26][300/1780], lr: 1.25000e-04, loss: 5.3508
Epoch [26][305/1780], lr: 1.25000e-04, loss: 4.4957
Epoch [26][310/1780], lr: 1.25000e-04, loss: 5.2499
Epoch [26][315/1780], lr: 1.25000e-04, loss: 5.774
Epoch [26][320/1780], lr: 1.25000e-04, loss: 3.6347
Epoch [26][325/1780], lr: 1.25000e-04, loss: 5.4174
Epoch [26][330/1780], lr: 1.25000e-04, loss: 5.3004
Epoch [26][335/1780], lr: 1.25000e-04, loss: 4.3121
Epoch [26][340/1780], lr: 1.25000e-04, loss: 4.7543
Epoch [26][345/1780], lr: 1.25000e-04, loss: 5.5554
Epoch [26][350/1780], lr: 1.25000e-04, loss: 4.8876
Epoch [26][355/1780], lr: 1.25000e-04, loss: 5.45
Epoch [26][360/1780], lr: 1.25000e-04, loss: 5.5087
Epoch [26][365/1780], lr: 1.25000e-04, loss: 5.6763
Epoch [26][370/1780], lr: 1.25000e-04, loss: 5.0323
Epoch [26][375/1780], lr: 1.25000e-04, loss: 4.8901
Epoch [26][380/

Epoch [26][1075/1780], lr: 1.25000e-04, loss: 4.5086
Epoch [26][1080/1780], lr: 1.25000e-04, loss: 4.6665
Epoch [26][1085/1780], lr: 1.25000e-04, loss: 4.8813
Epoch [26][1090/1780], lr: 1.25000e-04, loss: 5.869
Epoch [26][1095/1780], lr: 1.25000e-04, loss: 5.3948
Epoch [26][1100/1780], lr: 1.25000e-04, loss: 4.8754
Epoch [26][1105/1780], lr: 1.25000e-04, loss: 3.9689
Epoch [26][1110/1780], lr: 1.25000e-04, loss: 4.9251
Epoch [26][1115/1780], lr: 1.25000e-04, loss: 4.5316
Epoch [26][1120/1780], lr: 1.25000e-04, loss: 6.3516
Epoch [26][1125/1780], lr: 1.25000e-04, loss: 5.9743
Epoch [26][1130/1780], lr: 1.25000e-04, loss: 4.2789
Epoch [26][1135/1780], lr: 1.25000e-04, loss: 4.2544
Epoch [26][1140/1780], lr: 1.25000e-04, loss: 4.9075
Epoch [26][1145/1780], lr: 1.25000e-04, loss: 4.3108
Epoch [26][1150/1780], lr: 1.25000e-04, loss: 5.6094
Epoch [26][1155/1780], lr: 1.25000e-04, loss: 5.1472
Epoch [26][1160/1780], lr: 1.25000e-04, loss: 5.4397
Epoch [26][1165/1780], lr: 1.25000e-04, loss: 4

Epoch [27][60/1780], lr: 1.25000e-04, loss: 5.1066
Epoch [27][65/1780], lr: 1.25000e-04, loss: 4.403
Epoch [27][70/1780], lr: 1.25000e-04, loss: 4.5099
Epoch [27][75/1780], lr: 1.25000e-04, loss: 4.9499
Epoch [27][80/1780], lr: 1.25000e-04, loss: 4.2799
Epoch [27][85/1780], lr: 1.25000e-04, loss: 5.8486
Epoch [27][90/1780], lr: 1.25000e-04, loss: 4.5875
Epoch [27][95/1780], lr: 1.25000e-04, loss: 5.1551
Epoch [27][100/1780], lr: 1.25000e-04, loss: 3.671
Epoch [27][105/1780], lr: 1.25000e-04, loss: 5.0299
Epoch [27][110/1780], lr: 1.25000e-04, loss: 6.5287
Epoch [27][115/1780], lr: 1.25000e-04, loss: 4.8426
Epoch [27][120/1780], lr: 1.25000e-04, loss: 5.1421
Epoch [27][125/1780], lr: 1.25000e-04, loss: 5.6463
Epoch [27][130/1780], lr: 1.25000e-04, loss: 5.6605
Epoch [27][135/1780], lr: 1.25000e-04, loss: 6.1155
Epoch [27][140/1780], lr: 1.25000e-04, loss: 5.1212
Epoch [27][145/1780], lr: 1.25000e-04, loss: 4.7835
Epoch [27][150/1780], lr: 1.25000e-04, loss: 4.8765
Epoch [27][155/1780], 

Epoch [27][850/1780], lr: 1.25000e-04, loss: 5.368
Epoch [27][855/1780], lr: 1.25000e-04, loss: 5.0841
Epoch [27][860/1780], lr: 1.25000e-04, loss: 4.7551
Epoch [27][865/1780], lr: 1.25000e-04, loss: 4.4324
Epoch [27][870/1780], lr: 1.25000e-04, loss: 5.4112
Epoch [27][875/1780], lr: 1.25000e-04, loss: 4.0014
Epoch [27][880/1780], lr: 1.25000e-04, loss: 5.2468
Epoch [27][885/1780], lr: 1.25000e-04, loss: 5.1203
Epoch [27][890/1780], lr: 1.25000e-04, loss: 5.6331
Epoch [27][895/1780], lr: 1.25000e-04, loss: 4.5939
Epoch [27][900/1780], lr: 1.25000e-04, loss: 5.2056
Epoch [27][905/1780], lr: 1.25000e-04, loss: 5.028
Epoch [27][910/1780], lr: 1.25000e-04, loss: 4.8706
Epoch [27][915/1780], lr: 1.25000e-04, loss: 5.2903
Epoch [27][920/1780], lr: 1.25000e-04, loss: 5.6942
Epoch [27][925/1780], lr: 1.25000e-04, loss: 4.6638
Epoch [27][930/1780], lr: 1.25000e-04, loss: 5.1962
Epoch [27][935/1780], lr: 1.25000e-04, loss: 4.301
Epoch [27][940/1780], lr: 1.25000e-04, loss: 4.7337
Epoch [27][945/

Epoch [27][1630/1780], lr: 1.25000e-04, loss: 5.3055
Epoch [27][1635/1780], lr: 1.25000e-04, loss: 4.3839
Epoch [27][1640/1780], lr: 1.25000e-04, loss: 5.0647
Epoch [27][1645/1780], lr: 1.25000e-04, loss: 3.8939
Epoch [27][1650/1780], lr: 1.25000e-04, loss: 4.3907
Epoch [27][1655/1780], lr: 1.25000e-04, loss: 5.0596
Epoch [27][1660/1780], lr: 1.25000e-04, loss: 5.4621
Epoch [27][1665/1780], lr: 1.25000e-04, loss: 4.8811
Epoch [27][1670/1780], lr: 1.25000e-04, loss: 4.9941
Epoch [27][1675/1780], lr: 1.25000e-04, loss: 4.5869
Epoch [27][1680/1780], lr: 1.25000e-04, loss: 5.611
Epoch [27][1685/1780], lr: 1.25000e-04, loss: 4.2274
Epoch [27][1690/1780], lr: 1.25000e-04, loss: 4.9785
Epoch [27][1695/1780], lr: 1.25000e-04, loss: 4.8367
Epoch [27][1700/1780], lr: 1.25000e-04, loss: 5.2072
Epoch [27][1705/1780], lr: 1.25000e-04, loss: 4.7035
Epoch [27][1710/1780], lr: 1.25000e-04, loss: 4.4021
Epoch [27][1715/1780], lr: 1.25000e-04, loss: 5.1167
Epoch [27][1720/1780], lr: 1.25000e-04, loss: 5

Epoch [28][630/1780], lr: 1.25000e-04, loss: 5.0313
Epoch [28][635/1780], lr: 1.25000e-04, loss: 5.9593
Epoch [28][640/1780], lr: 1.25000e-04, loss: 5.7046
Epoch [28][645/1780], lr: 1.25000e-04, loss: 4.8192
Epoch [28][650/1780], lr: 1.25000e-04, loss: 5.2484
Epoch [28][655/1780], lr: 1.25000e-04, loss: 5.0579
Epoch [28][660/1780], lr: 1.25000e-04, loss: 5.1516
Epoch [28][665/1780], lr: 1.25000e-04, loss: 5.3761
Epoch [28][670/1780], lr: 1.25000e-04, loss: 4.9311
Epoch [28][675/1780], lr: 1.25000e-04, loss: 5.2048
Epoch [28][680/1780], lr: 1.25000e-04, loss: 4.3957
Epoch [28][685/1780], lr: 1.25000e-04, loss: 5.4205
Epoch [28][690/1780], lr: 1.25000e-04, loss: 5.2112
Epoch [28][695/1780], lr: 1.25000e-04, loss: 4.4427
Epoch [28][700/1780], lr: 1.25000e-04, loss: 4.4651
Epoch [28][705/1780], lr: 1.25000e-04, loss: 4.6287
Epoch [28][710/1780], lr: 1.25000e-04, loss: 3.9333
Epoch [28][715/1780], lr: 1.25000e-04, loss: 5.1268
Epoch [28][720/1780], lr: 1.25000e-04, loss: 5.2268
Epoch [28][7

Epoch [28][1415/1780], lr: 1.25000e-04, loss: 4.9904
Epoch [28][1420/1780], lr: 1.25000e-04, loss: 5.5523
Epoch [28][1425/1780], lr: 1.25000e-04, loss: 5.0073
Epoch [28][1430/1780], lr: 1.25000e-04, loss: 4.4323
Epoch [28][1435/1780], lr: 1.25000e-04, loss: 5.0108
Epoch [28][1440/1780], lr: 1.25000e-04, loss: 5.561
Epoch [28][1445/1780], lr: 1.25000e-04, loss: 4.5304
Epoch [28][1450/1780], lr: 1.25000e-04, loss: 4.6951
Epoch [28][1455/1780], lr: 1.25000e-04, loss: 5.9358
Epoch [28][1460/1780], lr: 1.25000e-04, loss: 4.6506
Epoch [28][1465/1780], lr: 1.25000e-04, loss: 5.2362
Epoch [28][1470/1780], lr: 1.25000e-04, loss: 5.3362
Epoch [28][1475/1780], lr: 1.25000e-04, loss: 4.4652
Epoch [28][1480/1780], lr: 1.25000e-04, loss: 4.4093
Epoch [28][1485/1780], lr: 1.25000e-04, loss: 4.8793
Epoch [28][1490/1780], lr: 1.25000e-04, loss: 4.2942
Epoch [28][1495/1780], lr: 1.25000e-04, loss: 4.7581
Epoch [28][1500/1780], lr: 1.25000e-04, loss: 4.798
Epoch [28][1505/1780], lr: 1.25000e-04, loss: 5.

Epoch [29][410/1780], lr: 1.25000e-04, loss: 5.4247
Epoch [29][415/1780], lr: 1.25000e-04, loss: 5.533
Epoch [29][420/1780], lr: 1.25000e-04, loss: 5.6566
Epoch [29][425/1780], lr: 1.25000e-04, loss: 4.02
Epoch [29][430/1780], lr: 1.25000e-04, loss: 5.1784
Epoch [29][435/1780], lr: 1.25000e-04, loss: 4.7034
Epoch [29][440/1780], lr: 1.25000e-04, loss: 4.8864
Epoch [29][445/1780], lr: 1.25000e-04, loss: 4.6934
Epoch [29][450/1780], lr: 1.25000e-04, loss: 3.9546
Epoch [29][455/1780], lr: 1.25000e-04, loss: 5.5706
Epoch [29][460/1780], lr: 1.25000e-04, loss: 4.967
Epoch [29][465/1780], lr: 1.25000e-04, loss: 4.3654
Epoch [29][470/1780], lr: 1.25000e-04, loss: 5.4666
Epoch [29][475/1780], lr: 1.25000e-04, loss: 4.4041
Epoch [29][480/1780], lr: 1.25000e-04, loss: 4.9218
Epoch [29][485/1780], lr: 1.25000e-04, loss: 4.229
Epoch [29][490/1780], lr: 1.25000e-04, loss: 5.6682
Epoch [29][495/1780], lr: 1.25000e-04, loss: 5.2353
Epoch [29][500/1780], lr: 1.25000e-04, loss: 5.1617
Epoch [29][505/17

Epoch [29][1200/1780], lr: 1.25000e-04, loss: 4.9442
Epoch [29][1205/1780], lr: 1.25000e-04, loss: 4.2598
Epoch [29][1210/1780], lr: 1.25000e-04, loss: 5.8958
Epoch [29][1215/1780], lr: 1.25000e-04, loss: 4.3953
Epoch [29][1220/1780], lr: 1.25000e-04, loss: 4.9907
Epoch [29][1225/1780], lr: 1.25000e-04, loss: 4.4628
Epoch [29][1230/1780], lr: 1.25000e-04, loss: 5.4047
Epoch [29][1235/1780], lr: 1.25000e-04, loss: 5.4157
Epoch [29][1240/1780], lr: 1.25000e-04, loss: 6.2367
Epoch [29][1245/1780], lr: 1.25000e-04, loss: 5.155
Epoch [29][1250/1780], lr: 1.25000e-04, loss: 4.6635
Epoch [29][1255/1780], lr: 1.25000e-04, loss: 5.5105
Epoch [29][1260/1780], lr: 1.25000e-04, loss: 3.9545
Epoch [29][1265/1780], lr: 1.25000e-04, loss: 5.7251
Epoch [29][1270/1780], lr: 1.25000e-04, loss: 3.9066
Epoch [29][1275/1780], lr: 1.25000e-04, loss: 5.3156
Epoch [29][1280/1780], lr: 1.25000e-04, loss: 4.8377
Epoch [29][1285/1780], lr: 1.25000e-04, loss: 5.1338
Epoch [29][1290/1780], lr: 1.25000e-04, loss: 5

Epoch [30][190/1780], lr: 1.25000e-04, loss: 5.1548
Epoch [30][195/1780], lr: 1.25000e-04, loss: 5.4387
Epoch [30][200/1780], lr: 1.25000e-04, loss: 4.7574
Epoch [30][205/1780], lr: 1.25000e-04, loss: 4.4593
Epoch [30][210/1780], lr: 1.25000e-04, loss: 3.5715
Epoch [30][215/1780], lr: 1.25000e-04, loss: 5.003
Epoch [30][220/1780], lr: 1.25000e-04, loss: 4.8476
Epoch [30][225/1780], lr: 1.25000e-04, loss: 4.1746
Epoch [30][230/1780], lr: 1.25000e-04, loss: 5.7835
Epoch [30][235/1780], lr: 1.25000e-04, loss: 4.139
Epoch [30][240/1780], lr: 1.25000e-04, loss: 5.2547
Epoch [30][245/1780], lr: 1.25000e-04, loss: 5.0996
Epoch [30][250/1780], lr: 1.25000e-04, loss: 5.2427
Epoch [30][255/1780], lr: 1.25000e-04, loss: 4.9296
Epoch [30][260/1780], lr: 1.25000e-04, loss: 4.1574
Epoch [30][265/1780], lr: 1.25000e-04, loss: 4.7944
Epoch [30][270/1780], lr: 1.25000e-04, loss: 5.1975
Epoch [30][275/1780], lr: 1.25000e-04, loss: 3.5436
Epoch [30][280/1780], lr: 1.25000e-04, loss: 5.2415
Epoch [30][285

Epoch [30][980/1780], lr: 1.25000e-04, loss: 4.3132
Epoch [30][985/1780], lr: 1.25000e-04, loss: 5.1989
Epoch [30][990/1780], lr: 1.25000e-04, loss: 5.4943
Epoch [30][995/1780], lr: 1.25000e-04, loss: 5.8643
Epoch [30][1000/1780], lr: 1.25000e-04, loss: 5.3412
Epoch [30][1005/1780], lr: 1.25000e-04, loss: 5.2194
Epoch [30][1010/1780], lr: 1.25000e-04, loss: 4.1555
Epoch [30][1015/1780], lr: 1.25000e-04, loss: 4.1263
Epoch [30][1020/1780], lr: 1.25000e-04, loss: 5.04
Epoch [30][1025/1780], lr: 1.25000e-04, loss: 4.8382
Epoch [30][1030/1780], lr: 1.25000e-04, loss: 5.2362
Epoch [30][1035/1780], lr: 1.25000e-04, loss: 4.2177
Epoch [30][1040/1780], lr: 1.25000e-04, loss: 4.6424
Epoch [30][1045/1780], lr: 1.25000e-04, loss: 5.8747
Epoch [30][1050/1780], lr: 1.25000e-04, loss: 5.4211
Epoch [30][1055/1780], lr: 1.25000e-04, loss: 4.3968
Epoch [30][1060/1780], lr: 1.25000e-04, loss: 5.9009
Epoch [30][1065/1780], lr: 1.25000e-04, loss: 4.4012
Epoch [30][1070/1780], lr: 1.25000e-04, loss: 4.3234

Epoch [30][1760/1780], lr: 1.25000e-04, loss: 4.8806
Epoch [30][1765/1780], lr: 1.25000e-04, loss: 4.7945
Epoch [30][1770/1780], lr: 1.25000e-04, loss: 4.9201
Epoch [30][1775/1780], lr: 1.25000e-04, loss: 4.5988
Epoch [30][1780/1780], lr: 1.25000e-04, loss: 4.4483
Evaluating top_k_accuracy...
top1_acc: 0.0155, top5_acc: 0.08915, train_loss: 4.4483, val_loss: 4.7322
Saving checkpoint at 30 epochs...
Epoch [31][5/1780], lr: 1.25000e-04, loss: 5.1471
Epoch [31][10/1780], lr: 1.25000e-04, loss: 4.7465
Epoch [31][15/1780], lr: 1.25000e-04, loss: 4.5319
Epoch [31][20/1780], lr: 1.25000e-04, loss: 5.1221
Epoch [31][25/1780], lr: 1.25000e-04, loss: 4.1194
Epoch [31][30/1780], lr: 1.25000e-04, loss: 5.0231
Epoch [31][35/1780], lr: 1.25000e-04, loss: 5.789
Epoch [31][40/1780], lr: 1.25000e-04, loss: 4.7771
Epoch [31][45/1780], lr: 1.25000e-04, loss: 5.114
Epoch [31][50/1780], lr: 1.25000e-04, loss: 4.4039
Epoch [31][55/1780], lr: 1.25000e-04, loss: 5.0749
Epoch [31][60/1780], lr: 1.25000e-04, lo

Epoch [31][760/1780], lr: 1.25000e-04, loss: 5.0948
Epoch [31][765/1780], lr: 1.25000e-04, loss: 4.9948
Epoch [31][770/1780], lr: 1.25000e-04, loss: 4.982
Epoch [31][775/1780], lr: 1.25000e-04, loss: 4.5315
Epoch [31][780/1780], lr: 1.25000e-04, loss: 4.1436
Epoch [31][785/1780], lr: 1.25000e-04, loss: 4.5327
Epoch [31][790/1780], lr: 1.25000e-04, loss: 4.0608
Epoch [31][795/1780], lr: 1.25000e-04, loss: 4.6138
Epoch [31][800/1780], lr: 1.25000e-04, loss: 4.295
Epoch [31][805/1780], lr: 1.25000e-04, loss: 4.948
Epoch [31][810/1780], lr: 1.25000e-04, loss: 5.4166
Epoch [31][815/1780], lr: 1.25000e-04, loss: 4.759
Epoch [31][820/1780], lr: 1.25000e-04, loss: 4.4735
Epoch [31][825/1780], lr: 1.25000e-04, loss: 5.1632
Epoch [31][830/1780], lr: 1.25000e-04, loss: 4.8038
Epoch [31][835/1780], lr: 1.25000e-04, loss: 5.413
Epoch [31][840/1780], lr: 1.25000e-04, loss: 4.8898
Epoch [31][845/1780], lr: 1.25000e-04, loss: 4.522
Epoch [31][850/1780], lr: 1.25000e-04, loss: 4.9623
Epoch [31][855/178

Epoch [31][1540/1780], lr: 1.25000e-04, loss: 5.027
Epoch [31][1545/1780], lr: 1.25000e-04, loss: 5.3839
Epoch [31][1550/1780], lr: 1.25000e-04, loss: 5.8554
Epoch [31][1555/1780], lr: 1.25000e-04, loss: 5.2285
Epoch [31][1560/1780], lr: 1.25000e-04, loss: 4.0283
Epoch [31][1565/1780], lr: 1.25000e-04, loss: 5.1698
Epoch [31][1570/1780], lr: 1.25000e-04, loss: 4.3899
Epoch [31][1575/1780], lr: 1.25000e-04, loss: 4.1719
Epoch [31][1580/1780], lr: 1.25000e-04, loss: 4.4185
Epoch [31][1585/1780], lr: 1.25000e-04, loss: 5.9712
Epoch [31][1590/1780], lr: 1.25000e-04, loss: 4.1163
Epoch [31][1595/1780], lr: 1.25000e-04, loss: 5.1406
Epoch [31][1600/1780], lr: 1.25000e-04, loss: 5.0893
Epoch [31][1605/1780], lr: 1.25000e-04, loss: 4.5649
Epoch [31][1610/1780], lr: 1.25000e-04, loss: 6.6023
Epoch [31][1615/1780], lr: 1.25000e-04, loss: 4.9979
Epoch [31][1620/1780], lr: 1.25000e-04, loss: 4.365
Epoch [31][1625/1780], lr: 1.25000e-04, loss: 4.3394
Epoch [31][1630/1780], lr: 1.25000e-04, loss: 4.

Epoch [32][535/1780], lr: 1.25000e-04, loss: 4.9242
Epoch [32][540/1780], lr: 1.25000e-04, loss: 3.881
Epoch [32][545/1780], lr: 1.25000e-04, loss: 4.7197
Epoch [32][550/1780], lr: 1.25000e-04, loss: 5.0868
Epoch [32][555/1780], lr: 1.25000e-04, loss: 4.6562
Epoch [32][560/1780], lr: 1.25000e-04, loss: 5.2363
Epoch [32][565/1780], lr: 1.25000e-04, loss: 4.5897
Epoch [32][570/1780], lr: 1.25000e-04, loss: 4.6441
Epoch [32][575/1780], lr: 1.25000e-04, loss: 4.9413
Epoch [32][580/1780], lr: 1.25000e-04, loss: 4.234
Epoch [32][585/1780], lr: 1.25000e-04, loss: 3.947
Epoch [32][590/1780], lr: 1.25000e-04, loss: 5.0175
Epoch [32][595/1780], lr: 1.25000e-04, loss: 4.5074
Epoch [32][600/1780], lr: 1.25000e-04, loss: 5.632
Epoch [32][605/1780], lr: 1.25000e-04, loss: 4.8117
Epoch [32][610/1780], lr: 1.25000e-04, loss: 4.3561
Epoch [32][615/1780], lr: 1.25000e-04, loss: 5.884
Epoch [32][620/1780], lr: 1.25000e-04, loss: 4.0939
Epoch [32][625/1780], lr: 1.25000e-04, loss: 4.9929
Epoch [32][630/17

Epoch [32][1320/1780], lr: 1.25000e-04, loss: 4.8237
Epoch [32][1325/1780], lr: 1.25000e-04, loss: 4.8867
Epoch [32][1330/1780], lr: 1.25000e-04, loss: 5.7989
Epoch [32][1335/1780], lr: 1.25000e-04, loss: 3.9261
Epoch [32][1340/1780], lr: 1.25000e-04, loss: 4.1874
Epoch [32][1345/1780], lr: 1.25000e-04, loss: 4.0106
Epoch [32][1350/1780], lr: 1.25000e-04, loss: 4.5271
Epoch [32][1355/1780], lr: 1.25000e-04, loss: 5.4174
Epoch [32][1360/1780], lr: 1.25000e-04, loss: 5.4005
Epoch [32][1365/1780], lr: 1.25000e-04, loss: 4.5364
Epoch [32][1370/1780], lr: 1.25000e-04, loss: 5.0237
Epoch [32][1375/1780], lr: 1.25000e-04, loss: 5.2755
Epoch [32][1380/1780], lr: 1.25000e-04, loss: 5.3569
Epoch [32][1385/1780], lr: 1.25000e-04, loss: 5.0419
Epoch [32][1390/1780], lr: 1.25000e-04, loss: 4.9809
Epoch [32][1395/1780], lr: 1.25000e-04, loss: 4.6355
Epoch [32][1400/1780], lr: 1.25000e-04, loss: 4.9688
Epoch [32][1405/1780], lr: 1.25000e-04, loss: 3.8102
Epoch [32][1410/1780], lr: 1.25000e-04, loss: 

Epoch [33][310/1780], lr: 1.25000e-04, loss: 3.7914
Epoch [33][315/1780], lr: 1.25000e-04, loss: 4.7915
Epoch [33][320/1780], lr: 1.25000e-04, loss: 4.8794
Epoch [33][325/1780], lr: 1.25000e-04, loss: 4.1857
Epoch [33][330/1780], lr: 1.25000e-04, loss: 5.0119
Epoch [33][335/1780], lr: 1.25000e-04, loss: 3.7048
Epoch [33][340/1780], lr: 1.25000e-04, loss: 3.9198
Epoch [33][345/1780], lr: 1.25000e-04, loss: 4.6178
Epoch [33][350/1780], lr: 1.25000e-04, loss: 2.9586
Epoch [33][355/1780], lr: 1.25000e-04, loss: 4.6629
Epoch [33][360/1780], lr: 1.25000e-04, loss: 4.2442
Epoch [33][365/1780], lr: 1.25000e-04, loss: 5.2121
Epoch [33][370/1780], lr: 1.25000e-04, loss: 3.5442
Epoch [33][375/1780], lr: 1.25000e-04, loss: 5.0727
Epoch [33][380/1780], lr: 1.25000e-04, loss: 4.7488
Epoch [33][385/1780], lr: 1.25000e-04, loss: 5.5665
Epoch [33][390/1780], lr: 1.25000e-04, loss: 5.1625
Epoch [33][395/1780], lr: 1.25000e-04, loss: 5.2546
Epoch [33][400/1780], lr: 1.25000e-04, loss: 4.3356
Epoch [33][4

Epoch [33][1100/1780], lr: 1.25000e-04, loss: 4.6717
Epoch [33][1105/1780], lr: 1.25000e-04, loss: 3.9149
Epoch [33][1110/1780], lr: 1.25000e-04, loss: 4.3975
Epoch [33][1115/1780], lr: 1.25000e-04, loss: 4.2565
Epoch [33][1120/1780], lr: 1.25000e-04, loss: 3.7158
Epoch [33][1125/1780], lr: 1.25000e-04, loss: 5.0533
Epoch [33][1130/1780], lr: 1.25000e-04, loss: 4.9558
Epoch [33][1135/1780], lr: 1.25000e-04, loss: 4.6288
Epoch [33][1140/1780], lr: 1.25000e-04, loss: 3.5912
Epoch [33][1145/1780], lr: 1.25000e-04, loss: 4.5588
Epoch [33][1150/1780], lr: 1.25000e-04, loss: 4.335
Epoch [33][1155/1780], lr: 1.25000e-04, loss: 5.3473
Epoch [33][1160/1780], lr: 1.25000e-04, loss: 3.8924
Epoch [33][1165/1780], lr: 1.25000e-04, loss: 3.8885
Epoch [33][1170/1780], lr: 1.25000e-04, loss: 5.2383
Epoch [33][1175/1780], lr: 1.25000e-04, loss: 4.066
Epoch [33][1180/1780], lr: 1.25000e-04, loss: 4.843
Epoch [33][1185/1780], lr: 1.25000e-04, loss: 4.4332
Epoch [33][1190/1780], lr: 1.25000e-04, loss: 4.7

Epoch [34][85/1780], lr: 1.25000e-04, loss: 4.7972
Epoch [34][90/1780], lr: 1.25000e-04, loss: 5.1441
Epoch [34][95/1780], lr: 1.25000e-04, loss: 4.8488
Epoch [34][100/1780], lr: 1.25000e-04, loss: 4.6187
Epoch [34][105/1780], lr: 1.25000e-04, loss: 4.7
Epoch [34][110/1780], lr: 1.25000e-04, loss: 4.2054
Epoch [34][115/1780], lr: 1.25000e-04, loss: 5.7654
Epoch [34][120/1780], lr: 1.25000e-04, loss: 5.1046
Epoch [34][125/1780], lr: 1.25000e-04, loss: 5.2718
Epoch [34][130/1780], lr: 1.25000e-04, loss: 4.5436
Epoch [34][135/1780], lr: 1.25000e-04, loss: 4.9684
Epoch [34][140/1780], lr: 1.25000e-04, loss: 4.6321
Epoch [34][145/1780], lr: 1.25000e-04, loss: 4.8769
Epoch [34][150/1780], lr: 1.25000e-04, loss: 5.3817
Epoch [34][155/1780], lr: 1.25000e-04, loss: 4.6042
Epoch [34][160/1780], lr: 1.25000e-04, loss: 4.3533
Epoch [34][165/1780], lr: 1.25000e-04, loss: 4.9826
Epoch [34][170/1780], lr: 1.25000e-04, loss: 4.593
Epoch [34][175/1780], lr: 1.25000e-04, loss: 3.8183
Epoch [34][180/1780

Epoch [34][880/1780], lr: 1.25000e-04, loss: 4.6245
Epoch [34][885/1780], lr: 1.25000e-04, loss: 4.8777
Epoch [34][890/1780], lr: 1.25000e-04, loss: 4.209
Epoch [34][895/1780], lr: 1.25000e-04, loss: 4.6298
Epoch [34][900/1780], lr: 1.25000e-04, loss: 4.0444
Epoch [34][905/1780], lr: 1.25000e-04, loss: 4.0283
Epoch [34][910/1780], lr: 1.25000e-04, loss: 4.5527
Epoch [34][915/1780], lr: 1.25000e-04, loss: 4.9
Epoch [34][920/1780], lr: 1.25000e-04, loss: 4.6274
Epoch [34][925/1780], lr: 1.25000e-04, loss: 4.034
Epoch [34][930/1780], lr: 1.25000e-04, loss: 5.0959
Epoch [34][935/1780], lr: 1.25000e-04, loss: 4.4609
Epoch [34][940/1780], lr: 1.25000e-04, loss: 4.8872
Epoch [34][945/1780], lr: 1.25000e-04, loss: 4.6837
Epoch [34][950/1780], lr: 1.25000e-04, loss: 4.4905
Epoch [34][955/1780], lr: 1.25000e-04, loss: 4.957
Epoch [34][960/1780], lr: 1.25000e-04, loss: 4.6549
Epoch [34][965/1780], lr: 1.25000e-04, loss: 4.9933
Epoch [34][970/1780], lr: 1.25000e-04, loss: 4.4997
Epoch [34][975/178

Epoch [34][1660/1780], lr: 1.25000e-04, loss: 4.7419
Epoch [34][1665/1780], lr: 1.25000e-04, loss: 3.5308
Epoch [34][1670/1780], lr: 1.25000e-04, loss: 4.6016
Epoch [34][1675/1780], lr: 1.25000e-04, loss: 5.1949
Epoch [34][1680/1780], lr: 1.25000e-04, loss: 5.1368
Epoch [34][1685/1780], lr: 1.25000e-04, loss: 4.3377
Epoch [34][1690/1780], lr: 1.25000e-04, loss: 4.4772
Epoch [34][1695/1780], lr: 1.25000e-04, loss: 5.2941
Epoch [34][1700/1780], lr: 1.25000e-04, loss: 5.5132
Epoch [34][1705/1780], lr: 1.25000e-04, loss: 4.8226
Epoch [34][1710/1780], lr: 1.25000e-04, loss: 4.7386
Epoch [34][1715/1780], lr: 1.25000e-04, loss: 4.2748
Epoch [34][1720/1780], lr: 1.25000e-04, loss: 5.0234
Epoch [34][1725/1780], lr: 1.25000e-04, loss: 4.3458
Epoch [34][1730/1780], lr: 1.25000e-04, loss: 4.6193
Epoch [34][1735/1780], lr: 1.25000e-04, loss: 5.1077
Epoch [34][1740/1780], lr: 1.25000e-04, loss: 4.4776
Epoch [34][1745/1780], lr: 1.25000e-04, loss: 4.2343
Epoch [34][1750/1780], lr: 1.25000e-04, loss: 

Epoch [35][660/1780], lr: 1.25000e-04, loss: 4.358
Epoch [35][665/1780], lr: 1.25000e-04, loss: 4.3927
Epoch [35][670/1780], lr: 1.25000e-04, loss: 4.4797
Epoch [35][675/1780], lr: 1.25000e-04, loss: 4.6078
Epoch [35][680/1780], lr: 1.25000e-04, loss: 4.8964
Epoch [35][685/1780], lr: 1.25000e-04, loss: 4.2159
Epoch [35][690/1780], lr: 1.25000e-04, loss: 4.6335
Epoch [35][695/1780], lr: 1.25000e-04, loss: 4.346
Epoch [35][700/1780], lr: 1.25000e-04, loss: 4.701
Epoch [35][705/1780], lr: 1.25000e-04, loss: 4.7351
Epoch [35][710/1780], lr: 1.25000e-04, loss: 4.3069
Epoch [35][715/1780], lr: 1.25000e-04, loss: 3.8693
Epoch [35][720/1780], lr: 1.25000e-04, loss: 4.4448
Epoch [35][725/1780], lr: 1.25000e-04, loss: 5.4308
Epoch [35][730/1780], lr: 1.25000e-04, loss: 4.6026
Epoch [35][735/1780], lr: 1.25000e-04, loss: 5.0491
Epoch [35][740/1780], lr: 1.25000e-04, loss: 5.534
Epoch [35][745/1780], lr: 1.25000e-04, loss: 4.1879
Epoch [35][750/1780], lr: 1.25000e-04, loss: 4.9787
Epoch [35][755/1

Epoch [35][1445/1780], lr: 1.25000e-04, loss: 4.5742
Epoch [35][1450/1780], lr: 1.25000e-04, loss: 4.65
Epoch [35][1455/1780], lr: 1.25000e-04, loss: 4.2678
Epoch [35][1460/1780], lr: 1.25000e-04, loss: 5.5153
Epoch [35][1465/1780], lr: 1.25000e-04, loss: 5.542
Epoch [35][1470/1780], lr: 1.25000e-04, loss: 4.3813
Epoch [35][1475/1780], lr: 1.25000e-04, loss: 5.3096
Epoch [35][1480/1780], lr: 1.25000e-04, loss: 4.4717
Epoch [35][1485/1780], lr: 1.25000e-04, loss: 3.8873
Epoch [35][1490/1780], lr: 1.25000e-04, loss: 4.4183
Epoch [35][1495/1780], lr: 1.25000e-04, loss: 4.21
Epoch [35][1500/1780], lr: 1.25000e-04, loss: 4.1465
Epoch [35][1505/1780], lr: 1.25000e-04, loss: 3.7656
Epoch [35][1510/1780], lr: 1.25000e-04, loss: 4.4869
Epoch [35][1515/1780], lr: 1.25000e-04, loss: 5.272
Epoch [35][1520/1780], lr: 1.25000e-04, loss: 3.9253
Epoch [35][1525/1780], lr: 1.25000e-04, loss: 4.0552
Epoch [35][1530/1780], lr: 1.25000e-04, loss: 5.5293
Epoch [35][1535/1780], lr: 1.25000e-04, loss: 4.3778

Epoch [36][440/1780], lr: 1.25000e-04, loss: 4.6058
Epoch [36][445/1780], lr: 1.25000e-04, loss: 5.0794
Epoch [36][450/1780], lr: 1.25000e-04, loss: 4.9796
Epoch [36][455/1780], lr: 1.25000e-04, loss: 4.2821
Epoch [36][460/1780], lr: 1.25000e-04, loss: 3.7895
Epoch [36][465/1780], lr: 1.25000e-04, loss: 4.1607
Epoch [36][470/1780], lr: 1.25000e-04, loss: 4.5154
Epoch [36][475/1780], lr: 1.25000e-04, loss: 3.3911
Epoch [36][480/1780], lr: 1.25000e-04, loss: 5.3597
Epoch [36][485/1780], lr: 1.25000e-04, loss: 4.8534
Epoch [36][490/1780], lr: 1.25000e-04, loss: 4.0141
Epoch [36][495/1780], lr: 1.25000e-04, loss: 4.2163
Epoch [36][500/1780], lr: 1.25000e-04, loss: 4.2295
Epoch [36][505/1780], lr: 1.25000e-04, loss: 5.0193
Epoch [36][510/1780], lr: 1.25000e-04, loss: 5.2439
Epoch [36][515/1780], lr: 1.25000e-04, loss: 4.5463
Epoch [36][520/1780], lr: 1.25000e-04, loss: 4.0802
Epoch [36][525/1780], lr: 1.25000e-04, loss: 4.6247
Epoch [36][530/1780], lr: 1.25000e-04, loss: 4.6339
Epoch [36][5

Epoch [36][1230/1780], lr: 1.25000e-04, loss: 4.1089
Epoch [36][1235/1780], lr: 1.25000e-04, loss: 4.671
Epoch [36][1240/1780], lr: 1.25000e-04, loss: 5.4965
Epoch [36][1245/1780], lr: 1.25000e-04, loss: 4.1557
Epoch [36][1250/1780], lr: 1.25000e-04, loss: 4.2097
Epoch [36][1255/1780], lr: 1.25000e-04, loss: 4.4909
Epoch [36][1260/1780], lr: 1.25000e-04, loss: 4.7817
Epoch [36][1265/1780], lr: 1.25000e-04, loss: 5.6553
Epoch [36][1270/1780], lr: 1.25000e-04, loss: 4.5735
Epoch [36][1275/1780], lr: 1.25000e-04, loss: 5.1931
Epoch [36][1280/1780], lr: 1.25000e-04, loss: 5.6129
Epoch [36][1285/1780], lr: 1.25000e-04, loss: 5.1342
Epoch [36][1290/1780], lr: 1.25000e-04, loss: 4.0056
Epoch [36][1295/1780], lr: 1.25000e-04, loss: 4.1141
Epoch [36][1300/1780], lr: 1.25000e-04, loss: 4.9253
Epoch [36][1305/1780], lr: 1.25000e-04, loss: 3.6063
Epoch [36][1310/1780], lr: 1.25000e-04, loss: 4.2887
Epoch [36][1315/1780], lr: 1.25000e-04, loss: 3.8625
Epoch [36][1320/1780], lr: 1.25000e-04, loss: 5

Epoch [37][220/1780], lr: 1.25000e-04, loss: 4.5939
Epoch [37][225/1780], lr: 1.25000e-04, loss: 4.1651
Epoch [37][230/1780], lr: 1.25000e-04, loss: 4.5094
Epoch [37][235/1780], lr: 1.25000e-04, loss: 5.5119
Epoch [37][240/1780], lr: 1.25000e-04, loss: 4.7796
Epoch [37][245/1780], lr: 1.25000e-04, loss: 3.7121
Epoch [37][250/1780], lr: 1.25000e-04, loss: 4.4242
Epoch [37][255/1780], lr: 1.25000e-04, loss: 4.9428
Epoch [37][260/1780], lr: 1.25000e-04, loss: 4.7473
Epoch [37][265/1780], lr: 1.25000e-04, loss: 4.7772
Epoch [37][270/1780], lr: 1.25000e-04, loss: 4.6509
Epoch [37][275/1780], lr: 1.25000e-04, loss: 3.9102
Epoch [37][280/1780], lr: 1.25000e-04, loss: 4.1598
Epoch [37][285/1780], lr: 1.25000e-04, loss: 4.9472
Epoch [37][290/1780], lr: 1.25000e-04, loss: 4.418
Epoch [37][295/1780], lr: 1.25000e-04, loss: 4.1692
Epoch [37][300/1780], lr: 1.25000e-04, loss: 4.4458
Epoch [37][305/1780], lr: 1.25000e-04, loss: 3.8815
Epoch [37][310/1780], lr: 1.25000e-04, loss: 5.085
Epoch [37][315

Epoch [37][1010/1780], lr: 1.25000e-04, loss: 4.5438
Epoch [37][1015/1780], lr: 1.25000e-04, loss: 3.8967
Epoch [37][1020/1780], lr: 1.25000e-04, loss: 4.8994
Epoch [37][1025/1780], lr: 1.25000e-04, loss: 4.9291
Epoch [37][1030/1780], lr: 1.25000e-04, loss: 4.5216
Epoch [37][1035/1780], lr: 1.25000e-04, loss: 4.5689
Epoch [37][1040/1780], lr: 1.25000e-04, loss: 5.2049
Epoch [37][1045/1780], lr: 1.25000e-04, loss: 3.9505
Epoch [37][1050/1780], lr: 1.25000e-04, loss: 4.4151
Epoch [37][1055/1780], lr: 1.25000e-04, loss: 3.8283
Epoch [37][1060/1780], lr: 1.25000e-04, loss: 3.678
Epoch [37][1065/1780], lr: 1.25000e-04, loss: 4.5627
Epoch [37][1070/1780], lr: 1.25000e-04, loss: 4.1284
Epoch [37][1075/1780], lr: 1.25000e-04, loss: 4.5544
Epoch [37][1080/1780], lr: 1.25000e-04, loss: 3.633
Epoch [37][1085/1780], lr: 1.25000e-04, loss: 4.7942
Epoch [37][1090/1780], lr: 1.25000e-04, loss: 4.6944
Epoch [37][1095/1780], lr: 1.25000e-04, loss: 4.4611
Epoch [37][1100/1780], lr: 1.25000e-04, loss: 4.

top1_acc: 0.03488, top5_acc: 0.124, train_loss: 3.9066, val_loss: 4.5339
Saving checkpoint at 37 epochs...
Epoch [38][5/1780], lr: 1.25000e-04, loss: 4.1077
Epoch [38][10/1780], lr: 1.25000e-04, loss: 4.3094
Epoch [38][15/1780], lr: 1.25000e-04, loss: 5.1359
Epoch [38][20/1780], lr: 1.25000e-04, loss: 3.9603
Epoch [38][25/1780], lr: 1.25000e-04, loss: 4.8151
Epoch [38][30/1780], lr: 1.25000e-04, loss: 4.6673
Epoch [38][35/1780], lr: 1.25000e-04, loss: 4.139
Epoch [38][40/1780], lr: 1.25000e-04, loss: 3.7835
Epoch [38][45/1780], lr: 1.25000e-04, loss: 4.8802
Epoch [38][50/1780], lr: 1.25000e-04, loss: 5.15
Epoch [38][55/1780], lr: 1.25000e-04, loss: 4.2524
Epoch [38][60/1780], lr: 1.25000e-04, loss: 5.0238
Epoch [38][65/1780], lr: 1.25000e-04, loss: 4.51
Epoch [38][70/1780], lr: 1.25000e-04, loss: 4.2151
Epoch [38][75/1780], lr: 1.25000e-04, loss: 4.1904
Epoch [38][80/1780], lr: 1.25000e-04, loss: 4.0361
Epoch [38][85/1780], lr: 1.25000e-04, loss: 4.6626
Epoch [38][90/1780], lr: 1.25000

Epoch [38][790/1780], lr: 1.25000e-04, loss: 4.4599
Epoch [38][795/1780], lr: 1.25000e-04, loss: 4.8683
Epoch [38][800/1780], lr: 1.25000e-04, loss: 4.4331
Epoch [38][805/1780], lr: 1.25000e-04, loss: 4.0366
Epoch [38][810/1780], lr: 1.25000e-04, loss: 4.7615
Epoch [38][815/1780], lr: 1.25000e-04, loss: 4.9729
Epoch [38][820/1780], lr: 1.25000e-04, loss: 4.083
Epoch [38][825/1780], lr: 1.25000e-04, loss: 4.2911
Epoch [38][830/1780], lr: 1.25000e-04, loss: 4.709
Epoch [38][835/1780], lr: 1.25000e-04, loss: 4.533
Epoch [38][840/1780], lr: 1.25000e-04, loss: 4.9667
Epoch [38][845/1780], lr: 1.25000e-04, loss: 4.3388
Epoch [38][850/1780], lr: 1.25000e-04, loss: 2.7581
Epoch [38][855/1780], lr: 1.25000e-04, loss: 4.1605
Epoch [38][860/1780], lr: 1.25000e-04, loss: 4.7982
Epoch [38][865/1780], lr: 1.25000e-04, loss: 5.7684
Epoch [38][870/1780], lr: 1.25000e-04, loss: 4.3977
Epoch [38][875/1780], lr: 1.25000e-04, loss: 3.4591
Epoch [38][880/1780], lr: 1.25000e-04, loss: 4.8928
Epoch [38][885/

Epoch [38][1570/1780], lr: 1.25000e-04, loss: 4.6584
Epoch [38][1575/1780], lr: 1.25000e-04, loss: 4.6067
Epoch [38][1580/1780], lr: 1.25000e-04, loss: 3.8221
Epoch [38][1585/1780], lr: 1.25000e-04, loss: 4.5253
Epoch [38][1590/1780], lr: 1.25000e-04, loss: 4.3712
Epoch [38][1595/1780], lr: 1.25000e-04, loss: 4.585
Epoch [38][1600/1780], lr: 1.25000e-04, loss: 4.7423
Epoch [38][1605/1780], lr: 1.25000e-04, loss: 4.0129
Epoch [38][1610/1780], lr: 1.25000e-04, loss: 4.0055
Epoch [38][1615/1780], lr: 1.25000e-04, loss: 4.2618
Epoch [38][1620/1780], lr: 1.25000e-04, loss: 4.4972
Epoch [38][1625/1780], lr: 1.25000e-04, loss: 4.2856
Epoch [38][1630/1780], lr: 1.25000e-04, loss: 4.7016
Epoch [38][1635/1780], lr: 1.25000e-04, loss: 4.5268
Epoch [38][1640/1780], lr: 1.25000e-04, loss: 5.1117
Epoch [38][1645/1780], lr: 1.25000e-04, loss: 4.6984
Epoch [38][1650/1780], lr: 1.25000e-04, loss: 3.864
Epoch [38][1655/1780], lr: 1.25000e-04, loss: 4.4067
Epoch [38][1660/1780], lr: 1.25000e-04, loss: 3.

Epoch [39][565/1780], lr: 1.25000e-04, loss: 4.7046
Epoch [39][570/1780], lr: 1.25000e-04, loss: 3.81
Epoch [39][575/1780], lr: 1.25000e-04, loss: 4.0663
Epoch [39][580/1780], lr: 1.25000e-04, loss: 4.2632
Epoch [39][585/1780], lr: 1.25000e-04, loss: 4.1714
Epoch [39][590/1780], lr: 1.25000e-04, loss: 4.0187
Epoch [39][595/1780], lr: 1.25000e-04, loss: 4.5787
Epoch [39][600/1780], lr: 1.25000e-04, loss: 3.5172
Epoch [39][605/1780], lr: 1.25000e-04, loss: 4.3546
Epoch [39][610/1780], lr: 1.25000e-04, loss: 4.631
Epoch [39][615/1780], lr: 1.25000e-04, loss: 4.2626
Epoch [39][620/1780], lr: 1.25000e-04, loss: 4.4018
Epoch [39][625/1780], lr: 1.25000e-04, loss: 4.9081
Epoch [39][630/1780], lr: 1.25000e-04, loss: 4.8903
Epoch [39][635/1780], lr: 1.25000e-04, loss: 4.078
Epoch [39][640/1780], lr: 1.25000e-04, loss: 3.5224
Epoch [39][645/1780], lr: 1.25000e-04, loss: 5.1288
Epoch [39][650/1780], lr: 1.25000e-04, loss: 5.3169
Epoch [39][655/1780], lr: 1.25000e-04, loss: 3.5624
Epoch [39][660/1

Epoch [39][1350/1780], lr: 1.25000e-04, loss: 3.4596
Epoch [39][1355/1780], lr: 1.25000e-04, loss: 4.3984
Epoch [39][1360/1780], lr: 1.25000e-04, loss: 3.8071
Epoch [39][1365/1780], lr: 1.25000e-04, loss: 4.6206
Epoch [39][1370/1780], lr: 1.25000e-04, loss: 3.9506
Epoch [39][1375/1780], lr: 1.25000e-04, loss: 4.6514
Epoch [39][1380/1780], lr: 1.25000e-04, loss: 4.5713
Epoch [39][1385/1780], lr: 1.25000e-04, loss: 3.9583
Epoch [39][1390/1780], lr: 1.25000e-04, loss: 5.0566
Epoch [39][1395/1780], lr: 1.25000e-04, loss: 3.9446
Epoch [39][1400/1780], lr: 1.25000e-04, loss: 5.394
Epoch [39][1405/1780], lr: 1.25000e-04, loss: 5.306
Epoch [39][1410/1780], lr: 1.25000e-04, loss: 3.7874
Epoch [39][1415/1780], lr: 1.25000e-04, loss: 4.3954
Epoch [39][1420/1780], lr: 1.25000e-04, loss: 4.5464
Epoch [39][1425/1780], lr: 1.25000e-04, loss: 3.3049
Epoch [39][1430/1780], lr: 1.25000e-04, loss: 5.0584
Epoch [39][1435/1780], lr: 1.25000e-04, loss: 4.0899
Epoch [39][1440/1780], lr: 1.25000e-04, loss: 4.

Epoch [40][345/1780], lr: 1.25000e-04, loss: 4.0505
Epoch [40][350/1780], lr: 1.25000e-04, loss: 4.371
Epoch [40][355/1780], lr: 1.25000e-04, loss: 4.0683
Epoch [40][360/1780], lr: 1.25000e-04, loss: 4.1397
Epoch [40][365/1780], lr: 1.25000e-04, loss: 4.7784
Epoch [40][370/1780], lr: 1.25000e-04, loss: 4.5977
Epoch [40][375/1780], lr: 1.25000e-04, loss: 4.4908
Epoch [40][380/1780], lr: 1.25000e-04, loss: 2.6557
Epoch [40][385/1780], lr: 1.25000e-04, loss: 3.4116
Epoch [40][390/1780], lr: 1.25000e-04, loss: 4.1626
Epoch [40][395/1780], lr: 1.25000e-04, loss: 5.0552
Epoch [40][400/1780], lr: 1.25000e-04, loss: 4.0333
Epoch [40][405/1780], lr: 1.25000e-04, loss: 3.8463
Epoch [40][410/1780], lr: 1.25000e-04, loss: 5.2089
Epoch [40][415/1780], lr: 1.25000e-04, loss: 5.2334
Epoch [40][420/1780], lr: 1.25000e-04, loss: 3.6784
Epoch [40][425/1780], lr: 1.25000e-04, loss: 4.4404
Epoch [40][430/1780], lr: 1.25000e-04, loss: 4.6613
Epoch [40][435/1780], lr: 1.25000e-04, loss: 4.8849
Epoch [40][44

Epoch [40][1135/1780], lr: 1.25000e-04, loss: 3.8039
Epoch [40][1140/1780], lr: 1.25000e-04, loss: 4.3287
Epoch [40][1145/1780], lr: 1.25000e-04, loss: 5.0537
Epoch [40][1150/1780], lr: 1.25000e-04, loss: 4.572
Epoch [40][1155/1780], lr: 1.25000e-04, loss: 4.6868
Epoch [40][1160/1780], lr: 1.25000e-04, loss: 4.8068
Epoch [40][1165/1780], lr: 1.25000e-04, loss: 4.3395
Epoch [40][1170/1780], lr: 1.25000e-04, loss: 3.0104
Epoch [40][1175/1780], lr: 1.25000e-04, loss: 3.8027
Epoch [40][1180/1780], lr: 1.25000e-04, loss: 4.0903
Epoch [40][1185/1780], lr: 1.25000e-04, loss: 3.6248
Epoch [40][1190/1780], lr: 1.25000e-04, loss: 3.7401
Epoch [40][1195/1780], lr: 1.25000e-04, loss: 4.7064
Epoch [40][1200/1780], lr: 1.25000e-04, loss: 3.9286
Epoch [40][1205/1780], lr: 1.25000e-04, loss: 4.7087
Epoch [40][1210/1780], lr: 1.25000e-04, loss: 3.8774
Epoch [40][1215/1780], lr: 1.25000e-04, loss: 4.0766
Epoch [40][1220/1780], lr: 1.25000e-04, loss: 4.2501
Epoch [40][1225/1780], lr: 1.25000e-04, loss: 4

Epoch [41][125/1780], lr: 1.25000e-04, loss: 4.0376
Epoch [41][130/1780], lr: 1.25000e-04, loss: 4.3137
Epoch [41][135/1780], lr: 1.25000e-04, loss: 4.4914
Epoch [41][140/1780], lr: 1.25000e-04, loss: 4.3053
Epoch [41][145/1780], lr: 1.25000e-04, loss: 4.2318
Epoch [41][150/1780], lr: 1.25000e-04, loss: 3.5174
Epoch [41][155/1780], lr: 1.25000e-04, loss: 3.8154
Epoch [41][160/1780], lr: 1.25000e-04, loss: 4.5163
Epoch [41][165/1780], lr: 1.25000e-04, loss: 3.7023
Epoch [41][170/1780], lr: 1.25000e-04, loss: 3.9245
Epoch [41][175/1780], lr: 1.25000e-04, loss: 5.1513
Epoch [41][180/1780], lr: 1.25000e-04, loss: 3.3966
Epoch [41][185/1780], lr: 1.25000e-04, loss: 3.95
Epoch [41][190/1780], lr: 1.25000e-04, loss: 3.9885
Epoch [41][195/1780], lr: 1.25000e-04, loss: 5.7248
Epoch [41][200/1780], lr: 1.25000e-04, loss: 4.0442
Epoch [41][205/1780], lr: 1.25000e-04, loss: 4.084
Epoch [41][210/1780], lr: 1.25000e-04, loss: 4.3851
Epoch [41][215/1780], lr: 1.25000e-04, loss: 4.8513
Epoch [41][220/

Epoch [41][915/1780], lr: 1.25000e-04, loss: 4.3461
Epoch [41][920/1780], lr: 1.25000e-04, loss: 4.0524
Epoch [41][925/1780], lr: 1.25000e-04, loss: 4.2112
Epoch [41][930/1780], lr: 1.25000e-04, loss: 3.7814
Epoch [41][935/1780], lr: 1.25000e-04, loss: 4.1057
Epoch [41][940/1780], lr: 1.25000e-04, loss: 3.4499
Epoch [41][945/1780], lr: 1.25000e-04, loss: 4.0757
Epoch [41][950/1780], lr: 1.25000e-04, loss: 3.248
Epoch [41][955/1780], lr: 1.25000e-04, loss: 5.8856
Epoch [41][960/1780], lr: 1.25000e-04, loss: 4.8706
Epoch [41][965/1780], lr: 1.25000e-04, loss: 3.6865
Epoch [41][970/1780], lr: 1.25000e-04, loss: 4.8121
Epoch [41][975/1780], lr: 1.25000e-04, loss: 4.4469
Epoch [41][980/1780], lr: 1.25000e-04, loss: 4.9527
Epoch [41][985/1780], lr: 1.25000e-04, loss: 4.2823
Epoch [41][990/1780], lr: 1.25000e-04, loss: 3.973
Epoch [41][995/1780], lr: 1.25000e-04, loss: 4.6556
Epoch [41][1000/1780], lr: 1.25000e-04, loss: 4.5035
Epoch [41][1005/1780], lr: 1.25000e-04, loss: 4.7776
Epoch [41][1

Epoch [41][1695/1780], lr: 1.25000e-04, loss: 4.7327
Epoch [41][1700/1780], lr: 1.25000e-04, loss: 3.5638
Epoch [41][1705/1780], lr: 1.25000e-04, loss: 5.789
Epoch [41][1710/1780], lr: 1.25000e-04, loss: 3.7998
Epoch [41][1715/1780], lr: 1.25000e-04, loss: 4.0407
Epoch [41][1720/1780], lr: 1.25000e-04, loss: 4.3113
Epoch [41][1725/1780], lr: 1.25000e-04, loss: 4.6001
Epoch [41][1730/1780], lr: 1.25000e-04, loss: 4.4338
Epoch [41][1735/1780], lr: 1.25000e-04, loss: 3.6257
Epoch [41][1740/1780], lr: 1.25000e-04, loss: 3.2955
Epoch [41][1745/1780], lr: 1.25000e-04, loss: 4.6541
Epoch [41][1750/1780], lr: 1.25000e-04, loss: 4.6808
Epoch [41][1755/1780], lr: 1.25000e-04, loss: 3.8972
Epoch [41][1760/1780], lr: 1.25000e-04, loss: 3.671
Epoch [41][1765/1780], lr: 1.25000e-04, loss: 4.6097
Epoch [41][1770/1780], lr: 1.25000e-04, loss: 4.3865
Epoch [41][1775/1780], lr: 1.25000e-04, loss: 5.1377
Epoch [41][1780/1780], lr: 1.25000e-04, loss: 4.9543
Evaluating top_k_accuracy...
top1_acc: 0.04264, 

Epoch [42][695/1780], lr: 1.25000e-04, loss: 4.1731
Epoch [42][700/1780], lr: 1.25000e-04, loss: 5.2253
Epoch [42][705/1780], lr: 1.25000e-04, loss: 3.446
Epoch [42][710/1780], lr: 1.25000e-04, loss: 4.8026
Epoch [42][715/1780], lr: 1.25000e-04, loss: 3.9078
Epoch [42][720/1780], lr: 1.25000e-04, loss: 6.0397
Epoch [42][725/1780], lr: 1.25000e-04, loss: 5.7479
Epoch [42][730/1780], lr: 1.25000e-04, loss: 5.276
Epoch [42][735/1780], lr: 1.25000e-04, loss: 4.9852
Epoch [42][740/1780], lr: 1.25000e-04, loss: 4.1541
Epoch [42][745/1780], lr: 1.25000e-04, loss: 3.6672
Epoch [42][750/1780], lr: 1.25000e-04, loss: 3.2881
Epoch [42][755/1780], lr: 1.25000e-04, loss: 4.5888
Epoch [42][760/1780], lr: 1.25000e-04, loss: 4.2172
Epoch [42][765/1780], lr: 1.25000e-04, loss: 4.11
Epoch [42][770/1780], lr: 1.25000e-04, loss: 4.5059
Epoch [42][775/1780], lr: 1.25000e-04, loss: 3.8471
Epoch [42][780/1780], lr: 1.25000e-04, loss: 4.2221
Epoch [42][785/1780], lr: 1.25000e-04, loss: 3.9098
Epoch [42][790/1

Epoch [42][1480/1780], lr: 1.25000e-04, loss: 4.0163
Epoch [42][1485/1780], lr: 1.25000e-04, loss: 4.6287
Epoch [42][1490/1780], lr: 1.25000e-04, loss: 5.0506
Epoch [42][1495/1780], lr: 1.25000e-04, loss: 4.796
Epoch [42][1500/1780], lr: 1.25000e-04, loss: 4.7975
Epoch [42][1505/1780], lr: 1.25000e-04, loss: 3.9875
Epoch [42][1510/1780], lr: 1.25000e-04, loss: 4.0795
Epoch [42][1515/1780], lr: 1.25000e-04, loss: 4.6226
Epoch [42][1520/1780], lr: 1.25000e-04, loss: 3.5944
Epoch [42][1525/1780], lr: 1.25000e-04, loss: 5.2629
Epoch [42][1530/1780], lr: 1.25000e-04, loss: 4.8973
Epoch [42][1535/1780], lr: 1.25000e-04, loss: 4.3211
Epoch [42][1540/1780], lr: 1.25000e-04, loss: 3.8178
Epoch [42][1545/1780], lr: 1.25000e-04, loss: 4.3599
Epoch [42][1550/1780], lr: 1.25000e-04, loss: 4.2524
Epoch [42][1555/1780], lr: 1.25000e-04, loss: 4.5813
Epoch [42][1560/1780], lr: 1.25000e-04, loss: 4.8562
Epoch [42][1565/1780], lr: 1.25000e-04, loss: 4.6357
Epoch [42][1570/1780], lr: 1.25000e-04, loss: 4

Epoch [43][475/1780], lr: 1.25000e-04, loss: 4.6705
Epoch [43][480/1780], lr: 1.25000e-04, loss: 3.9368
Epoch [43][485/1780], lr: 1.25000e-04, loss: 4.18
Epoch [43][490/1780], lr: 1.25000e-04, loss: 3.0629
Epoch [43][495/1780], lr: 1.25000e-04, loss: 4.1167
Epoch [43][500/1780], lr: 1.25000e-04, loss: 3.526
Epoch [43][505/1780], lr: 1.25000e-04, loss: 4.4213
Epoch [43][510/1780], lr: 1.25000e-04, loss: 4.2086
Epoch [43][515/1780], lr: 1.25000e-04, loss: 4.3945
Epoch [43][520/1780], lr: 1.25000e-04, loss: 3.9445
Epoch [43][525/1780], lr: 1.25000e-04, loss: 5.0852
Epoch [43][530/1780], lr: 1.25000e-04, loss: 4.2669
Epoch [43][535/1780], lr: 1.25000e-04, loss: 3.6387
Epoch [43][540/1780], lr: 1.25000e-04, loss: 3.7052
Epoch [43][545/1780], lr: 1.25000e-04, loss: 3.8785
Epoch [43][550/1780], lr: 1.25000e-04, loss: 4.3483
Epoch [43][555/1780], lr: 1.25000e-04, loss: 4.3313
Epoch [43][560/1780], lr: 1.25000e-04, loss: 3.9408
Epoch [43][565/1780], lr: 1.25000e-04, loss: 4.3723
Epoch [43][570/

Epoch [43][1260/1780], lr: 1.25000e-04, loss: 4.5763
Epoch [43][1265/1780], lr: 1.25000e-04, loss: 4.2236
Epoch [43][1270/1780], lr: 1.25000e-04, loss: 3.8499
Epoch [43][1275/1780], lr: 1.25000e-04, loss: 3.4304
Epoch [43][1280/1780], lr: 1.25000e-04, loss: 5.1541
Epoch [43][1285/1780], lr: 1.25000e-04, loss: 3.5658
Epoch [43][1290/1780], lr: 1.25000e-04, loss: 4.0869
Epoch [43][1295/1780], lr: 1.25000e-04, loss: 3.2533
Epoch [43][1300/1780], lr: 1.25000e-04, loss: 3.9801
Epoch [43][1305/1780], lr: 1.25000e-04, loss: 4.5737
Epoch [43][1310/1780], lr: 1.25000e-04, loss: 4.3026
Epoch [43][1315/1780], lr: 1.25000e-04, loss: 4.5032
Epoch [43][1320/1780], lr: 1.25000e-04, loss: 3.7334
Epoch [43][1325/1780], lr: 1.25000e-04, loss: 4.0722
Epoch [43][1330/1780], lr: 1.25000e-04, loss: 4.5586
Epoch [43][1335/1780], lr: 1.25000e-04, loss: 3.7641
Epoch [43][1340/1780], lr: 1.25000e-04, loss: 4.3023
Epoch [43][1345/1780], lr: 1.25000e-04, loss: 4.8305
Epoch [43][1350/1780], lr: 1.25000e-04, loss: 

Epoch [44][250/1780], lr: 1.25000e-04, loss: 3.8695
Epoch [44][255/1780], lr: 1.25000e-04, loss: 3.4371
Epoch [44][260/1780], lr: 1.25000e-04, loss: 4.5115
Epoch [44][265/1780], lr: 1.25000e-04, loss: 4.7665
Epoch [44][270/1780], lr: 1.25000e-04, loss: 4.2278
Epoch [44][275/1780], lr: 1.25000e-04, loss: 3.3526
Epoch [44][280/1780], lr: 1.25000e-04, loss: 3.4096
Epoch [44][285/1780], lr: 1.25000e-04, loss: 4.2257
Epoch [44][290/1780], lr: 1.25000e-04, loss: 3.6445
Epoch [44][295/1780], lr: 1.25000e-04, loss: 5.4849
Epoch [44][300/1780], lr: 1.25000e-04, loss: 4.4209
Epoch [44][305/1780], lr: 1.25000e-04, loss: 5.1943
Epoch [44][310/1780], lr: 1.25000e-04, loss: 5.1699
Epoch [44][315/1780], lr: 1.25000e-04, loss: 3.7175
Epoch [44][320/1780], lr: 1.25000e-04, loss: 3.8726
Epoch [44][325/1780], lr: 1.25000e-04, loss: 3.9266
Epoch [44][330/1780], lr: 1.25000e-04, loss: 4.1141
Epoch [44][335/1780], lr: 1.25000e-04, loss: 3.9109
Epoch [44][340/1780], lr: 1.25000e-04, loss: 4.9109
Epoch [44][3

Epoch [44][1040/1780], lr: 1.25000e-04, loss: 3.6885
Epoch [44][1045/1780], lr: 1.25000e-04, loss: 4.1308
Epoch [44][1050/1780], lr: 1.25000e-04, loss: 4.4583
Epoch [44][1055/1780], lr: 1.25000e-04, loss: 4.0401
Epoch [44][1060/1780], lr: 1.25000e-04, loss: 4.2788
Epoch [44][1065/1780], lr: 1.25000e-04, loss: 3.6384
Epoch [44][1070/1780], lr: 1.25000e-04, loss: 3.8882
Epoch [44][1075/1780], lr: 1.25000e-04, loss: 4.8279
Epoch [44][1080/1780], lr: 1.25000e-04, loss: 3.356
Epoch [44][1085/1780], lr: 1.25000e-04, loss: 4.5639
Epoch [44][1090/1780], lr: 1.25000e-04, loss: 3.9647
Epoch [44][1095/1780], lr: 1.25000e-04, loss: 4.4619
Epoch [44][1100/1780], lr: 1.25000e-04, loss: 4.47
Epoch [44][1105/1780], lr: 1.25000e-04, loss: 4.0568
Epoch [44][1110/1780], lr: 1.25000e-04, loss: 4.5499
Epoch [44][1115/1780], lr: 1.25000e-04, loss: 2.9927
Epoch [44][1120/1780], lr: 1.25000e-04, loss: 4.3711
Epoch [44][1125/1780], lr: 1.25000e-04, loss: 3.31
Epoch [44][1130/1780], lr: 1.25000e-04, loss: 3.999

Epoch [45][25/1780], lr: 1.25000e-04, loss: 3.6703
Epoch [45][30/1780], lr: 1.25000e-04, loss: 3.7589
Epoch [45][35/1780], lr: 1.25000e-04, loss: 2.6766
Epoch [45][40/1780], lr: 1.25000e-04, loss: 3.8076
Epoch [45][45/1780], lr: 1.25000e-04, loss: 4.59
Epoch [45][50/1780], lr: 1.25000e-04, loss: 3.8055
Epoch [45][55/1780], lr: 1.25000e-04, loss: 4.1501
Epoch [45][60/1780], lr: 1.25000e-04, loss: 4.294
Epoch [45][65/1780], lr: 1.25000e-04, loss: 4.5577
Epoch [45][70/1780], lr: 1.25000e-04, loss: 4.1628
Epoch [45][75/1780], lr: 1.25000e-04, loss: 4.0105
Epoch [45][80/1780], lr: 1.25000e-04, loss: 3.6718
Epoch [45][85/1780], lr: 1.25000e-04, loss: 3.2714
Epoch [45][90/1780], lr: 1.25000e-04, loss: 4.6977
Epoch [45][95/1780], lr: 1.25000e-04, loss: 4.217
Epoch [45][100/1780], lr: 1.25000e-04, loss: 5.6448
Epoch [45][105/1780], lr: 1.25000e-04, loss: 5.5041
Epoch [45][110/1780], lr: 1.25000e-04, loss: 3.6635
Epoch [45][115/1780], lr: 1.25000e-04, loss: 2.8257
Epoch [45][120/1780], lr: 1.250

Epoch [45][820/1780], lr: 1.25000e-04, loss: 4.736
Epoch [45][825/1780], lr: 1.25000e-04, loss: 5.2871
Epoch [45][830/1780], lr: 1.25000e-04, loss: 4.3187
Epoch [45][835/1780], lr: 1.25000e-04, loss: 3.4929
Epoch [45][840/1780], lr: 1.25000e-04, loss: 4.3655
Epoch [45][845/1780], lr: 1.25000e-04, loss: 3.5557
Epoch [45][850/1780], lr: 1.25000e-04, loss: 4.9275
Epoch [45][855/1780], lr: 1.25000e-04, loss: 4.6239
Epoch [45][860/1780], lr: 1.25000e-04, loss: 4.63
Epoch [45][865/1780], lr: 1.25000e-04, loss: 3.4418
Epoch [45][870/1780], lr: 1.25000e-04, loss: 4.0098
Epoch [45][875/1780], lr: 1.25000e-04, loss: 3.6437
Epoch [45][880/1780], lr: 1.25000e-04, loss: 4.4944
Epoch [45][885/1780], lr: 1.25000e-04, loss: 4.1986
Epoch [45][890/1780], lr: 1.25000e-04, loss: 4.3671
Epoch [45][895/1780], lr: 1.25000e-04, loss: 3.8468
Epoch [45][900/1780], lr: 1.25000e-04, loss: 4.5861
Epoch [45][905/1780], lr: 1.25000e-04, loss: 3.9498
Epoch [45][910/1780], lr: 1.25000e-04, loss: 3.6285
Epoch [45][915/

Epoch [45][1600/1780], lr: 1.25000e-04, loss: 2.6826
Epoch [45][1605/1780], lr: 1.25000e-04, loss: 3.9154
Epoch [45][1610/1780], lr: 1.25000e-04, loss: 3.4257
Epoch [45][1615/1780], lr: 1.25000e-04, loss: 4.0454
Epoch [45][1620/1780], lr: 1.25000e-04, loss: 4.1754
Epoch [45][1625/1780], lr: 1.25000e-04, loss: 4.8788
Epoch [45][1630/1780], lr: 1.25000e-04, loss: 3.9625
Epoch [45][1635/1780], lr: 1.25000e-04, loss: 4.7417
Epoch [45][1640/1780], lr: 1.25000e-04, loss: 3.8177
Epoch [45][1645/1780], lr: 1.25000e-04, loss: 4.7927
Epoch [45][1650/1780], lr: 1.25000e-04, loss: 3.629
Epoch [45][1655/1780], lr: 1.25000e-04, loss: 4.6771
Epoch [45][1660/1780], lr: 1.25000e-04, loss: 4.3265
Epoch [45][1665/1780], lr: 1.25000e-04, loss: 3.425
Epoch [45][1670/1780], lr: 1.25000e-04, loss: 4.0079
Epoch [45][1675/1780], lr: 1.25000e-04, loss: 4.8092
Epoch [45][1680/1780], lr: 1.25000e-04, loss: 3.78
Epoch [45][1685/1780], lr: 1.25000e-04, loss: 4.2071
Epoch [45][1690/1780], lr: 1.25000e-04, loss: 2.72

Epoch [46][595/1780], lr: 1.25000e-04, loss: 3.9925
Epoch [46][600/1780], lr: 1.25000e-04, loss: 3.8431
Epoch [46][605/1780], lr: 1.25000e-04, loss: 3.5719
Epoch [46][610/1780], lr: 1.25000e-04, loss: 4.3245
Epoch [46][615/1780], lr: 1.25000e-04, loss: 3.2352
Epoch [46][620/1780], lr: 1.25000e-04, loss: 3.9576
Epoch [46][625/1780], lr: 1.25000e-04, loss: 4.1055
Epoch [46][630/1780], lr: 1.25000e-04, loss: 4.8225
Epoch [46][635/1780], lr: 1.25000e-04, loss: 4.2464
Epoch [46][640/1780], lr: 1.25000e-04, loss: 3.041
Epoch [46][645/1780], lr: 1.25000e-04, loss: 4.2594
Epoch [46][650/1780], lr: 1.25000e-04, loss: 5.0994
Epoch [46][655/1780], lr: 1.25000e-04, loss: 3.2552
Epoch [46][660/1780], lr: 1.25000e-04, loss: 2.9321
Epoch [46][665/1780], lr: 1.25000e-04, loss: 4.0056
Epoch [46][670/1780], lr: 1.25000e-04, loss: 3.8723
Epoch [46][675/1780], lr: 1.25000e-04, loss: 4.5069
Epoch [46][680/1780], lr: 1.25000e-04, loss: 4.4697
Epoch [46][685/1780], lr: 1.25000e-04, loss: 2.9743
Epoch [46][69

Epoch [46][1380/1780], lr: 1.25000e-04, loss: 4.008
Epoch [46][1385/1780], lr: 1.25000e-04, loss: 4.9141
Epoch [46][1390/1780], lr: 1.25000e-04, loss: 4.1939
Epoch [46][1395/1780], lr: 1.25000e-04, loss: 4.4876
Epoch [46][1400/1780], lr: 1.25000e-04, loss: 2.9736
Epoch [46][1405/1780], lr: 1.25000e-04, loss: 3.8096
Epoch [46][1410/1780], lr: 1.25000e-04, loss: 3.6202
Epoch [46][1415/1780], lr: 1.25000e-04, loss: 4.1921
Epoch [46][1420/1780], lr: 1.25000e-04, loss: 4.9726
Epoch [46][1425/1780], lr: 1.25000e-04, loss: 4.0578
Epoch [46][1430/1780], lr: 1.25000e-04, loss: 3.8709
Epoch [46][1435/1780], lr: 1.25000e-04, loss: 3.0847
Epoch [46][1440/1780], lr: 1.25000e-04, loss: 4.1859
Epoch [46][1445/1780], lr: 1.25000e-04, loss: 3.5918
Epoch [46][1450/1780], lr: 1.25000e-04, loss: 3.9407
Epoch [46][1455/1780], lr: 1.25000e-04, loss: 4.918
Epoch [46][1460/1780], lr: 1.25000e-04, loss: 4.4197
Epoch [46][1465/1780], lr: 1.25000e-04, loss: 4.2321
Epoch [46][1470/1780], lr: 1.25000e-04, loss: 4.

Epoch [47][375/1780], lr: 1.25000e-04, loss: 3.5964
Epoch [47][380/1780], lr: 1.25000e-04, loss: 3.2128
Epoch [47][385/1780], lr: 1.25000e-04, loss: 3.5107
Epoch [47][390/1780], lr: 1.25000e-04, loss: 3.977
Epoch [47][395/1780], lr: 1.25000e-04, loss: 3.9921
Epoch [47][400/1780], lr: 1.25000e-04, loss: 3.7551
Epoch [47][405/1780], lr: 1.25000e-04, loss: 4.0003
Epoch [47][410/1780], lr: 1.25000e-04, loss: 3.247
Epoch [47][415/1780], lr: 1.25000e-04, loss: 4.1081
Epoch [47][420/1780], lr: 1.25000e-04, loss: 4.2344
Epoch [47][425/1780], lr: 1.25000e-04, loss: 3.5795
Epoch [47][430/1780], lr: 1.25000e-04, loss: 3.1604
Epoch [47][435/1780], lr: 1.25000e-04, loss: 4.2005
Epoch [47][440/1780], lr: 1.25000e-04, loss: 4.3874
Epoch [47][445/1780], lr: 1.25000e-04, loss: 2.7263
Epoch [47][450/1780], lr: 1.25000e-04, loss: 4.5009
Epoch [47][455/1780], lr: 1.25000e-04, loss: 4.2672
Epoch [47][460/1780], lr: 1.25000e-04, loss: 3.7078
Epoch [47][465/1780], lr: 1.25000e-04, loss: 4.1195
Epoch [47][470

Epoch [47][1165/1780], lr: 1.25000e-04, loss: 3.7517
Epoch [47][1170/1780], lr: 1.25000e-04, loss: 3.5387
Epoch [47][1175/1780], lr: 1.25000e-04, loss: 3.1724
Epoch [47][1180/1780], lr: 1.25000e-04, loss: 3.7959
Epoch [47][1185/1780], lr: 1.25000e-04, loss: 4.0929
Epoch [47][1190/1780], lr: 1.25000e-04, loss: 3.2278
Epoch [47][1195/1780], lr: 1.25000e-04, loss: 3.208
Epoch [47][1200/1780], lr: 1.25000e-04, loss: 2.892
Epoch [47][1205/1780], lr: 1.25000e-04, loss: 4.0927
Epoch [47][1210/1780], lr: 1.25000e-04, loss: 4.2717
Epoch [47][1215/1780], lr: 1.25000e-04, loss: 2.3326
Epoch [47][1220/1780], lr: 1.25000e-04, loss: 3.9621
Epoch [47][1225/1780], lr: 1.25000e-04, loss: 3.8969
Epoch [47][1230/1780], lr: 1.25000e-04, loss: 3.6784
Epoch [47][1235/1780], lr: 1.25000e-04, loss: 3.73
Epoch [47][1240/1780], lr: 1.25000e-04, loss: 4.2485
Epoch [47][1245/1780], lr: 1.25000e-04, loss: 3.6583
Epoch [47][1250/1780], lr: 1.25000e-04, loss: 3.6835
Epoch [47][1255/1780], lr: 1.25000e-04, loss: 3.67

Epoch [48][155/1780], lr: 1.25000e-04, loss: 2.818
Epoch [48][160/1780], lr: 1.25000e-04, loss: 4.096
Epoch [48][165/1780], lr: 1.25000e-04, loss: 4.0161
Epoch [48][170/1780], lr: 1.25000e-04, loss: 4.1758
Epoch [48][175/1780], lr: 1.25000e-04, loss: 3.5641
Epoch [48][180/1780], lr: 1.25000e-04, loss: 4.3618
Epoch [48][185/1780], lr: 1.25000e-04, loss: 3.6468
Epoch [48][190/1780], lr: 1.25000e-04, loss: 3.93
Epoch [48][195/1780], lr: 1.25000e-04, loss: 5.0699
Epoch [48][200/1780], lr: 1.25000e-04, loss: 4.2957
Epoch [48][205/1780], lr: 1.25000e-04, loss: 3.9688
Epoch [48][210/1780], lr: 1.25000e-04, loss: 5.0629
Epoch [48][215/1780], lr: 1.25000e-04, loss: 4.4235
Epoch [48][220/1780], lr: 1.25000e-04, loss: 4.5253
Epoch [48][225/1780], lr: 1.25000e-04, loss: 3.7163
Epoch [48][230/1780], lr: 1.25000e-04, loss: 3.0925
Epoch [48][235/1780], lr: 1.25000e-04, loss: 3.327
Epoch [48][240/1780], lr: 1.25000e-04, loss: 3.7808
Epoch [48][245/1780], lr: 1.25000e-04, loss: 4.3148
Epoch [48][250/17

Epoch [48][945/1780], lr: 1.25000e-04, loss: 3.8946
Epoch [48][950/1780], lr: 1.25000e-04, loss: 4.1265
Epoch [48][955/1780], lr: 1.25000e-04, loss: 3.9405
Epoch [48][960/1780], lr: 1.25000e-04, loss: 3.7428
Epoch [48][965/1780], lr: 1.25000e-04, loss: 3.4734
Epoch [48][970/1780], lr: 1.25000e-04, loss: 4.2639
Epoch [48][975/1780], lr: 1.25000e-04, loss: 3.4773
Epoch [48][980/1780], lr: 1.25000e-04, loss: 3.8412
Epoch [48][985/1780], lr: 1.25000e-04, loss: 3.4784
Epoch [48][990/1780], lr: 1.25000e-04, loss: 3.8237
Epoch [48][995/1780], lr: 1.25000e-04, loss: 4.5843
Epoch [48][1000/1780], lr: 1.25000e-04, loss: 3.4883
Epoch [48][1005/1780], lr: 1.25000e-04, loss: 4.4024
Epoch [48][1010/1780], lr: 1.25000e-04, loss: 5.0068
Epoch [48][1015/1780], lr: 1.25000e-04, loss: 3.7134
Epoch [48][1020/1780], lr: 1.25000e-04, loss: 3.6382
Epoch [48][1025/1780], lr: 1.25000e-04, loss: 3.8962
Epoch [48][1030/1780], lr: 1.25000e-04, loss: 2.7611
Epoch [48][1035/1780], lr: 1.25000e-04, loss: 3.2563
Epoc

Epoch [48][1725/1780], lr: 1.25000e-04, loss: 4.6229
Epoch [48][1730/1780], lr: 1.25000e-04, loss: 3.7256
Epoch [48][1735/1780], lr: 1.25000e-04, loss: 4.2378
Epoch [48][1740/1780], lr: 1.25000e-04, loss: 4.5264
Epoch [48][1745/1780], lr: 1.25000e-04, loss: 4.3032
Epoch [48][1750/1780], lr: 1.25000e-04, loss: 4.9266
Epoch [48][1755/1780], lr: 1.25000e-04, loss: 4.1791
Epoch [48][1760/1780], lr: 1.25000e-04, loss: 4.0305
Epoch [48][1765/1780], lr: 1.25000e-04, loss: 4.1383
Epoch [48][1770/1780], lr: 1.25000e-04, loss: 3.2169
Epoch [48][1775/1780], lr: 1.25000e-04, loss: 3.7072
Epoch [48][1780/1780], lr: 1.25000e-04, loss: 4.4468
Evaluating top_k_accuracy...
top1_acc: 0.06202, top5_acc: 0.1395, train_loss: 4.4468, val_loss: 4.4424
Saving checkpoint at 48 epochs...
Epoch [49][5/1780], lr: 1.25000e-04, loss: 3.5326
Epoch [49][10/1780], lr: 1.25000e-04, loss: 3.5779
Epoch [49][15/1780], lr: 1.25000e-04, loss: 3.9135
Epoch [49][20/1780], lr: 1.25000e-04, loss: 3.6082
Epoch [49][25/1780], lr:

Epoch [49][725/1780], lr: 1.25000e-04, loss: 3.3236
Epoch [49][730/1780], lr: 1.25000e-04, loss: 3.9444
Epoch [49][735/1780], lr: 1.25000e-04, loss: 4.0179
Epoch [49][740/1780], lr: 1.25000e-04, loss: 4.19
Epoch [49][745/1780], lr: 1.25000e-04, loss: 3.9678
Epoch [49][750/1780], lr: 1.25000e-04, loss: 4.2769
Epoch [49][755/1780], lr: 1.25000e-04, loss: 5.1022
Epoch [49][760/1780], lr: 1.25000e-04, loss: 3.5697
Epoch [49][765/1780], lr: 1.25000e-04, loss: 4.1483
Epoch [49][770/1780], lr: 1.25000e-04, loss: 3.2957
Epoch [49][775/1780], lr: 1.25000e-04, loss: 4.3491
Epoch [49][780/1780], lr: 1.25000e-04, loss: 4.184
Epoch [49][785/1780], lr: 1.25000e-04, loss: 3.6347
Epoch [49][790/1780], lr: 1.25000e-04, loss: 3.8018
Epoch [49][795/1780], lr: 1.25000e-04, loss: 4.0845
Epoch [49][800/1780], lr: 1.25000e-04, loss: 2.8302
Epoch [49][805/1780], lr: 1.25000e-04, loss: 3.4067
Epoch [49][810/1780], lr: 1.25000e-04, loss: 3.7977
Epoch [49][815/1780], lr: 1.25000e-04, loss: 5.1962
Epoch [49][820/

Epoch [49][1505/1780], lr: 1.25000e-04, loss: 4.9495
Epoch [49][1510/1780], lr: 1.25000e-04, loss: 3.3859
Epoch [49][1515/1780], lr: 1.25000e-04, loss: 2.9566
Epoch [49][1520/1780], lr: 1.25000e-04, loss: 3.1271
Epoch [49][1525/1780], lr: 1.25000e-04, loss: 3.6234
Epoch [49][1530/1780], lr: 1.25000e-04, loss: 3.9821
Epoch [49][1535/1780], lr: 1.25000e-04, loss: 5.6993
Epoch [49][1540/1780], lr: 1.25000e-04, loss: 3.4133
Epoch [49][1545/1780], lr: 1.25000e-04, loss: 4.2826
Epoch [49][1550/1780], lr: 1.25000e-04, loss: 4.1814
Epoch [49][1555/1780], lr: 1.25000e-04, loss: 3.369
Epoch [49][1560/1780], lr: 1.25000e-04, loss: 3.1228
Epoch [49][1565/1780], lr: 1.25000e-04, loss: 3.5739
Epoch [49][1570/1780], lr: 1.25000e-04, loss: 3.7049
Epoch [49][1575/1780], lr: 1.25000e-04, loss: 3.6659
Epoch [49][1580/1780], lr: 1.25000e-04, loss: 3.0633
Epoch [49][1585/1780], lr: 1.25000e-04, loss: 2.9161
Epoch [49][1590/1780], lr: 1.25000e-04, loss: 4.5498
Epoch [49][1595/1780], lr: 1.25000e-04, loss: 4

Epoch [50][500/1780], lr: 1.25000e-04, loss: 4.2658
Epoch [50][505/1780], lr: 1.25000e-04, loss: 4.5822
Epoch [50][510/1780], lr: 1.25000e-04, loss: 3.3743
Epoch [50][515/1780], lr: 1.25000e-04, loss: 4.1244
Epoch [50][520/1780], lr: 1.25000e-04, loss: 4.2132
Epoch [50][525/1780], lr: 1.25000e-04, loss: 4.1451
Epoch [50][530/1780], lr: 1.25000e-04, loss: 2.7203
Epoch [50][535/1780], lr: 1.25000e-04, loss: 2.8661
Epoch [50][540/1780], lr: 1.25000e-04, loss: 2.753
Epoch [50][545/1780], lr: 1.25000e-04, loss: 3.7524
Epoch [50][550/1780], lr: 1.25000e-04, loss: 4.4116
Epoch [50][555/1780], lr: 1.25000e-04, loss: 3.3012
Epoch [50][560/1780], lr: 1.25000e-04, loss: 3.3173
Epoch [50][565/1780], lr: 1.25000e-04, loss: 4.7231
Epoch [50][570/1780], lr: 1.25000e-04, loss: 4.1351
Epoch [50][575/1780], lr: 1.25000e-04, loss: 3.1354
Epoch [50][580/1780], lr: 1.25000e-04, loss: 3.3303
Epoch [50][585/1780], lr: 1.25000e-04, loss: 3.9069
Epoch [50][590/1780], lr: 1.25000e-04, loss: 5.0092
Epoch [50][59

Epoch [50][1285/1780], lr: 1.25000e-04, loss: 3.3174
Epoch [50][1290/1780], lr: 1.25000e-04, loss: 3.5267
Epoch [50][1295/1780], lr: 1.25000e-04, loss: 4.7228
Epoch [50][1300/1780], lr: 1.25000e-04, loss: 2.8196
Epoch [50][1305/1780], lr: 1.25000e-04, loss: 4.4269
Epoch [50][1310/1780], lr: 1.25000e-04, loss: 3.3261
Epoch [50][1315/1780], lr: 1.25000e-04, loss: 2.4939
Epoch [50][1320/1780], lr: 1.25000e-04, loss: 3.0399
Epoch [50][1325/1780], lr: 1.25000e-04, loss: 3.8383
Epoch [50][1330/1780], lr: 1.25000e-04, loss: 4.6403
Epoch [50][1335/1780], lr: 1.25000e-04, loss: 3.8977
Epoch [50][1340/1780], lr: 1.25000e-04, loss: 3.8552
Epoch [50][1345/1780], lr: 1.25000e-04, loss: 4.6264
Epoch [50][1350/1780], lr: 1.25000e-04, loss: 3.7548
Epoch [50][1355/1780], lr: 1.25000e-04, loss: 3.3815
Epoch [50][1360/1780], lr: 1.25000e-04, loss: 3.0822
Epoch [50][1365/1780], lr: 1.25000e-04, loss: 3.5191
Epoch [50][1370/1780], lr: 1.25000e-04, loss: 4.4244
Epoch [50][1375/1780], lr: 1.25000e-04, loss: 

Epoch [51][275/1780], lr: 1.25000e-04, loss: 2.5837
Epoch [51][280/1780], lr: 1.25000e-04, loss: 3.7011
Epoch [51][285/1780], lr: 1.25000e-04, loss: 3.2223
Epoch [51][290/1780], lr: 1.25000e-04, loss: 3.7929
Epoch [51][295/1780], lr: 1.25000e-04, loss: 4.1885
Epoch [51][300/1780], lr: 1.25000e-04, loss: 4.2588
Epoch [51][305/1780], lr: 1.25000e-04, loss: 4.1345
Epoch [51][310/1780], lr: 1.25000e-04, loss: 4.142
Epoch [51][315/1780], lr: 1.25000e-04, loss: 3.0828
Epoch [51][320/1780], lr: 1.25000e-04, loss: 3.8092
Epoch [51][325/1780], lr: 1.25000e-04, loss: 3.3568
Epoch [51][330/1780], lr: 1.25000e-04, loss: 4.0273
Epoch [51][335/1780], lr: 1.25000e-04, loss: 5.1987
Epoch [51][340/1780], lr: 1.25000e-04, loss: 4.4026
Epoch [51][345/1780], lr: 1.25000e-04, loss: 3.2579
Epoch [51][350/1780], lr: 1.25000e-04, loss: 3.542
Epoch [51][355/1780], lr: 1.25000e-04, loss: 4.5406
Epoch [51][360/1780], lr: 1.25000e-04, loss: 2.5695
Epoch [51][365/1780], lr: 1.25000e-04, loss: 3.4224
Epoch [51][370

Epoch [51][1065/1780], lr: 1.25000e-04, loss: 4.134
Epoch [51][1070/1780], lr: 1.25000e-04, loss: 4.6191
Epoch [51][1075/1780], lr: 1.25000e-04, loss: 3.9756
Epoch [51][1080/1780], lr: 1.25000e-04, loss: 4.5428
Epoch [51][1085/1780], lr: 1.25000e-04, loss: 3.146
Epoch [51][1090/1780], lr: 1.25000e-04, loss: 2.6826
Epoch [51][1095/1780], lr: 1.25000e-04, loss: 2.8137
Epoch [51][1100/1780], lr: 1.25000e-04, loss: 3.4515
Epoch [51][1105/1780], lr: 1.25000e-04, loss: 3.6815
Epoch [51][1110/1780], lr: 1.25000e-04, loss: 3.1679
Epoch [51][1115/1780], lr: 1.25000e-04, loss: 3.2847
Epoch [51][1120/1780], lr: 1.25000e-04, loss: 3.8161
Epoch [51][1125/1780], lr: 1.25000e-04, loss: 4.0113
Epoch [51][1130/1780], lr: 1.25000e-04, loss: 3.4599
Epoch [51][1135/1780], lr: 1.25000e-04, loss: 4.4352
Epoch [51][1140/1780], lr: 1.25000e-04, loss: 3.7318
Epoch [51][1145/1780], lr: 1.25000e-04, loss: 3.0959
Epoch [51][1150/1780], lr: 1.25000e-04, loss: 3.8471
Epoch [51][1155/1780], lr: 1.25000e-04, loss: 3.

Epoch [52][50/1780], lr: 1.25000e-05, loss: 3.3371
Epoch [52][55/1780], lr: 1.25000e-05, loss: 4.4942
Epoch [52][60/1780], lr: 1.25000e-05, loss: 2.4027
Epoch [52][65/1780], lr: 1.25000e-05, loss: 3.6493
Epoch [52][70/1780], lr: 1.25000e-05, loss: 3.3184
Epoch [52][75/1780], lr: 1.25000e-05, loss: 3.1565
Epoch [52][80/1780], lr: 1.25000e-05, loss: 4.915
Epoch [52][85/1780], lr: 1.25000e-05, loss: 4.3896
Epoch [52][90/1780], lr: 1.25000e-05, loss: 3.9733
Epoch [52][95/1780], lr: 1.25000e-05, loss: 4.5505
Epoch [52][100/1780], lr: 1.25000e-05, loss: 3.9081
Epoch [52][105/1780], lr: 1.25000e-05, loss: 3.7984
Epoch [52][110/1780], lr: 1.25000e-05, loss: 3.6423
Epoch [52][115/1780], lr: 1.25000e-05, loss: 3.9488
Epoch [52][120/1780], lr: 1.25000e-05, loss: 5.0526
Epoch [52][125/1780], lr: 1.25000e-05, loss: 4.0551
Epoch [52][130/1780], lr: 1.25000e-05, loss: 3.6186
Epoch [52][135/1780], lr: 1.25000e-05, loss: 3.6323
Epoch [52][140/1780], lr: 1.25000e-05, loss: 3.9805
Epoch [52][145/1780], l

Epoch [52][845/1780], lr: 1.25000e-05, loss: 3.501
Epoch [52][850/1780], lr: 1.25000e-05, loss: 2.9527
Epoch [52][855/1780], lr: 1.25000e-05, loss: 2.7234
Epoch [52][860/1780], lr: 1.25000e-05, loss: 4.0076
Epoch [52][865/1780], lr: 1.25000e-05, loss: 2.4837
Epoch [52][870/1780], lr: 1.25000e-05, loss: 3.2245
Epoch [52][875/1780], lr: 1.25000e-05, loss: 3.518
Epoch [52][880/1780], lr: 1.25000e-05, loss: 2.7922
Epoch [52][885/1780], lr: 1.25000e-05, loss: 4.0809
Epoch [52][890/1780], lr: 1.25000e-05, loss: 2.6296
Epoch [52][895/1780], lr: 1.25000e-05, loss: 4.591
Epoch [52][900/1780], lr: 1.25000e-05, loss: 2.5021
Epoch [52][905/1780], lr: 1.25000e-05, loss: 3.5985
Epoch [52][910/1780], lr: 1.25000e-05, loss: 4.1811
Epoch [52][915/1780], lr: 1.25000e-05, loss: 2.9894
Epoch [52][920/1780], lr: 1.25000e-05, loss: 3.5701
Epoch [52][925/1780], lr: 1.25000e-05, loss: 3.6233
Epoch [52][930/1780], lr: 1.25000e-05, loss: 3.3911
Epoch [52][935/1780], lr: 1.25000e-05, loss: 3.7421
Epoch [52][940/

Epoch [52][1625/1780], lr: 1.25000e-05, loss: 3.1678
Epoch [52][1630/1780], lr: 1.25000e-05, loss: 3.5464
Epoch [52][1635/1780], lr: 1.25000e-05, loss: 4.1499
Epoch [52][1640/1780], lr: 1.25000e-05, loss: 2.0774
Epoch [52][1645/1780], lr: 1.25000e-05, loss: 2.5786
Epoch [52][1650/1780], lr: 1.25000e-05, loss: 3.7044
Epoch [52][1655/1780], lr: 1.25000e-05, loss: 2.8259
Epoch [52][1660/1780], lr: 1.25000e-05, loss: 3.8489
Epoch [52][1665/1780], lr: 1.25000e-05, loss: 2.1756
Epoch [52][1670/1780], lr: 1.25000e-05, loss: 3.0793
Epoch [52][1675/1780], lr: 1.25000e-05, loss: 3.6407
Epoch [52][1680/1780], lr: 1.25000e-05, loss: 3.1193
Epoch [52][1685/1780], lr: 1.25000e-05, loss: 2.891
Epoch [52][1690/1780], lr: 1.25000e-05, loss: 3.551
Epoch [52][1695/1780], lr: 1.25000e-05, loss: 3.5984
Epoch [52][1700/1780], lr: 1.25000e-05, loss: 3.1226
Epoch [52][1705/1780], lr: 1.25000e-05, loss: 3.5832
Epoch [52][1710/1780], lr: 1.25000e-05, loss: 4.8393
Epoch [52][1715/1780], lr: 1.25000e-05, loss: 3.

Epoch [53][620/1780], lr: 1.25000e-05, loss: 3.1633
Epoch [53][625/1780], lr: 1.25000e-05, loss: 3.784
Epoch [53][630/1780], lr: 1.25000e-05, loss: 3.386
Epoch [53][635/1780], lr: 1.25000e-05, loss: 3.0167
Epoch [53][640/1780], lr: 1.25000e-05, loss: 4.5588
Epoch [53][645/1780], lr: 1.25000e-05, loss: 2.7001
Epoch [53][650/1780], lr: 1.25000e-05, loss: 2.6701
Epoch [53][655/1780], lr: 1.25000e-05, loss: 4.0186
Epoch [53][660/1780], lr: 1.25000e-05, loss: 3.3447
Epoch [53][665/1780], lr: 1.25000e-05, loss: 1.9206
Epoch [53][670/1780], lr: 1.25000e-05, loss: 3.7476
Epoch [53][675/1780], lr: 1.25000e-05, loss: 3.2937
Epoch [53][680/1780], lr: 1.25000e-05, loss: 3.8262
Epoch [53][685/1780], lr: 1.25000e-05, loss: 3.4517
Epoch [53][690/1780], lr: 1.25000e-05, loss: 2.5437
Epoch [53][695/1780], lr: 1.25000e-05, loss: 2.1313
Epoch [53][700/1780], lr: 1.25000e-05, loss: 3.364
Epoch [53][705/1780], lr: 1.25000e-05, loss: 3.8907
Epoch [53][710/1780], lr: 1.25000e-05, loss: 3.8207
Epoch [53][715/

Epoch [53][1405/1780], lr: 1.25000e-05, loss: 3.2979
Epoch [53][1410/1780], lr: 1.25000e-05, loss: 2.7267
Epoch [53][1415/1780], lr: 1.25000e-05, loss: 3.191
Epoch [53][1420/1780], lr: 1.25000e-05, loss: 3.9197
Epoch [53][1425/1780], lr: 1.25000e-05, loss: 3.9012
Epoch [53][1430/1780], lr: 1.25000e-05, loss: 3.2035
Epoch [53][1435/1780], lr: 1.25000e-05, loss: 3.2036
Epoch [53][1440/1780], lr: 1.25000e-05, loss: 3.4548
Epoch [53][1445/1780], lr: 1.25000e-05, loss: 3.1839
Epoch [53][1450/1780], lr: 1.25000e-05, loss: 3.4375
Epoch [53][1455/1780], lr: 1.25000e-05, loss: 2.7878
Epoch [53][1460/1780], lr: 1.25000e-05, loss: 2.9762
Epoch [53][1465/1780], lr: 1.25000e-05, loss: 3.3658
Epoch [53][1470/1780], lr: 1.25000e-05, loss: 3.3126
Epoch [53][1475/1780], lr: 1.25000e-05, loss: 4.3425
Epoch [53][1480/1780], lr: 1.25000e-05, loss: 1.9928
Epoch [53][1485/1780], lr: 1.25000e-05, loss: 3.5243
Epoch [53][1490/1780], lr: 1.25000e-05, loss: 3.1758
Epoch [53][1495/1780], lr: 1.25000e-05, loss: 3

Epoch [54][400/1780], lr: 1.25000e-05, loss: 3.6066
Epoch [54][405/1780], lr: 1.25000e-05, loss: 3.514
Epoch [54][410/1780], lr: 1.25000e-05, loss: 2.9525
Epoch [54][415/1780], lr: 1.25000e-05, loss: 3.2213
Epoch [54][420/1780], lr: 1.25000e-05, loss: 3.3972
Epoch [54][425/1780], lr: 1.25000e-05, loss: 2.9156
Epoch [54][430/1780], lr: 1.25000e-05, loss: 3.1705
Epoch [54][435/1780], lr: 1.25000e-05, loss: 3.446
Epoch [54][440/1780], lr: 1.25000e-05, loss: 4.27
Epoch [54][445/1780], lr: 1.25000e-05, loss: 3.279
Epoch [54][450/1780], lr: 1.25000e-05, loss: 3.0452
Epoch [54][455/1780], lr: 1.25000e-05, loss: 1.783
Epoch [54][460/1780], lr: 1.25000e-05, loss: 2.8905
Epoch [54][465/1780], lr: 1.25000e-05, loss: 3.2926
Epoch [54][470/1780], lr: 1.25000e-05, loss: 3.5131
Epoch [54][475/1780], lr: 1.25000e-05, loss: 4.6831
Epoch [54][480/1780], lr: 1.25000e-05, loss: 4.6599
Epoch [54][485/1780], lr: 1.25000e-05, loss: 3.4138
Epoch [54][490/1780], lr: 1.25000e-05, loss: 2.9632
Epoch [54][495/178

Epoch [54][1190/1780], lr: 1.25000e-05, loss: 3.4593
Epoch [54][1195/1780], lr: 1.25000e-05, loss: 3.1863
Epoch [54][1200/1780], lr: 1.25000e-05, loss: 3.3417
Epoch [54][1205/1780], lr: 1.25000e-05, loss: 3.3314
Epoch [54][1210/1780], lr: 1.25000e-05, loss: 3.2643
Epoch [54][1215/1780], lr: 1.25000e-05, loss: 3.7914
Epoch [54][1220/1780], lr: 1.25000e-05, loss: 4.3516
Epoch [54][1225/1780], lr: 1.25000e-05, loss: 2.4858
Epoch [54][1230/1780], lr: 1.25000e-05, loss: 4.299
Epoch [54][1235/1780], lr: 1.25000e-05, loss: 3.6732
Epoch [54][1240/1780], lr: 1.25000e-05, loss: 3.2757
Epoch [54][1245/1780], lr: 1.25000e-05, loss: 3.2946
Epoch [54][1250/1780], lr: 1.25000e-05, loss: 4.0069
Epoch [54][1255/1780], lr: 1.25000e-05, loss: 3.4841
Epoch [54][1260/1780], lr: 1.25000e-05, loss: 3.7132
Epoch [54][1265/1780], lr: 1.25000e-05, loss: 4.4118
Epoch [54][1270/1780], lr: 1.25000e-05, loss: 2.9927
Epoch [54][1275/1780], lr: 1.25000e-05, loss: 3.1976
Epoch [54][1280/1780], lr: 1.25000e-05, loss: 3

Epoch [55][180/1780], lr: 1.25000e-05, loss: 2.7497
Epoch [55][185/1780], lr: 1.25000e-05, loss: 3.4997
Epoch [55][190/1780], lr: 1.25000e-05, loss: 2.8461
Epoch [55][195/1780], lr: 1.25000e-05, loss: 4.231
Epoch [55][200/1780], lr: 1.25000e-05, loss: 2.8601
Epoch [55][205/1780], lr: 1.25000e-05, loss: 2.9012
Epoch [55][210/1780], lr: 1.25000e-05, loss: 3.2592
Epoch [55][215/1780], lr: 1.25000e-05, loss: 2.8373
Epoch [55][220/1780], lr: 1.25000e-05, loss: 3.7841
Epoch [55][225/1780], lr: 1.25000e-05, loss: 3.4809
Epoch [55][230/1780], lr: 1.25000e-05, loss: 3.9863
Epoch [55][235/1780], lr: 1.25000e-05, loss: 2.3017
Epoch [55][240/1780], lr: 1.25000e-05, loss: 3.063
Epoch [55][245/1780], lr: 1.25000e-05, loss: 2.7969
Epoch [55][250/1780], lr: 1.25000e-05, loss: 3.4066
Epoch [55][255/1780], lr: 1.25000e-05, loss: 2.3721
Epoch [55][260/1780], lr: 1.25000e-05, loss: 3.9629
Epoch [55][265/1780], lr: 1.25000e-05, loss: 3.509
Epoch [55][270/1780], lr: 1.25000e-05, loss: 3.4052
Epoch [55][275/

Epoch [55][970/1780], lr: 1.25000e-05, loss: 2.9179
Epoch [55][975/1780], lr: 1.25000e-05, loss: 2.7526
Epoch [55][980/1780], lr: 1.25000e-05, loss: 3.6345
Epoch [55][985/1780], lr: 1.25000e-05, loss: 3.0369
Epoch [55][990/1780], lr: 1.25000e-05, loss: 3.3045
Epoch [55][995/1780], lr: 1.25000e-05, loss: 3.2865
Epoch [55][1000/1780], lr: 1.25000e-05, loss: 3.3197
Epoch [55][1005/1780], lr: 1.25000e-05, loss: 3.3976
Epoch [55][1010/1780], lr: 1.25000e-05, loss: 3.5285
Epoch [55][1015/1780], lr: 1.25000e-05, loss: 2.9736
Epoch [55][1020/1780], lr: 1.25000e-05, loss: 4.2202
Epoch [55][1025/1780], lr: 1.25000e-05, loss: 4.4943
Epoch [55][1030/1780], lr: 1.25000e-05, loss: 3.2118
Epoch [55][1035/1780], lr: 1.25000e-05, loss: 3.4739
Epoch [55][1040/1780], lr: 1.25000e-05, loss: 2.4844
Epoch [55][1045/1780], lr: 1.25000e-05, loss: 3.576
Epoch [55][1050/1780], lr: 1.25000e-05, loss: 3.9946
Epoch [55][1055/1780], lr: 1.25000e-05, loss: 3.9101
Epoch [55][1060/1780], lr: 1.25000e-05, loss: 3.4004


Epoch [55][1750/1780], lr: 1.25000e-05, loss: 3.3948
Epoch [55][1755/1780], lr: 1.25000e-05, loss: 2.718
Epoch [55][1760/1780], lr: 1.25000e-05, loss: 3.6956
Epoch [55][1765/1780], lr: 1.25000e-05, loss: 3.0923
Epoch [55][1770/1780], lr: 1.25000e-05, loss: 3.464
Epoch [55][1775/1780], lr: 1.25000e-05, loss: 3.2413
Epoch [55][1780/1780], lr: 1.25000e-05, loss: 3.2553
Evaluating top_k_accuracy...
top1_acc: 0.124, top5_acc: 0.2752, train_loss: 3.2553, val_loss: 3.9166
Saving checkpoint at 55 epochs...
Epoch [56][5/1780], lr: 1.25000e-05, loss: 3.2455
Epoch [56][10/1780], lr: 1.25000e-05, loss: 2.9582
Epoch [56][15/1780], lr: 1.25000e-05, loss: 4.1975
Epoch [56][20/1780], lr: 1.25000e-05, loss: 2.6072
Epoch [56][25/1780], lr: 1.25000e-05, loss: 4.0898
Epoch [56][30/1780], lr: 1.25000e-05, loss: 2.5995
Epoch [56][35/1780], lr: 1.25000e-05, loss: 4.0232
Epoch [56][40/1780], lr: 1.25000e-05, loss: 3.0586
Epoch [56][45/1780], lr: 1.25000e-05, loss: 3.5619
Epoch [56][50/1780], lr: 1.25000e-05, 

Epoch [56][750/1780], lr: 1.25000e-05, loss: 2.8481
Epoch [56][755/1780], lr: 1.25000e-05, loss: 3.1196
Epoch [56][760/1780], lr: 1.25000e-05, loss: 3.7902
Epoch [56][765/1780], lr: 1.25000e-05, loss: 3.7321
Epoch [56][770/1780], lr: 1.25000e-05, loss: 2.7602
Epoch [56][775/1780], lr: 1.25000e-05, loss: 3.2723
Epoch [56][780/1780], lr: 1.25000e-05, loss: 3.5281
Epoch [56][785/1780], lr: 1.25000e-05, loss: 2.4913
Epoch [56][790/1780], lr: 1.25000e-05, loss: 3.2684
Epoch [56][795/1780], lr: 1.25000e-05, loss: 2.5531
Epoch [56][800/1780], lr: 1.25000e-05, loss: 3.1418
Epoch [56][805/1780], lr: 1.25000e-05, loss: 3.2278
Epoch [56][810/1780], lr: 1.25000e-05, loss: 2.8944
Epoch [56][815/1780], lr: 1.25000e-05, loss: 2.6887
Epoch [56][820/1780], lr: 1.25000e-05, loss: 2.4134
Epoch [56][825/1780], lr: 1.25000e-05, loss: 3.2472
Epoch [56][830/1780], lr: 1.25000e-05, loss: 3.1112
Epoch [56][835/1780], lr: 1.25000e-05, loss: 3.2965
Epoch [56][840/1780], lr: 1.25000e-05, loss: 4.6258
Epoch [56][8

Epoch [56][1530/1780], lr: 1.25000e-05, loss: 2.4825
Epoch [56][1535/1780], lr: 1.25000e-05, loss: 2.621
Epoch [56][1540/1780], lr: 1.25000e-05, loss: 3.6434
Epoch [56][1545/1780], lr: 1.25000e-05, loss: 2.9987
Epoch [56][1550/1780], lr: 1.25000e-05, loss: 3.228
Epoch [56][1555/1780], lr: 1.25000e-05, loss: 3.0603
Epoch [56][1560/1780], lr: 1.25000e-05, loss: 2.8135
Epoch [56][1565/1780], lr: 1.25000e-05, loss: 2.9063
Epoch [56][1570/1780], lr: 1.25000e-05, loss: 3.5834
Epoch [56][1575/1780], lr: 1.25000e-05, loss: 3.0636
Epoch [56][1580/1780], lr: 1.25000e-05, loss: 4.3625
Epoch [56][1585/1780], lr: 1.25000e-05, loss: 3.0415
Epoch [56][1590/1780], lr: 1.25000e-05, loss: 3.589
Epoch [56][1595/1780], lr: 1.25000e-05, loss: 3.2607
Epoch [56][1600/1780], lr: 1.25000e-05, loss: 3.2591
Epoch [56][1605/1780], lr: 1.25000e-05, loss: 3.7113
Epoch [56][1610/1780], lr: 1.25000e-05, loss: 2.9266
Epoch [56][1615/1780], lr: 1.25000e-05, loss: 2.8004
Epoch [56][1620/1780], lr: 1.25000e-05, loss: 3.5

Epoch [57][525/1780], lr: 1.25000e-05, loss: 2.0892
Epoch [57][530/1780], lr: 1.25000e-05, loss: 4.5835
Epoch [57][535/1780], lr: 1.25000e-05, loss: 2.3657
Epoch [57][540/1780], lr: 1.25000e-05, loss: 3.5565
Epoch [57][545/1780], lr: 1.25000e-05, loss: 3.5606
Epoch [57][550/1780], lr: 1.25000e-05, loss: 3.3104
Epoch [57][555/1780], lr: 1.25000e-05, loss: 3.0841
Epoch [57][560/1780], lr: 1.25000e-05, loss: 2.713
Epoch [57][565/1780], lr: 1.25000e-05, loss: 4.1682
Epoch [57][570/1780], lr: 1.25000e-05, loss: 3.4772
Epoch [57][575/1780], lr: 1.25000e-05, loss: 3.1055
Epoch [57][580/1780], lr: 1.25000e-05, loss: 3.7969
Epoch [57][585/1780], lr: 1.25000e-05, loss: 3.5701
Epoch [57][590/1780], lr: 1.25000e-05, loss: 2.7132
Epoch [57][595/1780], lr: 1.25000e-05, loss: 3.5058
Epoch [57][600/1780], lr: 1.25000e-05, loss: 3.5795
Epoch [57][605/1780], lr: 1.25000e-05, loss: 3.2608
Epoch [57][610/1780], lr: 1.25000e-05, loss: 3.7266
Epoch [57][615/1780], lr: 1.25000e-05, loss: 3.9132
Epoch [57][62

Epoch [57][1310/1780], lr: 1.25000e-05, loss: 2.541
Epoch [57][1315/1780], lr: 1.25000e-05, loss: 2.9161
Epoch [57][1320/1780], lr: 1.25000e-05, loss: 3.8814
Epoch [57][1325/1780], lr: 1.25000e-05, loss: 3.4325
Epoch [57][1330/1780], lr: 1.25000e-05, loss: 3.8217
Epoch [57][1335/1780], lr: 1.25000e-05, loss: 2.0818
Epoch [57][1340/1780], lr: 1.25000e-05, loss: 2.9731
Epoch [57][1345/1780], lr: 1.25000e-05, loss: 3.7947
Epoch [57][1350/1780], lr: 1.25000e-05, loss: 2.3696
Epoch [57][1355/1780], lr: 1.25000e-05, loss: 2.5614
Epoch [57][1360/1780], lr: 1.25000e-05, loss: 3.452
Epoch [57][1365/1780], lr: 1.25000e-05, loss: 2.1034
Epoch [57][1370/1780], lr: 1.25000e-05, loss: 3.4294
Epoch [57][1375/1780], lr: 1.25000e-05, loss: 3.4508
Epoch [57][1380/1780], lr: 1.25000e-05, loss: 3.8256
Epoch [57][1385/1780], lr: 1.25000e-05, loss: 3.5707
Epoch [57][1390/1780], lr: 1.25000e-05, loss: 3.2579
Epoch [57][1395/1780], lr: 1.25000e-05, loss: 3.4395
Epoch [57][1400/1780], lr: 1.25000e-05, loss: 4.

Epoch [58][300/1780], lr: 1.25000e-05, loss: 3.2563
Epoch [58][305/1780], lr: 1.25000e-05, loss: 2.5017
Epoch [58][310/1780], lr: 1.25000e-05, loss: 3.7604
Epoch [58][315/1780], lr: 1.25000e-05, loss: 3.005
Epoch [58][320/1780], lr: 1.25000e-05, loss: 4.0217
Epoch [58][325/1780], lr: 1.25000e-05, loss: 2.6798
Epoch [58][330/1780], lr: 1.25000e-05, loss: 3.259
Epoch [58][335/1780], lr: 1.25000e-05, loss: 3.4764
Epoch [58][340/1780], lr: 1.25000e-05, loss: 3.0821
Epoch [58][345/1780], lr: 1.25000e-05, loss: 3.3523
Epoch [58][350/1780], lr: 1.25000e-05, loss: 3.5608
Epoch [58][355/1780], lr: 1.25000e-05, loss: 3.5889
Epoch [58][360/1780], lr: 1.25000e-05, loss: 3.2235
Epoch [58][365/1780], lr: 1.25000e-05, loss: 3.0241
Epoch [58][370/1780], lr: 1.25000e-05, loss: 3.5466
Epoch [58][375/1780], lr: 1.25000e-05, loss: 3.3589
Epoch [58][380/1780], lr: 1.25000e-05, loss: 3.2353
Epoch [58][385/1780], lr: 1.25000e-05, loss: 2.5938
Epoch [58][390/1780], lr: 1.25000e-05, loss: 3.3181
Epoch [58][395

Epoch [58][1090/1780], lr: 1.25000e-05, loss: 3.5205
Epoch [58][1095/1780], lr: 1.25000e-05, loss: 3.4683
Epoch [58][1100/1780], lr: 1.25000e-05, loss: 4.2699
Epoch [58][1105/1780], lr: 1.25000e-05, loss: 2.7774
Epoch [58][1110/1780], lr: 1.25000e-05, loss: 3.021
Epoch [58][1115/1780], lr: 1.25000e-05, loss: 3.2741
Epoch [58][1120/1780], lr: 1.25000e-05, loss: 3.8385
Epoch [58][1125/1780], lr: 1.25000e-05, loss: 3.8296
Epoch [58][1130/1780], lr: 1.25000e-05, loss: 2.9414
Epoch [58][1135/1780], lr: 1.25000e-05, loss: 2.4791
Epoch [58][1140/1780], lr: 1.25000e-05, loss: 3.2329
Epoch [58][1145/1780], lr: 1.25000e-05, loss: 3.8118
Epoch [58][1150/1780], lr: 1.25000e-05, loss: 3.1931
Epoch [58][1155/1780], lr: 1.25000e-05, loss: 4.4628
Epoch [58][1160/1780], lr: 1.25000e-05, loss: 3.5034
Epoch [58][1165/1780], lr: 1.25000e-05, loss: 2.1896
Epoch [58][1170/1780], lr: 1.25000e-05, loss: 3.3813
Epoch [58][1175/1780], lr: 1.25000e-05, loss: 3.9412
Epoch [58][1180/1780], lr: 1.25000e-05, loss: 3

Epoch [59][75/1780], lr: 1.25000e-05, loss: 3.1893
Epoch [59][80/1780], lr: 1.25000e-05, loss: 2.9198
Epoch [59][85/1780], lr: 1.25000e-05, loss: 3.3325
Epoch [59][90/1780], lr: 1.25000e-05, loss: 2.7283
Epoch [59][95/1780], lr: 1.25000e-05, loss: 2.1679
Epoch [59][100/1780], lr: 1.25000e-05, loss: 2.7009
Epoch [59][105/1780], lr: 1.25000e-05, loss: 3.1511
Epoch [59][110/1780], lr: 1.25000e-05, loss: 2.8976
Epoch [59][115/1780], lr: 1.25000e-05, loss: 3.5441
Epoch [59][120/1780], lr: 1.25000e-05, loss: 4.5871
Epoch [59][125/1780], lr: 1.25000e-05, loss: 3.5894
Epoch [59][130/1780], lr: 1.25000e-05, loss: 3.2663
Epoch [59][135/1780], lr: 1.25000e-05, loss: 2.782
Epoch [59][140/1780], lr: 1.25000e-05, loss: 3.0979
Epoch [59][145/1780], lr: 1.25000e-05, loss: 2.3552
Epoch [59][150/1780], lr: 1.25000e-05, loss: 3.1699
Epoch [59][155/1780], lr: 1.25000e-05, loss: 3.6157
Epoch [59][160/1780], lr: 1.25000e-05, loss: 3.1449
Epoch [59][165/1780], lr: 1.25000e-05, loss: 3.2643
Epoch [59][170/178

Epoch [59][865/1780], lr: 1.25000e-05, loss: 4.5597
Epoch [59][870/1780], lr: 1.25000e-05, loss: 2.9097
Epoch [59][875/1780], lr: 1.25000e-05, loss: 3.5215
Epoch [59][880/1780], lr: 1.25000e-05, loss: 3.5531
Epoch [59][885/1780], lr: 1.25000e-05, loss: 3.5286
Epoch [59][890/1780], lr: 1.25000e-05, loss: 3.4404
Epoch [59][895/1780], lr: 1.25000e-05, loss: 3.1351
Epoch [59][900/1780], lr: 1.25000e-05, loss: 2.8585
Epoch [59][905/1780], lr: 1.25000e-05, loss: 3.3056
Epoch [59][910/1780], lr: 1.25000e-05, loss: 2.9415
Epoch [59][915/1780], lr: 1.25000e-05, loss: 3.6262
Epoch [59][920/1780], lr: 1.25000e-05, loss: 3.556
Epoch [59][925/1780], lr: 1.25000e-05, loss: 2.5116
Epoch [59][930/1780], lr: 1.25000e-05, loss: 3.9161
Epoch [59][935/1780], lr: 1.25000e-05, loss: 3.6269
Epoch [59][940/1780], lr: 1.25000e-05, loss: 3.9213
Epoch [59][945/1780], lr: 1.25000e-05, loss: 4.0665
Epoch [59][950/1780], lr: 1.25000e-05, loss: 3.0177
Epoch [59][955/1780], lr: 1.25000e-05, loss: 3.264
Epoch [59][960

Epoch [59][1645/1780], lr: 1.25000e-05, loss: 3.2337
Epoch [59][1650/1780], lr: 1.25000e-05, loss: 3.7927
Epoch [59][1655/1780], lr: 1.25000e-05, loss: 4.0534
Epoch [59][1660/1780], lr: 1.25000e-05, loss: 3.7141
Epoch [59][1665/1780], lr: 1.25000e-05, loss: 4.383
Epoch [59][1670/1780], lr: 1.25000e-05, loss: 2.588
Epoch [59][1675/1780], lr: 1.25000e-05, loss: 3.3428
Epoch [59][1680/1780], lr: 1.25000e-05, loss: 2.8875
Epoch [59][1685/1780], lr: 1.25000e-05, loss: 3.3424
Epoch [59][1690/1780], lr: 1.25000e-05, loss: 2.1318
Epoch [59][1695/1780], lr: 1.25000e-05, loss: 3.6858
Epoch [59][1700/1780], lr: 1.25000e-05, loss: 4.4678
Epoch [59][1705/1780], lr: 1.25000e-05, loss: 3.0485
Epoch [59][1710/1780], lr: 1.25000e-05, loss: 3.7335
Epoch [59][1715/1780], lr: 1.25000e-05, loss: 3.5477
Epoch [59][1720/1780], lr: 1.25000e-05, loss: 3.1757
Epoch [59][1725/1780], lr: 1.25000e-05, loss: 2.3638
Epoch [59][1730/1780], lr: 1.25000e-05, loss: 2.4904
Epoch [59][1735/1780], lr: 1.25000e-05, loss: 2.

Epoch [60][645/1780], lr: 1.25000e-05, loss: 3.0527
Epoch [60][650/1780], lr: 1.25000e-05, loss: 3.1045
Epoch [60][655/1780], lr: 1.25000e-05, loss: 3.3424
Epoch [60][660/1780], lr: 1.25000e-05, loss: 3.3402
Epoch [60][665/1780], lr: 1.25000e-05, loss: 3.4541
Epoch [60][670/1780], lr: 1.25000e-05, loss: 2.9584
Epoch [60][675/1780], lr: 1.25000e-05, loss: 4.0694
Epoch [60][680/1780], lr: 1.25000e-05, loss: 3.3795
Epoch [60][685/1780], lr: 1.25000e-05, loss: 3.8525
Epoch [60][690/1780], lr: 1.25000e-05, loss: 2.4495
Epoch [60][695/1780], lr: 1.25000e-05, loss: 3.2784
Epoch [60][700/1780], lr: 1.25000e-05, loss: 3.5244
Epoch [60][705/1780], lr: 1.25000e-05, loss: 3.3978
Epoch [60][710/1780], lr: 1.25000e-05, loss: 3.8266
Epoch [60][715/1780], lr: 1.25000e-05, loss: 3.8852
Epoch [60][720/1780], lr: 1.25000e-05, loss: 3.1217
Epoch [60][725/1780], lr: 1.25000e-05, loss: 3.5191
Epoch [60][730/1780], lr: 1.25000e-05, loss: 3.913
Epoch [60][735/1780], lr: 1.25000e-05, loss: 2.9392
Epoch [60][74

Epoch [60][1430/1780], lr: 1.25000e-05, loss: 3.4475
Epoch [60][1435/1780], lr: 1.25000e-05, loss: 3.2348
Epoch [60][1440/1780], lr: 1.25000e-05, loss: 3.4628
Epoch [60][1445/1780], lr: 1.25000e-05, loss: 2.7227
Epoch [60][1450/1780], lr: 1.25000e-05, loss: 3.7511
Epoch [60][1455/1780], lr: 1.25000e-05, loss: 4.2508
Epoch [60][1460/1780], lr: 1.25000e-05, loss: 3.3528
Epoch [60][1465/1780], lr: 1.25000e-05, loss: 3.1001
Epoch [60][1470/1780], lr: 1.25000e-05, loss: 2.5433
Epoch [60][1475/1780], lr: 1.25000e-05, loss: 3.4942
Epoch [60][1480/1780], lr: 1.25000e-05, loss: 3.5479
Epoch [60][1485/1780], lr: 1.25000e-05, loss: 3.1392
Epoch [60][1490/1780], lr: 1.25000e-05, loss: 2.7549
Epoch [60][1495/1780], lr: 1.25000e-05, loss: 2.9255
Epoch [60][1500/1780], lr: 1.25000e-05, loss: 3.1755
Epoch [60][1505/1780], lr: 1.25000e-05, loss: 4.1222
Epoch [60][1510/1780], lr: 1.25000e-05, loss: 3.297
Epoch [60][1515/1780], lr: 1.25000e-05, loss: 3.0452
Epoch [60][1520/1780], lr: 1.25000e-05, loss: 3

Epoch [61][425/1780], lr: 1.25000e-05, loss: 2.6821
Epoch [61][430/1780], lr: 1.25000e-05, loss: 3.4094
Epoch [61][435/1780], lr: 1.25000e-05, loss: 4.0517
Epoch [61][440/1780], lr: 1.25000e-05, loss: 3.7126
Epoch [61][445/1780], lr: 1.25000e-05, loss: 2.1938
Epoch [61][450/1780], lr: 1.25000e-05, loss: 3.4422
Epoch [61][455/1780], lr: 1.25000e-05, loss: 3.4851
Epoch [61][460/1780], lr: 1.25000e-05, loss: 2.7799
Epoch [61][465/1780], lr: 1.25000e-05, loss: 3.1261
Epoch [61][470/1780], lr: 1.25000e-05, loss: 3.6838
Epoch [61][475/1780], lr: 1.25000e-05, loss: 3.0221
Epoch [61][480/1780], lr: 1.25000e-05, loss: 3.2764
Epoch [61][485/1780], lr: 1.25000e-05, loss: 2.7788
Epoch [61][490/1780], lr: 1.25000e-05, loss: 3.1383
Epoch [61][495/1780], lr: 1.25000e-05, loss: 2.7569
Epoch [61][500/1780], lr: 1.25000e-05, loss: 2.9379
Epoch [61][505/1780], lr: 1.25000e-05, loss: 2.4442
Epoch [61][510/1780], lr: 1.25000e-05, loss: 3.2371
Epoch [61][515/1780], lr: 1.25000e-05, loss: 2.5671
Epoch [61][5

Epoch [61][1210/1780], lr: 1.25000e-05, loss: 2.6061
Epoch [61][1215/1780], lr: 1.25000e-05, loss: 3.6805
Epoch [61][1220/1780], lr: 1.25000e-05, loss: 2.1705
Epoch [61][1225/1780], lr: 1.25000e-05, loss: 3.0386
Epoch [61][1230/1780], lr: 1.25000e-05, loss: 2.7401
Epoch [61][1235/1780], lr: 1.25000e-05, loss: 3.3521
Epoch [61][1240/1780], lr: 1.25000e-05, loss: 2.9859
Epoch [61][1245/1780], lr: 1.25000e-05, loss: 2.9319
Epoch [61][1250/1780], lr: 1.25000e-05, loss: 3.4581
Epoch [61][1255/1780], lr: 1.25000e-05, loss: 2.8757
Epoch [61][1260/1780], lr: 1.25000e-05, loss: 3.6553
Epoch [61][1265/1780], lr: 1.25000e-05, loss: 2.8299
Epoch [61][1270/1780], lr: 1.25000e-05, loss: 3.847
Epoch [61][1275/1780], lr: 1.25000e-05, loss: 3.0613
Epoch [61][1280/1780], lr: 1.25000e-05, loss: 2.4173
Epoch [61][1285/1780], lr: 1.25000e-05, loss: 4.1623
Epoch [61][1290/1780], lr: 1.25000e-05, loss: 2.2358
Epoch [61][1295/1780], lr: 1.25000e-05, loss: 3.5725
Epoch [61][1300/1780], lr: 1.25000e-05, loss: 3

RuntimeError: [enforce fail at inline_container.cc:300] . unexpected pos 5366784 vs 5366680

--- Logging error ---
Traceback (most recent call last):
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/logging/__init__.py", line 1089, in emit
    self.flush()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/logging/__init__.py", line 1069, in flush
    self.stream.flush()
OSError: [Errno 28] No space left on device
Call stack:
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/threading.py", line 890, in _bootstrap
    self._bootstrap_inner()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/site-packages/wandb/sdk/internal/internal_util.py", line 50, in run
    self._run()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/site-packages/wandb/sdk/internal/internal_util.py", line 101, in _run
    self._process(record)
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/site-packages/wandb/sdk/internal/internal.py", lin

  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/site-packages/wandb/filesync/dir_watcher.py", line 406, in finish
    logger.info("scan save: %s %s", file_path, save_name)
Message: 'scan save: %s %s'
Arguments: ('/home/myuser1/Desktop/sign-language-summer-research/wandb/run-20230308_184335-15weuvrr/files/config.yaml', 'config.yaml')
--- Logging error ---
Traceback (most recent call last):
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/logging/__init__.py", line 1089, in emit
    self.flush()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/logging/__init__.py", line 1069, in flush
    self.stream.flush()
OSError: [Errno 28] No space left on device
Call stack:
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/threading.py", line 890, in _bootstrap
    self._bootstrap_inner()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/home/myuser1/miniconda3/envs/wlasl/lib/python3.8/s