## Imports

In [1]:
import os
import torch
import torch.nn as nn
import wandb
import numpy as np

from dataset.dataset import MultiModalDataset
from mmcv_model.mmcv_csn import ResNet3dCSN
from mmcv_model.scheduler import GradualWarmupScheduler

from model.multimodal_neck import MultiModalNeck
from model.simple_head import SimpleHead
from model.flow_autoencoder import FlowAutoencoder



## Training functions

In [2]:

def top_k_accuracy(scores, labels, topk=(1, )):
    """Compute, for each k, the share of samples whose label is in the top k.

    Args:
        scores (list[np.ndarray]): Per-sample class scores; row ``i`` holds
            the scores predicted for sample ``i``.
        labels (list[int]): Ground-truth class index of every sample.
        topk (tuple[int]): The k values to evaluate. Default: (1, ).
    Returns:
        np.ndarray: One accuracy value per entry of ``topk``, in order.
    """
    # Column vector so it broadcasts against the (n_samples, k) predictions.
    truth = np.array(labels)[:, np.newaxis]

    def _hit_rate(k):
        # argsort is ascending: the last k columns are the k highest scores,
        # reversed so that the best prediction comes first.
        best_k = np.argsort(scores, axis=1)[:, -k:][:, ::-1]
        hits = np.any(best_k == truth, axis=1)
        return hits.sum() / hits.shape[0]

    return np.array([_hit_rate(k) for k in topk], dtype=float)


def train_one_epoch(epoch_index, interval=5):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the module-level ``train_loader``, ``model``, ``optimizer``,
    ``loss_fn``, ``scheduler`` and ``device``.

    Args:
        epoch_index (int): Epoch number shown in the log lines.
        interval (int): Number of batches between two log lines.
    Returns:
        tuple: ``(last_loss, lr)``. ``last_loss`` is the mean batch loss of
        the most recently completed logging window (0. if none completed);
        ``lr`` is the first entry of ``scheduler.get_last_lr()``.
    """
    last_loss = 0.
    window_loss = 0.  # sum of batch losses since the previous log line

    for batch_idx, batch in enumerate(train_loader):
        rgb = batch['rgb'].to(device)
        flow = batch['flow'].to(device)
        # Labels are flattened to 1-D before being handed to the loss.
        targets = batch['label'].reshape(-1).to(device)

        # Gradients accumulate by default, so clear them for each batch.
        optimizer.zero_grad()

        # Forward pass, loss and backward pass.
        predictions = model(rgb=rgb, flow=flow)
        batch_loss = loss_fn(predictions, targets)
        batch_loss.backward()

        # Clip the global L2 gradient norm before the update.
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), max_norm=40, norm_type=2.0)

        optimizer.step()

        window_loss += batch_loss.item()
        if batch_idx % interval == interval - 1:
            # A full window has been seen: report its mean and start anew.
            last_loss = window_loss / interval
            print(
                f'Epoch [{epoch_index}][{batch_idx + 1}/{len(train_loader)}], lr: {scheduler.get_last_lr()[0]:.5e}, loss: {last_loss:.5}')
            window_loss = 0.

    return last_loss, scheduler.get_last_lr()[0]


def validate():
    """Evaluate the global ``model`` on ``test_loader``.

    Uses the module-level ``model``, ``test_loader``, ``loss_fn``, ``device``
    and ``top_k_accuracy``. The model is switched to eval mode and left there.

    Returns:
        tuple: ``(avg_vloss, top1_acc, top5_acc)`` as Python floats.
        ``avg_vloss`` is the mean of the per-batch losses; the accuracies are
        the means of the per-batch top-1 / top-5 accuracies, in decimal.
    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    running_vloss = 0.0
    running_vacc = np.zeros(2)
    num_batches = 0

    print('Evaluating top_k_accuracy...')

    model.eval()
    with torch.inference_mode():
        for results in test_loader:
            rgb = results['rgb']
            flow = results['flow']
            vtargets = results['label']

            vtargets = vtargets.reshape(-1, )

            rgb, flow, vtargets = rgb.to(device), flow.to(device), vtargets.to(device)

            voutputs = model(rgb=rgb,
                             flow=flow)

            # .item() keeps the accumulator a plain float instead of a
            # tensor living on the compute device.
            running_vloss += loss_fn(voutputs, vtargets).item()

            running_vacc += top_k_accuracy(voutputs.detach().cpu().numpy(),
                                           vtargets.detach().cpu().numpy(), topk=(1, 5))
            num_batches += 1

    if num_batches == 0:
        raise ValueError('test_loader yielded no batches; nothing to validate.')

    # Loss and accuracy are both averaged over the same batch count.
    avg_vloss = running_vloss / num_batches
    acc = running_vacc / num_batches
    top1_acc = acc[0].item()
    top5_acc = acc[1].item()

    return (avg_vloss, top1_acc, top5_acc)

## Get last layer of CSN

In [3]:
class CSNBottleneck(nn.Module):
    """Residual 3D bottleneck with a channel-separated (depthwise) 3x3x3 conv.

    Layout: 1x1x1 conv -> BN -> ReLU -> conv2 -> BN -> ReLU -> 1x1x1 conv
    -> BN, added to a shortcut and passed through a final ReLU. The output
    has ``channels * expansion`` channels.

    Args:
        in_channels (int): Channels of the input tensor.
        channels (int): Width of the bottleneck.
        stride (int): Stride of the depthwise conv (and of the shortcut
            projection when one is needed). Default: 1.
        mode (str): ``'ip'`` puts an extra 1x1x1 conv in front of the
            depthwise conv; ``'ir'`` uses the depthwise conv alone.
    """
    expansion = 4

    def __init__(self, in_channels, channels, stride=1, mode='ip'):
        super().__init__()

        assert mode in ('ip', 'ir')
        self.mode = mode
        out_channels = channels * self.expansion

        # Channel reduction.
        self.conv1 = nn.Conv3d(in_channels, channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm3d(channels)
        self.relu = nn.ReLU(inplace=True)

        # groups=channels makes the 3x3x3 convolution depthwise.
        stages = []
        if mode == 'ip':
            stages.append(
                nn.Conv3d(channels, channels, kernel_size=1, stride=1, bias=False))
        stages.append(
            nn.Conv3d(channels, channels, kernel_size=3, stride=stride,
                      padding=1, groups=channels, bias=False))
        self.conv2 = nn.Sequential(*stages)
        self.bn2 = nn.BatchNorm3d(channels)

        # Channel expansion.
        self.conv3 = nn.Conv3d(channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(out_channels)

        # The shortcut is an empty Sequential (identity) unless the residual
        # branch changes the resolution or the channel count.
        if stride != 1 or in_channels != out_channels:
            shortcut_layers = [
                nn.Conv3d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm3d(out_channels),
            ]
        else:
            shortcut_layers = []
        self.downsample = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Apply the bottleneck to ``x`` and return the activated sum."""
        identity = self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += identity
        return self.relu(y)
    
class Fusion(nn.Module):
    """Fuse backbone features with a stack of ``CSNBottleneck`` blocks.

    The second-to-last feature map of each supplied modality is concatenated
    along the channel axis, passed through ``n_blocks`` bottlenecks in
    ``'ir'`` mode, globally average-pooled and flattened.

    Args:
        in_channels (int): Channels of the concatenated input features.
        channels (int): Bottleneck width; the output has
            ``channels * CSNBottleneck.expansion`` features.
        n_blocks (int): Number of bottleneck blocks. Default: 3.
        stride (int): Stride of the first block. Default: 2.
        device (str): Device the bottleneck stack is moved to at
            construction. Default: 'cuda'.
    """

    def __init__(self,
                 in_channels,
                 channels,
                 n_blocks=3,
                 stride=2,
                 device='cuda'):

        super().__init__()
        block = CSNBottleneck

        # Only the first block sees the raw input width and the stride; the
        # remaining ones operate on the expanded width.
        layers = [block(in_channels, channels, stride, mode='ir')]
        in_channels = channels * block.expansion
        for _ in range(1, n_blocks):
            layers.append(block(in_channels, channels, mode='ir'))

        self.fusion = nn.Sequential(*layers).to(device)
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        # Kept for callers that read it; forward() no longer depends on it.
        self.device = device

    def forward(self,
                rgb=None,
                flow=None):
        """Fuse the given modalities.

        Args:
            rgb: Sequence of RGB backbone feature maps, or None.
            flow: Sequence of flow backbone feature maps, or None.
        Returns:
            torch.Tensor: Pooled fused features flattened from dim 1.
        Raises:
            ValueError: If neither ``rgb`` nor ``flow`` is given.
        """
        # Index -2 selects the second-to-last feature map of each modality.
        features = [stream[-2] for stream in (rgb, flow) if stream is not None]
        if not features:
            raise ValueError('Fusion.forward needs at least one of rgb or flow.')

        # Concatenating the feature maps directly keeps the result on the
        # inputs' device and dtype, whatever device the module was built on.
        out = torch.cat(features, dim=1)

        fused = self.fusion(out)
        fused_flattened = torch.flatten(self.avg_pool(fused), start_dim=1)

        return fused_flattened

## Seven-Sees-Net-V2

In [4]:
class SevenSeesNetV2(nn.Module):
    """Two-stream network: per-modality backbones, a fusion stage and a head.

    Args:
        rgb_backbone: Callable applied to the RGB input.
        flow_backbone: Callable applied to the flow input.
        fusion: Callable taking ``rgb=`` and ``flow=`` keyword arguments.
        head: Callable applied to the fusion output.
    """

    def __init__(self, rgb_backbone, flow_backbone, fusion, head):
        super().__init__()
        self.rgb_backbone = rgb_backbone
        self.flow_backbone = flow_backbone
        self.fusion = fusion
        self.head = head

    def forward(self, rgb, flow):
        """Run both backbones, fuse their outputs and return the head output."""
        rgb_features = self.rgb_backbone(rgb)
        flow_features = self.flow_backbone(flow)
        fused = self.fusion(rgb=rgb_features, flow=flow_features)
        return self.head(fused)

## Model Assembly

In [None]:
# Assemble the two-stream model, then train and validate it for `epochs`
# epochs, saving a checkpoint and logging metrics to wandb after every epoch.

# The checkpoint paths are relative to the starting directory, so they are
# read before the working directory changes below. map_location='cpu' keeps
# the raw checkpoints off the GPU; load_state_dict copies them into the models.
print('Loading rgb backbone checkpoint...')
rgb_checkpoint = torch.load('rgb_backbone.pth', map_location='cpu')
print('Loading flow backbone checkpoint...')
flow_checkpoint = torch.load('flow_backbone.pth', map_location='cpu')

# Every path from here on is relative to this directory.
os.chdir('../../..')

wandb.init(entity="cares", project="jack-slr",
           group="fusion", name="7sees-v2")

# Set up device agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Configs
work_dir = 'work_dirs/7sees-v2/'
batch_size = 1

os.makedirs(work_dir, exist_ok=True)

train_dataset = MultiModalDataset(ann_file='data/wlasl/train_annotations.txt',
                                  root_dir='data/wlasl/rawframes',
                                  clip_len=32,
                                  modalities=('rgb', 'flow'),
                                  resolution=224,
                                  frame_interval=1,
                                  input_resolution=256,
                                  num_clips=1
                                  )

test_dataset = MultiModalDataset(ann_file='data/wlasl/test_annotations.txt',
                                 root_dir='data/wlasl/rawframes',
                                 clip_len=32,
                                 resolution=224,
                                 modalities=('rgb', 'flow'),
                                 test_mode=True,
                                 frame_interval=1,
                                 input_resolution=256,
                                 num_clips=1
                                 )


# Setting up dataloaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4,
                                           pin_memory=True)

# Evaluation metrics do not depend on sample order, so no shuffling here.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=4,
                                          pin_memory=True)

# Both backbones share one configuration. init_weights() is never called on
# them; their weights come from the local checkpoints loaded above.
backbone_cfg = dict(
    pretrained2d=False,
    pretrained='https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r50_ig65m_20210617-ce545a37.pth',
    depth=50,
    with_pool2=False,
    bottleneck_mode='ir',
    norm_eval=True,
    zero_init_residual=False,
    bn_frozen=True,
    out_indices=(0, 1, 2, 3,)
)

# Custom multimodal model
rgb_backbone = ResNet3dCSN(**backbone_cfg)
rgb_backbone.load_state_dict(rgb_checkpoint)
del rgb_checkpoint

flow_backbone = ResNet3dCSN(**backbone_cfg)
flow_backbone.load_state_dict(flow_checkpoint)
del flow_checkpoint

print('Backbones loaded successfully.')

# Freeze the backbones
for backbone in (rgb_backbone, flow_backbone):
    for para in backbone.parameters():
        para.requires_grad = False


fusion = Fusion(in_channels=2048,
                n_blocks=3,
                channels=512,
                device=device)

# NOTE(review): num_classes=400 while the data paths point at WLASL; confirm
# this matches the number of classes in the annotation files.
head = SimpleHead(num_classes=400,
                  in_channels=2048,
                  dropout_ratio=0.5,
                  init_std=0.01)

head.init_weights()

model = SevenSeesNetV2(rgb_backbone=rgb_backbone,
                       flow_backbone=flow_backbone,
                       fusion=fusion,
                       head=head
                       )

# To resume from a saved run, load its state dict here:
# checkpoint = torch.load(work_dir+'latest.pth')
# model.load_state_dict(checkpoint)

# Specify optimizer
optimizer = torch.optim.SGD(
    model.parameters(), lr=0.000125, momentum=0.9, weight_decay=0.00001)

# Specify total epochs
epochs = 100

# Specify learning rate scheduler: warmup followed by step decay.
# NOTE(review): the recorded output of this run shows lr = 0 for all of
# epoch 1, so that epoch performs no effective updates; confirm this is the
# intended behaviour of GradualWarmupScheduler with multiplier=1.
scheduler_steplr = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[34, 84], gamma=0.1)
scheduler = GradualWarmupScheduler(
    optimizer, multiplier=1, total_epoch=16, after_scheduler=scheduler_steplr)

# Specify Loss
loss_fn = nn.CrossEntropyLoss()

# Setup wandb
wandb.watch(model, log_freq=10)

# Train Loop

# Transfer model to device
model.to(device)

for epoch in range(epochs):
    # Training mode for the optimisation pass
    model.train(True)
    avg_loss, learning_rate = train_one_epoch(epoch+1)

    # Evaluation mode for reporting
    model.train(False)

    avg_vloss, top1_acc, top5_acc = validate()
    # Plain float for formatting and logging, whether validate() returned a
    # tensor or a float.
    avg_vloss = float(avg_vloss)

    print(
        f'top1_acc: {top1_acc:.4}, top5_acc: {top5_acc:.4}, train_loss: {avg_loss:.5}, val_loss: {avg_vloss:.5}')

    # Save the model's state after every epoch
    model_path = work_dir + f'epoch_{epoch+1}.pth'
    print(f'Saving checkpoint at {epoch+1} epochs...')
    torch.save(model.state_dict(), model_path)

    # Adjust learning rate
    scheduler.step()

    # Track wandb
    wandb.log({'train/loss': avg_loss,
               'train/learning_rate': learning_rate,
               'val/loss': avg_vloss,
               'val/top1_accuracy': top1_acc,
               'val/top5_accuracy': top5_acc})

Loading rgb backbone checkpoint...
Loading flow backbone checkpoint...


[34m[1mwandb[0m: Currently logged in as: [33msttaseen[0m ([33mcares[0m). Use [1m`wandb login --relogin`[0m to force relogin


Backbones loaded successfully.
Epoch [1][5/1780], lr: 0.00000e+00, loss: 6.1724
Epoch [1][10/1780], lr: 0.00000e+00, loss: 5.7108
Epoch [1][15/1780], lr: 0.00000e+00, loss: 5.9568
Epoch [1][20/1780], lr: 0.00000e+00, loss: 6.231
Epoch [1][25/1780], lr: 0.00000e+00, loss: 5.6553
Epoch [1][30/1780], lr: 0.00000e+00, loss: 6.0661
Epoch [1][35/1780], lr: 0.00000e+00, loss: 6.0006
Epoch [1][40/1780], lr: 0.00000e+00, loss: 5.8929
Epoch [1][45/1780], lr: 0.00000e+00, loss: 6.0036
Epoch [1][50/1780], lr: 0.00000e+00, loss: 6.1425
Epoch [1][55/1780], lr: 0.00000e+00, loss: 6.1664
Epoch [1][60/1780], lr: 0.00000e+00, loss: 5.7424
Epoch [1][65/1780], lr: 0.00000e+00, loss: 6.2042
Epoch [1][70/1780], lr: 0.00000e+00, loss: 6.6218
Epoch [1][75/1780], lr: 0.00000e+00, loss: 5.8833
Epoch [1][80/1780], lr: 0.00000e+00, loss: 6.6026
Epoch [1][85/1780], lr: 0.00000e+00, loss: 6.04
Epoch [1][90/1780], lr: 0.00000e+00, loss: 5.9166
Epoch [1][95/1780], lr: 0.00000e+00, loss: 5.6344
Epoch [1][100/1780], lr

Epoch [1][810/1780], lr: 0.00000e+00, loss: 5.917
Epoch [1][815/1780], lr: 0.00000e+00, loss: 6.6082
Epoch [1][820/1780], lr: 0.00000e+00, loss: 6.5335
Epoch [1][825/1780], lr: 0.00000e+00, loss: 6.1024
Epoch [1][830/1780], lr: 0.00000e+00, loss: 6.4501
Epoch [1][835/1780], lr: 0.00000e+00, loss: 6.5067
Epoch [1][840/1780], lr: 0.00000e+00, loss: 6.1651
Epoch [1][845/1780], lr: 0.00000e+00, loss: 6.3023
Epoch [1][850/1780], lr: 0.00000e+00, loss: 5.7664
Epoch [1][855/1780], lr: 0.00000e+00, loss: 6.0032
Epoch [1][860/1780], lr: 0.00000e+00, loss: 5.6969
Epoch [1][865/1780], lr: 0.00000e+00, loss: 6.0283
Epoch [1][870/1780], lr: 0.00000e+00, loss: 6.2686
Epoch [1][875/1780], lr: 0.00000e+00, loss: 6.2061
Epoch [1][880/1780], lr: 0.00000e+00, loss: 5.9975
Epoch [1][885/1780], lr: 0.00000e+00, loss: 6.2705
Epoch [1][890/1780], lr: 0.00000e+00, loss: 5.7807
Epoch [1][895/1780], lr: 0.00000e+00, loss: 6.0843
Epoch [1][900/1780], lr: 0.00000e+00, loss: 6.063
Epoch [1][905/1780], lr: 0.00000e

Epoch [1][1605/1780], lr: 0.00000e+00, loss: 5.6768
Epoch [1][1610/1780], lr: 0.00000e+00, loss: 6.2923
Epoch [1][1615/1780], lr: 0.00000e+00, loss: 6.4945
Epoch [1][1620/1780], lr: 0.00000e+00, loss: 6.1153
Epoch [1][1625/1780], lr: 0.00000e+00, loss: 6.2038
Epoch [1][1630/1780], lr: 0.00000e+00, loss: 6.0551
Epoch [1][1635/1780], lr: 0.00000e+00, loss: 6.2793
Epoch [1][1640/1780], lr: 0.00000e+00, loss: 5.8342
Epoch [1][1645/1780], lr: 0.00000e+00, loss: 6.2614
Epoch [1][1650/1780], lr: 0.00000e+00, loss: 6.4802
Epoch [1][1655/1780], lr: 0.00000e+00, loss: 6.0875
Epoch [1][1660/1780], lr: 0.00000e+00, loss: 6.0804
Epoch [1][1665/1780], lr: 0.00000e+00, loss: 6.1689
Epoch [1][1670/1780], lr: 0.00000e+00, loss: 6.1195
Epoch [1][1675/1780], lr: 0.00000e+00, loss: 6.1558
Epoch [1][1680/1780], lr: 0.00000e+00, loss: 6.1484
Epoch [1][1685/1780], lr: 0.00000e+00, loss: 6.3759
Epoch [1][1690/1780], lr: 0.00000e+00, loss: 5.9028
Epoch [1][1695/1780], lr: 0.00000e+00, loss: 6.2407
Epoch [1][17

Epoch [2][615/1780], lr: 7.81250e-06, loss: 5.4359
Epoch [2][620/1780], lr: 7.81250e-06, loss: 5.5996
Epoch [2][625/1780], lr: 7.81250e-06, loss: 6.0601
Epoch [2][630/1780], lr: 7.81250e-06, loss: 5.9739
Epoch [2][635/1780], lr: 7.81250e-06, loss: 5.6146
Epoch [2][640/1780], lr: 7.81250e-06, loss: 5.7064
Epoch [2][645/1780], lr: 7.81250e-06, loss: 5.4984
Epoch [2][650/1780], lr: 7.81250e-06, loss: 5.4624
Epoch [2][655/1780], lr: 7.81250e-06, loss: 6.0148
Epoch [2][660/1780], lr: 7.81250e-06, loss: 5.3594
Epoch [2][665/1780], lr: 7.81250e-06, loss: 6.1484
Epoch [2][670/1780], lr: 7.81250e-06, loss: 5.9118
Epoch [2][675/1780], lr: 7.81250e-06, loss: 5.5196
Epoch [2][680/1780], lr: 7.81250e-06, loss: 5.8769
Epoch [2][685/1780], lr: 7.81250e-06, loss: 5.3663
Epoch [2][690/1780], lr: 7.81250e-06, loss: 5.2972
Epoch [2][695/1780], lr: 7.81250e-06, loss: 6.4022
Epoch [2][700/1780], lr: 7.81250e-06, loss: 6.0573
Epoch [2][705/1780], lr: 7.81250e-06, loss: 6.0391
Epoch [2][710/1780], lr: 7.8125

Epoch [2][1415/1780], lr: 7.81250e-06, loss: 5.3558
Epoch [2][1420/1780], lr: 7.81250e-06, loss: 4.8504
Epoch [2][1425/1780], lr: 7.81250e-06, loss: 5.4692
Epoch [2][1430/1780], lr: 7.81250e-06, loss: 5.4179
Epoch [2][1435/1780], lr: 7.81250e-06, loss: 5.1933
Epoch [2][1440/1780], lr: 7.81250e-06, loss: 5.3801
Epoch [2][1445/1780], lr: 7.81250e-06, loss: 5.2326
Epoch [2][1450/1780], lr: 7.81250e-06, loss: 5.8059
Epoch [2][1455/1780], lr: 7.81250e-06, loss: 5.5522
Epoch [2][1460/1780], lr: 7.81250e-06, loss: 4.9861
Epoch [2][1465/1780], lr: 7.81250e-06, loss: 5.1843
Epoch [2][1470/1780], lr: 7.81250e-06, loss: 5.0351
Epoch [2][1475/1780], lr: 7.81250e-06, loss: 5.3253
Epoch [2][1480/1780], lr: 7.81250e-06, loss: 5.4706
Epoch [2][1485/1780], lr: 7.81250e-06, loss: 5.3833
Epoch [2][1490/1780], lr: 7.81250e-06, loss: 5.778
Epoch [2][1495/1780], lr: 7.81250e-06, loss: 5.1722
Epoch [2][1500/1780], lr: 7.81250e-06, loss: 5.6012
Epoch [2][1505/1780], lr: 7.81250e-06, loss: 5.2028
Epoch [2][151

Epoch [3][425/1780], lr: 1.56250e-05, loss: 4.9599
Epoch [3][430/1780], lr: 1.56250e-05, loss: 5.5764
Epoch [3][435/1780], lr: 1.56250e-05, loss: 5.1853
Epoch [3][440/1780], lr: 1.56250e-05, loss: 4.8727
Epoch [3][445/1780], lr: 1.56250e-05, loss: 4.8942
Epoch [3][450/1780], lr: 1.56250e-05, loss: 4.938
Epoch [3][455/1780], lr: 1.56250e-05, loss: 4.808
Epoch [3][460/1780], lr: 1.56250e-05, loss: 5.1055
Epoch [3][465/1780], lr: 1.56250e-05, loss: 4.973
Epoch [3][470/1780], lr: 1.56250e-05, loss: 5.5506
Epoch [3][475/1780], lr: 1.56250e-05, loss: 4.7733
Epoch [3][480/1780], lr: 1.56250e-05, loss: 5.015
Epoch [3][485/1780], lr: 1.56250e-05, loss: 5.1701
Epoch [3][490/1780], lr: 1.56250e-05, loss: 5.4629
Epoch [3][495/1780], lr: 1.56250e-05, loss: 5.4852
Epoch [3][500/1780], lr: 1.56250e-05, loss: 5.5729
Epoch [3][505/1780], lr: 1.56250e-05, loss: 5.2598
Epoch [3][510/1780], lr: 1.56250e-05, loss: 5.0276
Epoch [3][515/1780], lr: 1.56250e-05, loss: 4.8664
Epoch [3][520/1780], lr: 1.56250e-0

Epoch [3][1230/1780], lr: 1.56250e-05, loss: 5.2475
Epoch [3][1235/1780], lr: 1.56250e-05, loss: 4.9073
Epoch [3][1240/1780], lr: 1.56250e-05, loss: 5.282
Epoch [3][1245/1780], lr: 1.56250e-05, loss: 5.1704
Epoch [3][1250/1780], lr: 1.56250e-05, loss: 4.8341
Epoch [3][1255/1780], lr: 1.56250e-05, loss: 4.9696
Epoch [3][1260/1780], lr: 1.56250e-05, loss: 4.9478
Epoch [3][1265/1780], lr: 1.56250e-05, loss: 5.4759
Epoch [3][1270/1780], lr: 1.56250e-05, loss: 5.357
Epoch [3][1275/1780], lr: 1.56250e-05, loss: 4.7403
Epoch [3][1280/1780], lr: 1.56250e-05, loss: 5.2563
Epoch [3][1285/1780], lr: 1.56250e-05, loss: 4.6429
Epoch [3][1290/1780], lr: 1.56250e-05, loss: 4.5778
Epoch [3][1295/1780], lr: 1.56250e-05, loss: 5.0579
Epoch [3][1300/1780], lr: 1.56250e-05, loss: 5.0632
Epoch [3][1305/1780], lr: 1.56250e-05, loss: 4.9374
Epoch [3][1310/1780], lr: 1.56250e-05, loss: 4.8953
Epoch [3][1315/1780], lr: 1.56250e-05, loss: 4.5943
Epoch [3][1320/1780], lr: 1.56250e-05, loss: 4.8217
Epoch [3][1325

Epoch [4][235/1780], lr: 2.34375e-05, loss: 4.9521
Epoch [4][240/1780], lr: 2.34375e-05, loss: 4.7683
Epoch [4][245/1780], lr: 2.34375e-05, loss: 4.8053
Epoch [4][250/1780], lr: 2.34375e-05, loss: 4.8078
Epoch [4][255/1780], lr: 2.34375e-05, loss: 5.2613
Epoch [4][260/1780], lr: 2.34375e-05, loss: 4.7873
Epoch [4][265/1780], lr: 2.34375e-05, loss: 4.6212
Epoch [4][270/1780], lr: 2.34375e-05, loss: 5.2225
Epoch [4][275/1780], lr: 2.34375e-05, loss: 4.3408
Epoch [4][280/1780], lr: 2.34375e-05, loss: 5.0398
Epoch [4][285/1780], lr: 2.34375e-05, loss: 5.0273
Epoch [4][290/1780], lr: 2.34375e-05, loss: 5.0621
Epoch [4][295/1780], lr: 2.34375e-05, loss: 4.5547
Epoch [4][300/1780], lr: 2.34375e-05, loss: 4.8768
Epoch [4][305/1780], lr: 2.34375e-05, loss: 4.671
Epoch [4][310/1780], lr: 2.34375e-05, loss: 5.4958
Epoch [4][315/1780], lr: 2.34375e-05, loss: 4.5446
Epoch [4][320/1780], lr: 2.34375e-05, loss: 4.7665
Epoch [4][325/1780], lr: 2.34375e-05, loss: 4.7721
Epoch [4][330/1780], lr: 2.34375

Epoch [4][1040/1780], lr: 2.34375e-05, loss: 4.721
Epoch [4][1045/1780], lr: 2.34375e-05, loss: 4.9349
Epoch [4][1050/1780], lr: 2.34375e-05, loss: 5.6488
Epoch [4][1055/1780], lr: 2.34375e-05, loss: 4.4608
Epoch [4][1060/1780], lr: 2.34375e-05, loss: 4.745
Epoch [4][1065/1780], lr: 2.34375e-05, loss: 5.2446
Epoch [4][1070/1780], lr: 2.34375e-05, loss: 4.919
Epoch [4][1075/1780], lr: 2.34375e-05, loss: 4.5687
Epoch [4][1080/1780], lr: 2.34375e-05, loss: 5.0745
Epoch [4][1085/1780], lr: 2.34375e-05, loss: 5.034
Epoch [4][1090/1780], lr: 2.34375e-05, loss: 4.8207
Epoch [4][1095/1780], lr: 2.34375e-05, loss: 4.4172
Epoch [4][1100/1780], lr: 2.34375e-05, loss: 4.9723
Epoch [4][1105/1780], lr: 2.34375e-05, loss: 4.7819
Epoch [4][1110/1780], lr: 2.34375e-05, loss: 4.8879
Epoch [4][1115/1780], lr: 2.34375e-05, loss: 4.8084
Epoch [4][1120/1780], lr: 2.34375e-05, loss: 5.0509
Epoch [4][1125/1780], lr: 2.34375e-05, loss: 4.5022
Epoch [4][1130/1780], lr: 2.34375e-05, loss: 4.7471
Epoch [4][1135/1

Epoch [5][40/1780], lr: 3.12500e-05, loss: 4.4891
Epoch [5][45/1780], lr: 3.12500e-05, loss: 4.6853
Epoch [5][50/1780], lr: 3.12500e-05, loss: 4.776
Epoch [5][55/1780], lr: 3.12500e-05, loss: 4.7146
Epoch [5][60/1780], lr: 3.12500e-05, loss: 4.965
Epoch [5][65/1780], lr: 3.12500e-05, loss: 4.5424
Epoch [5][70/1780], lr: 3.12500e-05, loss: 4.5862
Epoch [5][75/1780], lr: 3.12500e-05, loss: 4.8146
Epoch [5][80/1780], lr: 3.12500e-05, loss: 4.985
Epoch [5][85/1780], lr: 3.12500e-05, loss: 4.658
Epoch [5][90/1780], lr: 3.12500e-05, loss: 4.7765
Epoch [5][95/1780], lr: 3.12500e-05, loss: 5.0532
Epoch [5][100/1780], lr: 3.12500e-05, loss: 4.8326
Epoch [5][105/1780], lr: 3.12500e-05, loss: 4.6417
Epoch [5][110/1780], lr: 3.12500e-05, loss: 5.176
Epoch [5][115/1780], lr: 3.12500e-05, loss: 4.6153
Epoch [5][120/1780], lr: 3.12500e-05, loss: 4.7544
Epoch [5][125/1780], lr: 3.12500e-05, loss: 5.1608
Epoch [5][130/1780], lr: 3.12500e-05, loss: 4.5673
Epoch [5][135/1780], lr: 3.12500e-05, loss: 5.04

Epoch [5][850/1780], lr: 3.12500e-05, loss: 5.1116
Epoch [5][855/1780], lr: 3.12500e-05, loss: 5.0714
Epoch [5][860/1780], lr: 3.12500e-05, loss: 4.6081
Epoch [5][865/1780], lr: 3.12500e-05, loss: 4.8975
Epoch [5][870/1780], lr: 3.12500e-05, loss: 5.0176
Epoch [5][875/1780], lr: 3.12500e-05, loss: 4.8919
Epoch [5][880/1780], lr: 3.12500e-05, loss: 5.3962
Epoch [5][885/1780], lr: 3.12500e-05, loss: 4.754
Epoch [5][890/1780], lr: 3.12500e-05, loss: 4.6852
Epoch [5][895/1780], lr: 3.12500e-05, loss: 4.3781
Epoch [5][900/1780], lr: 3.12500e-05, loss: 5.1098
Epoch [5][905/1780], lr: 3.12500e-05, loss: 4.3437
Epoch [5][910/1780], lr: 3.12500e-05, loss: 4.6466
Epoch [5][915/1780], lr: 3.12500e-05, loss: 4.7983
Epoch [5][920/1780], lr: 3.12500e-05, loss: 4.9107
Epoch [5][925/1780], lr: 3.12500e-05, loss: 4.66
Epoch [5][930/1780], lr: 3.12500e-05, loss: 4.747
Epoch [5][935/1780], lr: 3.12500e-05, loss: 4.5728
Epoch [5][940/1780], lr: 3.12500e-05, loss: 5.1153
Epoch [5][945/1780], lr: 3.12500e-0

Epoch [5][1645/1780], lr: 3.12500e-05, loss: 4.5884
Epoch [5][1650/1780], lr: 3.12500e-05, loss: 4.7472
Epoch [5][1655/1780], lr: 3.12500e-05, loss: 4.8212
Epoch [5][1660/1780], lr: 3.12500e-05, loss: 4.7812
Epoch [5][1665/1780], lr: 3.12500e-05, loss: 4.9394
Epoch [5][1670/1780], lr: 3.12500e-05, loss: 4.6023
Epoch [5][1675/1780], lr: 3.12500e-05, loss: 4.9655
Epoch [5][1680/1780], lr: 3.12500e-05, loss: 4.9139
Epoch [5][1685/1780], lr: 3.12500e-05, loss: 4.6967
Epoch [5][1690/1780], lr: 3.12500e-05, loss: 4.9056
Epoch [5][1695/1780], lr: 3.12500e-05, loss: 4.5007
Epoch [5][1700/1780], lr: 3.12500e-05, loss: 4.844
Epoch [5][1705/1780], lr: 3.12500e-05, loss: 5.3653
Epoch [5][1710/1780], lr: 3.12500e-05, loss: 5.259
Epoch [5][1715/1780], lr: 3.12500e-05, loss: 4.9127
Epoch [5][1720/1780], lr: 3.12500e-05, loss: 5.1376
Epoch [5][1725/1780], lr: 3.12500e-05, loss: 4.5584
Epoch [5][1730/1780], lr: 3.12500e-05, loss: 5.0397
Epoch [5][1735/1780], lr: 3.12500e-05, loss: 5.0093
Epoch [5][1740

Epoch [6][660/1780], lr: 3.90625e-05, loss: 5.1377
Epoch [6][665/1780], lr: 3.90625e-05, loss: 5.4153
Epoch [6][670/1780], lr: 3.90625e-05, loss: 4.724
Epoch [6][675/1780], lr: 3.90625e-05, loss: 4.9651
Epoch [6][680/1780], lr: 3.90625e-05, loss: 4.6221
Epoch [6][685/1780], lr: 3.90625e-05, loss: 4.9621
Epoch [6][690/1780], lr: 3.90625e-05, loss: 4.8397
Epoch [6][695/1780], lr: 3.90625e-05, loss: 4.8355
Epoch [6][700/1780], lr: 3.90625e-05, loss: 4.9892
Epoch [6][705/1780], lr: 3.90625e-05, loss: 4.7421
Epoch [6][710/1780], lr: 3.90625e-05, loss: 4.0729
Epoch [6][715/1780], lr: 3.90625e-05, loss: 4.6993
Epoch [6][720/1780], lr: 3.90625e-05, loss: 4.9642
Epoch [6][725/1780], lr: 3.90625e-05, loss: 4.9557
Epoch [6][730/1780], lr: 3.90625e-05, loss: 5.1666
Epoch [6][735/1780], lr: 3.90625e-05, loss: 4.6859
Epoch [6][740/1780], lr: 3.90625e-05, loss: 5.2756
Epoch [6][745/1780], lr: 3.90625e-05, loss: 4.6856
Epoch [6][750/1780], lr: 3.90625e-05, loss: 4.5302
Epoch [6][755/1780], lr: 3.90625

Epoch [6][1460/1780], lr: 3.90625e-05, loss: 5.3357
Epoch [6][1465/1780], lr: 3.90625e-05, loss: 5.3768
Epoch [6][1470/1780], lr: 3.90625e-05, loss: 4.795
Epoch [6][1475/1780], lr: 3.90625e-05, loss: 4.3706
Epoch [6][1480/1780], lr: 3.90625e-05, loss: 5.191
Epoch [6][1485/1780], lr: 3.90625e-05, loss: 4.8205
Epoch [6][1490/1780], lr: 3.90625e-05, loss: 4.6032
Epoch [6][1495/1780], lr: 3.90625e-05, loss: 4.8162
Epoch [6][1500/1780], lr: 3.90625e-05, loss: 4.7843
Epoch [6][1505/1780], lr: 3.90625e-05, loss: 4.9373
Epoch [6][1510/1780], lr: 3.90625e-05, loss: 4.901
Epoch [6][1515/1780], lr: 3.90625e-05, loss: 4.5914
Epoch [6][1520/1780], lr: 3.90625e-05, loss: 4.7985
Epoch [6][1525/1780], lr: 3.90625e-05, loss: 5.1254
Epoch [6][1530/1780], lr: 3.90625e-05, loss: 4.8399
Epoch [6][1535/1780], lr: 3.90625e-05, loss: 5.0713
Epoch [6][1540/1780], lr: 3.90625e-05, loss: 4.5391
Epoch [6][1545/1780], lr: 3.90625e-05, loss: 5.1747
Epoch [6][1550/1780], lr: 3.90625e-05, loss: 4.8738
Epoch [6][1555/

Epoch [7][470/1780], lr: 4.68750e-05, loss: 4.6968
Epoch [7][475/1780], lr: 4.68750e-05, loss: 5.0715
Epoch [7][480/1780], lr: 4.68750e-05, loss: 4.9164
Epoch [7][485/1780], lr: 4.68750e-05, loss: 5.2222
Epoch [7][490/1780], lr: 4.68750e-05, loss: 4.7663
Epoch [7][495/1780], lr: 4.68750e-05, loss: 4.7067
Epoch [7][500/1780], lr: 4.68750e-05, loss: 4.6044
Epoch [7][505/1780], lr: 4.68750e-05, loss: 4.5146
Epoch [7][510/1780], lr: 4.68750e-05, loss: 3.972
Epoch [7][515/1780], lr: 4.68750e-05, loss: 4.6023
Epoch [7][520/1780], lr: 4.68750e-05, loss: 4.9534
Epoch [7][525/1780], lr: 4.68750e-05, loss: 5.2612
Epoch [7][530/1780], lr: 4.68750e-05, loss: 5.3004
Epoch [7][535/1780], lr: 4.68750e-05, loss: 5.3136
Epoch [7][540/1780], lr: 4.68750e-05, loss: 5.2373
Epoch [7][545/1780], lr: 4.68750e-05, loss: 5.08
Epoch [7][550/1780], lr: 4.68750e-05, loss: 5.0389
Epoch [7][555/1780], lr: 4.68750e-05, loss: 5.318
Epoch [7][560/1780], lr: 4.68750e-05, loss: 4.8131
Epoch [7][565/1780], lr: 4.68750e-0

Epoch [7][1275/1780], lr: 4.68750e-05, loss: 4.7179
Epoch [7][1280/1780], lr: 4.68750e-05, loss: 4.781
Epoch [7][1285/1780], lr: 4.68750e-05, loss: 5.3647
Epoch [7][1290/1780], lr: 4.68750e-05, loss: 4.6394
Epoch [7][1295/1780], lr: 4.68750e-05, loss: 4.8496
Epoch [7][1300/1780], lr: 4.68750e-05, loss: 5.0654
Epoch [7][1305/1780], lr: 4.68750e-05, loss: 5.0453
Epoch [7][1310/1780], lr: 4.68750e-05, loss: 4.7483
Epoch [7][1315/1780], lr: 4.68750e-05, loss: 4.2967
Epoch [7][1320/1780], lr: 4.68750e-05, loss: 4.3594
Epoch [7][1325/1780], lr: 4.68750e-05, loss: 4.6798
Epoch [7][1330/1780], lr: 4.68750e-05, loss: 4.7992
Epoch [7][1335/1780], lr: 4.68750e-05, loss: 5.0121
Epoch [7][1340/1780], lr: 4.68750e-05, loss: 4.8187
Epoch [7][1345/1780], lr: 4.68750e-05, loss: 5.4812
Epoch [7][1350/1780], lr: 4.68750e-05, loss: 4.6882
Epoch [7][1355/1780], lr: 4.68750e-05, loss: 4.7561
Epoch [7][1360/1780], lr: 4.68750e-05, loss: 5.2644
Epoch [7][1365/1780], lr: 4.68750e-05, loss: 4.8109
Epoch [7][137

Epoch [8][280/1780], lr: 5.46875e-05, loss: 4.9685
Epoch [8][285/1780], lr: 5.46875e-05, loss: 4.6669
Epoch [8][290/1780], lr: 5.46875e-05, loss: 5.1699
Epoch [8][295/1780], lr: 5.46875e-05, loss: 5.2719
Epoch [8][300/1780], lr: 5.46875e-05, loss: 4.7963
Epoch [8][305/1780], lr: 5.46875e-05, loss: 5.1849
Epoch [8][310/1780], lr: 5.46875e-05, loss: 5.0728
Epoch [8][315/1780], lr: 5.46875e-05, loss: 5.2664
Epoch [8][320/1780], lr: 5.46875e-05, loss: 4.7278
Epoch [8][325/1780], lr: 5.46875e-05, loss: 4.395
Epoch [8][330/1780], lr: 5.46875e-05, loss: 5.3047
Epoch [8][335/1780], lr: 5.46875e-05, loss: 4.6706
Epoch [8][340/1780], lr: 5.46875e-05, loss: 4.9397
Epoch [8][345/1780], lr: 5.46875e-05, loss: 5.5567
Epoch [8][350/1780], lr: 5.46875e-05, loss: 5.1459
Epoch [8][355/1780], lr: 5.46875e-05, loss: 4.831
Epoch [8][360/1780], lr: 5.46875e-05, loss: 5.3498
Epoch [8][365/1780], lr: 5.46875e-05, loss: 5.1227
Epoch [8][370/1780], lr: 5.46875e-05, loss: 4.6706
Epoch [8][375/1780], lr: 5.46875e

Epoch [8][1085/1780], lr: 5.46875e-05, loss: 4.8513
Epoch [8][1090/1780], lr: 5.46875e-05, loss: 4.7474
Epoch [8][1095/1780], lr: 5.46875e-05, loss: 4.8188
Epoch [8][1100/1780], lr: 5.46875e-05, loss: 5.5544
Epoch [8][1105/1780], lr: 5.46875e-05, loss: 5.2412
Epoch [8][1110/1780], lr: 5.46875e-05, loss: 4.9037
Epoch [8][1115/1780], lr: 5.46875e-05, loss: 4.9775
Epoch [8][1120/1780], lr: 5.46875e-05, loss: 4.9211
Epoch [8][1125/1780], lr: 5.46875e-05, loss: 4.6223
Epoch [8][1130/1780], lr: 5.46875e-05, loss: 5.509
Epoch [8][1135/1780], lr: 5.46875e-05, loss: 4.7333
Epoch [8][1140/1780], lr: 5.46875e-05, loss: 5.2371
Epoch [8][1145/1780], lr: 5.46875e-05, loss: 5.1195
Epoch [8][1150/1780], lr: 5.46875e-05, loss: 5.2222
Epoch [8][1155/1780], lr: 5.46875e-05, loss: 4.954
Epoch [8][1160/1780], lr: 5.46875e-05, loss: 5.008
Epoch [8][1165/1780], lr: 5.46875e-05, loss: 4.6607
Epoch [8][1170/1780], lr: 5.46875e-05, loss: 4.4608
Epoch [8][1175/1780], lr: 5.46875e-05, loss: 4.8332
Epoch [8][1180/

Epoch [9][85/1780], lr: 6.25000e-05, loss: 5.5503
Epoch [9][90/1780], lr: 6.25000e-05, loss: 5.1838
Epoch [9][95/1780], lr: 6.25000e-05, loss: 5.0226
Epoch [9][100/1780], lr: 6.25000e-05, loss: 5.6389
Epoch [9][105/1780], lr: 6.25000e-05, loss: 4.9627
Epoch [9][110/1780], lr: 6.25000e-05, loss: 4.9254
Epoch [9][115/1780], lr: 6.25000e-05, loss: 4.7485
Epoch [9][120/1780], lr: 6.25000e-05, loss: 5.6587
Epoch [9][125/1780], lr: 6.25000e-05, loss: 4.4893
Epoch [9][130/1780], lr: 6.25000e-05, loss: 5.4374
Epoch [9][135/1780], lr: 6.25000e-05, loss: 4.9038
Epoch [9][140/1780], lr: 6.25000e-05, loss: 4.8288
Epoch [9][145/1780], lr: 6.25000e-05, loss: 4.2567
Epoch [9][150/1780], lr: 6.25000e-05, loss: 5.1319
Epoch [9][155/1780], lr: 6.25000e-05, loss: 5.1119
Epoch [9][160/1780], lr: 6.25000e-05, loss: 4.8764
Epoch [9][165/1780], lr: 6.25000e-05, loss: 4.8301
Epoch [9][170/1780], lr: 6.25000e-05, loss: 5.16
Epoch [9][175/1780], lr: 6.25000e-05, loss: 5.3914
Epoch [9][180/1780], lr: 6.25000e-05

Epoch [9][890/1780], lr: 6.25000e-05, loss: 5.0257
Epoch [9][895/1780], lr: 6.25000e-05, loss: 4.4534
Epoch [9][900/1780], lr: 6.25000e-05, loss: 5.4636
Epoch [9][905/1780], lr: 6.25000e-05, loss: 4.8164
Epoch [9][910/1780], lr: 6.25000e-05, loss: 4.8741
Epoch [9][915/1780], lr: 6.25000e-05, loss: 4.9809
Epoch [9][920/1780], lr: 6.25000e-05, loss: 5.317
Epoch [9][925/1780], lr: 6.25000e-05, loss: 5.1168
Epoch [9][930/1780], lr: 6.25000e-05, loss: 5.1873
Epoch [9][935/1780], lr: 6.25000e-05, loss: 4.9691
Epoch [9][940/1780], lr: 6.25000e-05, loss: 4.8231
Epoch [9][945/1780], lr: 6.25000e-05, loss: 4.5552
Epoch [9][950/1780], lr: 6.25000e-05, loss: 4.8929
Epoch [9][955/1780], lr: 6.25000e-05, loss: 5.0795
Epoch [9][960/1780], lr: 6.25000e-05, loss: 5.318
Epoch [9][965/1780], lr: 6.25000e-05, loss: 5.2431
Epoch [9][970/1780], lr: 6.25000e-05, loss: 4.3618
Epoch [9][975/1780], lr: 6.25000e-05, loss: 4.8023
Epoch [9][980/1780], lr: 6.25000e-05, loss: 5.0808
Epoch [9][985/1780], lr: 6.25000e

Epoch [9][1685/1780], lr: 6.25000e-05, loss: 5.3739
Epoch [9][1690/1780], lr: 6.25000e-05, loss: 5.3379
Epoch [9][1695/1780], lr: 6.25000e-05, loss: 4.9386
Epoch [9][1700/1780], lr: 6.25000e-05, loss: 5.3692
Epoch [9][1705/1780], lr: 6.25000e-05, loss: 4.8644
Epoch [9][1710/1780], lr: 6.25000e-05, loss: 5.541
Epoch [9][1715/1780], lr: 6.25000e-05, loss: 5.2618
Epoch [9][1720/1780], lr: 6.25000e-05, loss: 4.4446
Epoch [9][1725/1780], lr: 6.25000e-05, loss: 4.6811
Epoch [9][1730/1780], lr: 6.25000e-05, loss: 5.1445
Epoch [9][1735/1780], lr: 6.25000e-05, loss: 4.8757
Epoch [9][1740/1780], lr: 6.25000e-05, loss: 5.1306
Epoch [9][1745/1780], lr: 6.25000e-05, loss: 4.7456
Epoch [9][1750/1780], lr: 6.25000e-05, loss: 5.5671
Epoch [9][1755/1780], lr: 6.25000e-05, loss: 4.825
Epoch [9][1760/1780], lr: 6.25000e-05, loss: 4.5669
Epoch [9][1765/1780], lr: 6.25000e-05, loss: 5.4282
Epoch [9][1770/1780], lr: 6.25000e-05, loss: 5.0697
Epoch [9][1775/1780], lr: 6.25000e-05, loss: 4.3586
Epoch [9][1780

Epoch [10][685/1780], lr: 7.03125e-05, loss: 5.0925
Epoch [10][690/1780], lr: 7.03125e-05, loss: 4.964
Epoch [10][695/1780], lr: 7.03125e-05, loss: 4.06
Epoch [10][700/1780], lr: 7.03125e-05, loss: 4.8675
Epoch [10][705/1780], lr: 7.03125e-05, loss: 5.4289
Epoch [10][710/1780], lr: 7.03125e-05, loss: 5.1027
Epoch [10][715/1780], lr: 7.03125e-05, loss: 5.519
Epoch [10][720/1780], lr: 7.03125e-05, loss: 5.3274
Epoch [10][725/1780], lr: 7.03125e-05, loss: 4.8206
Epoch [10][730/1780], lr: 7.03125e-05, loss: 4.6753
Epoch [10][735/1780], lr: 7.03125e-05, loss: 4.4869
Epoch [10][740/1780], lr: 7.03125e-05, loss: 5.2831
Epoch [10][745/1780], lr: 7.03125e-05, loss: 4.7258
Epoch [10][750/1780], lr: 7.03125e-05, loss: 4.3973
Epoch [10][755/1780], lr: 7.03125e-05, loss: 4.4537
Epoch [10][760/1780], lr: 7.03125e-05, loss: 4.7517
Epoch [10][765/1780], lr: 7.03125e-05, loss: 4.8452
Epoch [10][770/1780], lr: 7.03125e-05, loss: 5.64
Epoch [10][775/1780], lr: 7.03125e-05, loss: 4.7332
Epoch [10][780/178

Epoch [10][1470/1780], lr: 7.03125e-05, loss: 4.8571
Epoch [10][1475/1780], lr: 7.03125e-05, loss: 4.9623
Epoch [10][1480/1780], lr: 7.03125e-05, loss: 5.5205
Epoch [10][1485/1780], lr: 7.03125e-05, loss: 4.2126
Epoch [10][1490/1780], lr: 7.03125e-05, loss: 5.0955
Epoch [10][1495/1780], lr: 7.03125e-05, loss: 4.6833
Epoch [10][1500/1780], lr: 7.03125e-05, loss: 5.0759
Epoch [10][1505/1780], lr: 7.03125e-05, loss: 5.013
Epoch [10][1510/1780], lr: 7.03125e-05, loss: 5.4537
Epoch [10][1515/1780], lr: 7.03125e-05, loss: 5.4072
Epoch [10][1520/1780], lr: 7.03125e-05, loss: 4.8226
Epoch [10][1525/1780], lr: 7.03125e-05, loss: 5.2864
Epoch [10][1530/1780], lr: 7.03125e-05, loss: 5.6274
Epoch [10][1535/1780], lr: 7.03125e-05, loss: 5.1505
Epoch [10][1540/1780], lr: 7.03125e-05, loss: 5.1321
Epoch [10][1545/1780], lr: 7.03125e-05, loss: 5.59
Epoch [10][1550/1780], lr: 7.03125e-05, loss: 5.3424
Epoch [10][1555/1780], lr: 7.03125e-05, loss: 4.5961
Epoch [10][1560/1780], lr: 7.03125e-05, loss: 3.9

Epoch [11][465/1780], lr: 7.81250e-05, loss: 4.9387
Epoch [11][470/1780], lr: 7.81250e-05, loss: 5.1967
Epoch [11][475/1780], lr: 7.81250e-05, loss: 4.4478
Epoch [11][480/1780], lr: 7.81250e-05, loss: 5.112
Epoch [11][485/1780], lr: 7.81250e-05, loss: 4.8158
Epoch [11][490/1780], lr: 7.81250e-05, loss: 5.1991
Epoch [11][495/1780], lr: 7.81250e-05, loss: 4.5791
Epoch [11][500/1780], lr: 7.81250e-05, loss: 5.3507
Epoch [11][505/1780], lr: 7.81250e-05, loss: 4.6175
Epoch [11][510/1780], lr: 7.81250e-05, loss: 4.4916
Epoch [11][515/1780], lr: 7.81250e-05, loss: 5.3994
Epoch [11][520/1780], lr: 7.81250e-05, loss: 5.0505
Epoch [11][525/1780], lr: 7.81250e-05, loss: 4.8859
Epoch [11][530/1780], lr: 7.81250e-05, loss: 4.9596
Epoch [11][535/1780], lr: 7.81250e-05, loss: 4.9032
Epoch [11][540/1780], lr: 7.81250e-05, loss: 5.1896
Epoch [11][545/1780], lr: 7.81250e-05, loss: 5.0423
Epoch [11][550/1780], lr: 7.81250e-05, loss: 4.3629
Epoch [11][555/1780], lr: 7.81250e-05, loss: 4.8309
Epoch [11][56

Epoch [11][1250/1780], lr: 7.81250e-05, loss: 5.5237
Epoch [11][1255/1780], lr: 7.81250e-05, loss: 3.8391
Epoch [11][1260/1780], lr: 7.81250e-05, loss: 4.7603
Epoch [11][1265/1780], lr: 7.81250e-05, loss: 4.5988
Epoch [11][1270/1780], lr: 7.81250e-05, loss: 4.8519
Epoch [11][1275/1780], lr: 7.81250e-05, loss: 4.9116
Epoch [11][1280/1780], lr: 7.81250e-05, loss: 4.9687
Epoch [11][1285/1780], lr: 7.81250e-05, loss: 4.9559
Epoch [11][1290/1780], lr: 7.81250e-05, loss: 4.9727
Epoch [11][1295/1780], lr: 7.81250e-05, loss: 5.6555
Epoch [11][1300/1780], lr: 7.81250e-05, loss: 5.4887
Epoch [11][1305/1780], lr: 7.81250e-05, loss: 4.2254
Epoch [11][1310/1780], lr: 7.81250e-05, loss: 5.6053
Epoch [11][1315/1780], lr: 7.81250e-05, loss: 4.2887
Epoch [11][1320/1780], lr: 7.81250e-05, loss: 5.0412
Epoch [11][1325/1780], lr: 7.81250e-05, loss: 4.988
Epoch [11][1330/1780], lr: 7.81250e-05, loss: 5.5355
Epoch [11][1335/1780], lr: 7.81250e-05, loss: 4.6014
Epoch [11][1340/1780], lr: 7.81250e-05, loss: 5

Epoch [12][240/1780], lr: 8.59375e-05, loss: 5.11
Epoch [12][245/1780], lr: 8.59375e-05, loss: 4.8266
Epoch [12][250/1780], lr: 8.59375e-05, loss: 4.9533
Epoch [12][255/1780], lr: 8.59375e-05, loss: 4.551
Epoch [12][260/1780], lr: 8.59375e-05, loss: 4.8364
Epoch [12][265/1780], lr: 8.59375e-05, loss: 5.4214
Epoch [12][270/1780], lr: 8.59375e-05, loss: 5.1115
Epoch [12][275/1780], lr: 8.59375e-05, loss: 5.1841
Epoch [12][280/1780], lr: 8.59375e-05, loss: 5.8528
Epoch [12][285/1780], lr: 8.59375e-05, loss: 5.2248
Epoch [12][290/1780], lr: 8.59375e-05, loss: 5.7106
Epoch [12][295/1780], lr: 8.59375e-05, loss: 5.2969
Epoch [12][300/1780], lr: 8.59375e-05, loss: 5.4708
Epoch [12][305/1780], lr: 8.59375e-05, loss: 5.4676
Epoch [12][310/1780], lr: 8.59375e-05, loss: 5.2325
Epoch [12][315/1780], lr: 8.59375e-05, loss: 5.3465
Epoch [12][320/1780], lr: 8.59375e-05, loss: 4.5692
Epoch [12][325/1780], lr: 8.59375e-05, loss: 4.6286
Epoch [12][330/1780], lr: 8.59375e-05, loss: 4.7118
Epoch [12][335/

Epoch [12][1030/1780], lr: 8.59375e-05, loss: 4.54
Epoch [12][1035/1780], lr: 8.59375e-05, loss: 5.2729
Epoch [12][1040/1780], lr: 8.59375e-05, loss: 5.1569
Epoch [12][1045/1780], lr: 8.59375e-05, loss: 4.2227
Epoch [12][1050/1780], lr: 8.59375e-05, loss: 4.1649
Epoch [12][1055/1780], lr: 8.59375e-05, loss: 5.7333
Epoch [12][1060/1780], lr: 8.59375e-05, loss: 5.2539
Epoch [12][1065/1780], lr: 8.59375e-05, loss: 5.1327
Epoch [12][1070/1780], lr: 8.59375e-05, loss: 4.9468
Epoch [12][1075/1780], lr: 8.59375e-05, loss: 4.7147
Epoch [12][1080/1780], lr: 8.59375e-05, loss: 5.4765
Epoch [12][1085/1780], lr: 8.59375e-05, loss: 5.0198
Epoch [12][1090/1780], lr: 8.59375e-05, loss: 4.8167
Epoch [12][1095/1780], lr: 8.59375e-05, loss: 4.9064
Epoch [12][1100/1780], lr: 8.59375e-05, loss: 5.7454
Epoch [12][1105/1780], lr: 8.59375e-05, loss: 5.3181
Epoch [12][1110/1780], lr: 8.59375e-05, loss: 5.606
Epoch [12][1115/1780], lr: 8.59375e-05, loss: 5.3696
Epoch [12][1120/1780], lr: 8.59375e-05, loss: 5.1

Epoch [13][15/1780], lr: 9.37500e-05, loss: 4.4234
Epoch [13][20/1780], lr: 9.37500e-05, loss: 5.4671
Epoch [13][25/1780], lr: 9.37500e-05, loss: 5.3411
Epoch [13][30/1780], lr: 9.37500e-05, loss: 5.4298
Epoch [13][35/1780], lr: 9.37500e-05, loss: 4.9149
Epoch [13][40/1780], lr: 9.37500e-05, loss: 5.0395
Epoch [13][45/1780], lr: 9.37500e-05, loss: 5.2811
Epoch [13][50/1780], lr: 9.37500e-05, loss: 4.6721
Epoch [13][55/1780], lr: 9.37500e-05, loss: 4.8651
Epoch [13][60/1780], lr: 9.37500e-05, loss: 4.6092
Epoch [13][65/1780], lr: 9.37500e-05, loss: 4.973
Epoch [13][70/1780], lr: 9.37500e-05, loss: 4.8209
Epoch [13][75/1780], lr: 9.37500e-05, loss: 5.1749
Epoch [13][80/1780], lr: 9.37500e-05, loss: 4.9791
Epoch [13][85/1780], lr: 9.37500e-05, loss: 6.0038
Epoch [13][90/1780], lr: 9.37500e-05, loss: 4.6724
Epoch [13][95/1780], lr: 9.37500e-05, loss: 4.7876
Epoch [13][100/1780], lr: 9.37500e-05, loss: 5.3786
Epoch [13][105/1780], lr: 9.37500e-05, loss: 4.6986
Epoch [13][110/1780], lr: 9.37

Epoch [13][810/1780], lr: 9.37500e-05, loss: 5.4034
Epoch [13][815/1780], lr: 9.37500e-05, loss: 5.3387
Epoch [13][820/1780], lr: 9.37500e-05, loss: 4.8179
Epoch [13][825/1780], lr: 9.37500e-05, loss: 4.869
Epoch [13][830/1780], lr: 9.37500e-05, loss: 4.6719
Epoch [13][835/1780], lr: 9.37500e-05, loss: 4.8342
Epoch [13][840/1780], lr: 9.37500e-05, loss: 4.8615
Epoch [13][845/1780], lr: 9.37500e-05, loss: 4.5985
Epoch [13][850/1780], lr: 9.37500e-05, loss: 4.9365
Epoch [13][855/1780], lr: 9.37500e-05, loss: 5.1512
Epoch [13][860/1780], lr: 9.37500e-05, loss: 5.444
Epoch [13][865/1780], lr: 9.37500e-05, loss: 5.2114
Epoch [13][870/1780], lr: 9.37500e-05, loss: 4.7864
Epoch [13][875/1780], lr: 9.37500e-05, loss: 5.5415
Epoch [13][880/1780], lr: 9.37500e-05, loss: 5.0768
Epoch [13][885/1780], lr: 9.37500e-05, loss: 4.4179
Epoch [13][890/1780], lr: 9.37500e-05, loss: 4.7738
Epoch [13][895/1780], lr: 9.37500e-05, loss: 5.3321
Epoch [13][900/1780], lr: 9.37500e-05, loss: 5.1233
Epoch [13][905

Epoch [13][1590/1780], lr: 9.37500e-05, loss: 4.6047
Epoch [13][1595/1780], lr: 9.37500e-05, loss: 5.2125
Epoch [13][1600/1780], lr: 9.37500e-05, loss: 5.0767
Epoch [13][1605/1780], lr: 9.37500e-05, loss: 5.5221
Epoch [13][1610/1780], lr: 9.37500e-05, loss: 5.3013
Epoch [13][1615/1780], lr: 9.37500e-05, loss: 4.9396
Epoch [13][1620/1780], lr: 9.37500e-05, loss: 4.659
Epoch [13][1625/1780], lr: 9.37500e-05, loss: 4.822
Epoch [13][1630/1780], lr: 9.37500e-05, loss: 4.9695
Epoch [13][1635/1780], lr: 9.37500e-05, loss: 5.5649
Epoch [13][1640/1780], lr: 9.37500e-05, loss: 5.2772
Epoch [13][1645/1780], lr: 9.37500e-05, loss: 5.0421
Epoch [13][1650/1780], lr: 9.37500e-05, loss: 5.3047
Epoch [13][1655/1780], lr: 9.37500e-05, loss: 4.5927
Epoch [13][1660/1780], lr: 9.37500e-05, loss: 5.2902
Epoch [13][1665/1780], lr: 9.37500e-05, loss: 5.428
Epoch [13][1670/1780], lr: 9.37500e-05, loss: 5.9621
Epoch [13][1675/1780], lr: 9.37500e-05, loss: 5.4151
Epoch [13][1680/1780], lr: 9.37500e-05, loss: 4.6

Epoch [14][585/1780], lr: 1.01563e-04, loss: 5.3799
Epoch [14][590/1780], lr: 1.01563e-04, loss: 5.2448
Epoch [14][595/1780], lr: 1.01563e-04, loss: 5.353
Epoch [14][600/1780], lr: 1.01563e-04, loss: 4.9363
Epoch [14][605/1780], lr: 1.01563e-04, loss: 4.6961
Epoch [14][610/1780], lr: 1.01563e-04, loss: 4.7619
Epoch [14][615/1780], lr: 1.01563e-04, loss: 5.2945
Epoch [14][620/1780], lr: 1.01563e-04, loss: 4.7065
Epoch [14][625/1780], lr: 1.01563e-04, loss: 4.3567
Epoch [14][630/1780], lr: 1.01563e-04, loss: 5.5943
Epoch [14][635/1780], lr: 1.01563e-04, loss: 4.7352
Epoch [14][640/1780], lr: 1.01563e-04, loss: 5.3857
Epoch [14][645/1780], lr: 1.01563e-04, loss: 4.8134
Epoch [14][650/1780], lr: 1.01563e-04, loss: 5.073
Epoch [14][655/1780], lr: 1.01563e-04, loss: 5.4817
Epoch [14][660/1780], lr: 1.01563e-04, loss: 4.6289
Epoch [14][665/1780], lr: 1.01563e-04, loss: 4.7182
Epoch [14][670/1780], lr: 1.01563e-04, loss: 5.5495
Epoch [14][675/1780], lr: 1.01563e-04, loss: 4.9322
Epoch [14][680

Epoch [14][1370/1780], lr: 1.01563e-04, loss: 5.2876
Epoch [14][1375/1780], lr: 1.01563e-04, loss: 4.8876
Epoch [14][1380/1780], lr: 1.01563e-04, loss: 5.2803
Epoch [14][1385/1780], lr: 1.01563e-04, loss: 5.4058
Epoch [14][1390/1780], lr: 1.01563e-04, loss: 5.6666
Epoch [14][1395/1780], lr: 1.01563e-04, loss: 5.0987
Epoch [14][1400/1780], lr: 1.01563e-04, loss: 5.1663
Epoch [14][1405/1780], lr: 1.01563e-04, loss: 4.4595
Epoch [14][1410/1780], lr: 1.01563e-04, loss: 5.0756
Epoch [14][1415/1780], lr: 1.01563e-04, loss: 4.9161
Epoch [14][1420/1780], lr: 1.01563e-04, loss: 4.2897
Epoch [14][1425/1780], lr: 1.01563e-04, loss: 4.2948
Epoch [14][1430/1780], lr: 1.01563e-04, loss: 4.253
Epoch [14][1435/1780], lr: 1.01563e-04, loss: 5.4561
Epoch [14][1440/1780], lr: 1.01563e-04, loss: 4.7397
Epoch [14][1445/1780], lr: 1.01563e-04, loss: 5.201
Epoch [14][1450/1780], lr: 1.01563e-04, loss: 5.3672
Epoch [14][1455/1780], lr: 1.01563e-04, loss: 4.5108
Epoch [14][1460/1780], lr: 1.01563e-04, loss: 5.

Epoch [15][360/1780], lr: 1.09375e-04, loss: 4.7744
Epoch [15][365/1780], lr: 1.09375e-04, loss: 4.8528
Epoch [15][370/1780], lr: 1.09375e-04, loss: 5.057
Epoch [15][375/1780], lr: 1.09375e-04, loss: 4.9155
Epoch [15][380/1780], lr: 1.09375e-04, loss: 5.8651
Epoch [15][385/1780], lr: 1.09375e-04, loss: 5.2629
Epoch [15][390/1780], lr: 1.09375e-04, loss: 5.9112
Epoch [15][395/1780], lr: 1.09375e-04, loss: 4.7143
Epoch [15][400/1780], lr: 1.09375e-04, loss: 4.6444
Epoch [15][405/1780], lr: 1.09375e-04, loss: 4.5957
Epoch [15][410/1780], lr: 1.09375e-04, loss: 5.1428
Epoch [15][415/1780], lr: 1.09375e-04, loss: 5.613
Epoch [15][420/1780], lr: 1.09375e-04, loss: 5.5381
Epoch [15][425/1780], lr: 1.09375e-04, loss: 5.2611
Epoch [15][430/1780], lr: 1.09375e-04, loss: 5.1837
Epoch [15][435/1780], lr: 1.09375e-04, loss: 4.875
Epoch [15][440/1780], lr: 1.09375e-04, loss: 5.137
Epoch [15][445/1780], lr: 1.09375e-04, loss: 5.6141
Epoch [15][450/1780], lr: 1.09375e-04, loss: 4.823
Epoch [15][455/17

Epoch [15][1150/1780], lr: 1.09375e-04, loss: 5.4516
Epoch [15][1155/1780], lr: 1.09375e-04, loss: 4.7688
Epoch [15][1160/1780], lr: 1.09375e-04, loss: 4.6702
Epoch [15][1165/1780], lr: 1.09375e-04, loss: 5.1525
Epoch [15][1170/1780], lr: 1.09375e-04, loss: 5.6499
Epoch [15][1175/1780], lr: 1.09375e-04, loss: 5.0045
Epoch [15][1180/1780], lr: 1.09375e-04, loss: 5.1871
Epoch [15][1185/1780], lr: 1.09375e-04, loss: 5.0676
Epoch [15][1190/1780], lr: 1.09375e-04, loss: 4.6358
Epoch [15][1195/1780], lr: 1.09375e-04, loss: 4.4371
Epoch [15][1200/1780], lr: 1.09375e-04, loss: 4.5487
Epoch [15][1205/1780], lr: 1.09375e-04, loss: 5.2704
Epoch [15][1210/1780], lr: 1.09375e-04, loss: 5.3461
Epoch [15][1215/1780], lr: 1.09375e-04, loss: 5.3412
Epoch [15][1220/1780], lr: 1.09375e-04, loss: 5.2391
Epoch [15][1225/1780], lr: 1.09375e-04, loss: 5.0423
Epoch [15][1230/1780], lr: 1.09375e-04, loss: 4.8767
Epoch [15][1235/1780], lr: 1.09375e-04, loss: 5.512
Epoch [15][1240/1780], lr: 1.09375e-04, loss: 4

Epoch [16][140/1780], lr: 1.17187e-04, loss: 5.1244
Epoch [16][145/1780], lr: 1.17187e-04, loss: 4.9803
Epoch [16][150/1780], lr: 1.17187e-04, loss: 4.9741
Epoch [16][155/1780], lr: 1.17187e-04, loss: 5.7042
Epoch [16][160/1780], lr: 1.17187e-04, loss: 5.2723
Epoch [16][165/1780], lr: 1.17187e-04, loss: 4.8761
Epoch [16][170/1780], lr: 1.17187e-04, loss: 4.5452
Epoch [16][175/1780], lr: 1.17187e-04, loss: 5.7235
Epoch [16][180/1780], lr: 1.17187e-04, loss: 5.0772
Epoch [16][185/1780], lr: 1.17187e-04, loss: 4.9284
Epoch [16][190/1780], lr: 1.17187e-04, loss: 5.5886
Epoch [16][195/1780], lr: 1.17187e-04, loss: 4.2891
Epoch [16][200/1780], lr: 1.17187e-04, loss: 4.9612
Epoch [16][205/1780], lr: 1.17187e-04, loss: 5.6969
Epoch [16][210/1780], lr: 1.17187e-04, loss: 5.4264
Epoch [16][215/1780], lr: 1.17187e-04, loss: 4.1314
Epoch [16][220/1780], lr: 1.17187e-04, loss: 6.0023
Epoch [16][225/1780], lr: 1.17187e-04, loss: 5.0512
Epoch [16][230/1780], lr: 1.17187e-04, loss: 4.4444
Epoch [16][2

Epoch [16][930/1780], lr: 1.17187e-04, loss: 4.9556
Epoch [16][935/1780], lr: 1.17187e-04, loss: 5.4905
Epoch [16][940/1780], lr: 1.17187e-04, loss: 5.5612
Epoch [16][945/1780], lr: 1.17187e-04, loss: 5.4153
Epoch [16][950/1780], lr: 1.17187e-04, loss: 4.9761
Epoch [16][955/1780], lr: 1.17187e-04, loss: 5.1083
Epoch [16][960/1780], lr: 1.17187e-04, loss: 4.8324
Epoch [16][965/1780], lr: 1.17187e-04, loss: 5.6679
Epoch [16][970/1780], lr: 1.17187e-04, loss: 4.9526
Epoch [16][975/1780], lr: 1.17187e-04, loss: 5.0432
Epoch [16][980/1780], lr: 1.17187e-04, loss: 4.9003
Epoch [16][985/1780], lr: 1.17187e-04, loss: 4.6685
Epoch [16][990/1780], lr: 1.17187e-04, loss: 4.9571
Epoch [16][995/1780], lr: 1.17187e-04, loss: 4.798
Epoch [16][1000/1780], lr: 1.17187e-04, loss: 4.9519
Epoch [16][1005/1780], lr: 1.17187e-04, loss: 5.4869
Epoch [16][1010/1780], lr: 1.17187e-04, loss: 4.6199
Epoch [16][1015/1780], lr: 1.17187e-04, loss: 4.6161
Epoch [16][1020/1780], lr: 1.17187e-04, loss: 4.9012
Epoch [1

Epoch [16][1710/1780], lr: 1.17187e-04, loss: 4.775
Epoch [16][1715/1780], lr: 1.17187e-04, loss: 5.1821
Epoch [16][1720/1780], lr: 1.17187e-04, loss: 5.3229
Epoch [16][1725/1780], lr: 1.17187e-04, loss: 5.2915
Epoch [16][1730/1780], lr: 1.17187e-04, loss: 4.5713
Epoch [16][1735/1780], lr: 1.17187e-04, loss: 5.4539
Epoch [16][1740/1780], lr: 1.17187e-04, loss: 5.4264
Epoch [16][1745/1780], lr: 1.17187e-04, loss: 4.9485
Epoch [16][1750/1780], lr: 1.17187e-04, loss: 4.3388
Epoch [16][1755/1780], lr: 1.17187e-04, loss: 4.5891
Epoch [16][1760/1780], lr: 1.17187e-04, loss: 4.9025
Epoch [16][1765/1780], lr: 1.17187e-04, loss: 5.3535
Epoch [16][1770/1780], lr: 1.17187e-04, loss: 5.2166
Epoch [16][1775/1780], lr: 1.17187e-04, loss: 4.693
Epoch [16][1780/1780], lr: 1.17187e-04, loss: 5.5152
Evaluating top_k_accuracy...
top1_acc: 0.0155, top5_acc: 0.04651, train_loss: 5.5152, val_loss: 4.8571
Saving checkpoint at 16 epochs...
Epoch [17][5/1780], lr: 1.25000e-04, loss: 5.001
Epoch [17][10/1780], 

Epoch [17][710/1780], lr: 1.25000e-04, loss: 5.2683
Epoch [17][715/1780], lr: 1.25000e-04, loss: 5.4051
Epoch [17][720/1780], lr: 1.25000e-04, loss: 5.354
Epoch [17][725/1780], lr: 1.25000e-04, loss: 4.9145
Epoch [17][730/1780], lr: 1.25000e-04, loss: 5.2453
Epoch [17][735/1780], lr: 1.25000e-04, loss: 4.5809
Epoch [17][740/1780], lr: 1.25000e-04, loss: 5.5659
Epoch [17][745/1780], lr: 1.25000e-04, loss: 5.1607
Epoch [17][750/1780], lr: 1.25000e-04, loss: 5.303
Epoch [17][755/1780], lr: 1.25000e-04, loss: 5.4821
Epoch [17][760/1780], lr: 1.25000e-04, loss: 5.2675
Epoch [17][765/1780], lr: 1.25000e-04, loss: 4.5652
Epoch [17][770/1780], lr: 1.25000e-04, loss: 5.061
Epoch [17][775/1780], lr: 1.25000e-04, loss: 4.4731
Epoch [17][780/1780], lr: 1.25000e-04, loss: 5.2909
Epoch [17][785/1780], lr: 1.25000e-04, loss: 5.6672
Epoch [17][790/1780], lr: 1.25000e-04, loss: 4.9138
Epoch [17][795/1780], lr: 1.25000e-04, loss: 4.8234
Epoch [17][800/1780], lr: 1.25000e-04, loss: 5.0261
Epoch [17][805/

Epoch [17][1490/1780], lr: 1.25000e-04, loss: 5.0161
Epoch [17][1495/1780], lr: 1.25000e-04, loss: 6.0762
Epoch [17][1500/1780], lr: 1.25000e-04, loss: 4.44
Epoch [17][1505/1780], lr: 1.25000e-04, loss: 4.404
Epoch [17][1510/1780], lr: 1.25000e-04, loss: 5.0549
Epoch [17][1515/1780], lr: 1.25000e-04, loss: 5.8603
Epoch [17][1520/1780], lr: 1.25000e-04, loss: 4.7495
Epoch [17][1525/1780], lr: 1.25000e-04, loss: 5.1476
Epoch [17][1530/1780], lr: 1.25000e-04, loss: 4.4528
Epoch [17][1535/1780], lr: 1.25000e-04, loss: 4.6441
Epoch [17][1540/1780], lr: 1.25000e-04, loss: 5.2036
Epoch [17][1545/1780], lr: 1.25000e-04, loss: 5.8093
Epoch [17][1550/1780], lr: 1.25000e-04, loss: 4.8559
Epoch [17][1555/1780], lr: 1.25000e-04, loss: 5.2546
Epoch [17][1560/1780], lr: 1.25000e-04, loss: 5.6204
Epoch [17][1565/1780], lr: 1.25000e-04, loss: 4.9618
Epoch [17][1570/1780], lr: 1.25000e-04, loss: 4.588
Epoch [17][1575/1780], lr: 1.25000e-04, loss: 5.4995
Epoch [17][1580/1780], lr: 1.25000e-04, loss: 6.05

Epoch [18][485/1780], lr: 1.25000e-04, loss: 4.7172
Epoch [18][490/1780], lr: 1.25000e-04, loss: 5.1748
Epoch [18][495/1780], lr: 1.25000e-04, loss: 5.193
Epoch [18][500/1780], lr: 1.25000e-04, loss: 4.4391
Epoch [18][505/1780], lr: 1.25000e-04, loss: 4.7033
Epoch [18][510/1780], lr: 1.25000e-04, loss: 5.5548
Epoch [18][515/1780], lr: 1.25000e-04, loss: 5.4919
Epoch [18][520/1780], lr: 1.25000e-04, loss: 5.1964
Epoch [18][525/1780], lr: 1.25000e-04, loss: 4.521
Epoch [18][530/1780], lr: 1.25000e-04, loss: 4.7889
Epoch [18][535/1780], lr: 1.25000e-04, loss: 5.8701
Epoch [18][540/1780], lr: 1.25000e-04, loss: 4.1098
Epoch [18][545/1780], lr: 1.25000e-04, loss: 4.7974
Epoch [18][550/1780], lr: 1.25000e-04, loss: 4.8943
Epoch [18][555/1780], lr: 1.25000e-04, loss: 4.6503
Epoch [18][560/1780], lr: 1.25000e-04, loss: 4.9815
Epoch [18][565/1780], lr: 1.25000e-04, loss: 4.119
Epoch [18][570/1780], lr: 1.25000e-04, loss: 4.6515
Epoch [18][575/1780], lr: 1.25000e-04, loss: 5.3461
Epoch [18][580/

Epoch [18][1275/1780], lr: 1.25000e-04, loss: 4.6864
Epoch [18][1280/1780], lr: 1.25000e-04, loss: 5.643
Epoch [18][1285/1780], lr: 1.25000e-04, loss: 5.1636
Epoch [18][1290/1780], lr: 1.25000e-04, loss: 5.199
Epoch [18][1295/1780], lr: 1.25000e-04, loss: 5.2957
Epoch [18][1300/1780], lr: 1.25000e-04, loss: 5.1156
Epoch [18][1305/1780], lr: 1.25000e-04, loss: 4.7849
Epoch [18][1310/1780], lr: 1.25000e-04, loss: 5.18
Epoch [18][1315/1780], lr: 1.25000e-04, loss: 5.5065
Epoch [18][1320/1780], lr: 1.25000e-04, loss: 4.6569
Epoch [18][1325/1780], lr: 1.25000e-04, loss: 4.2732
Epoch [18][1330/1780], lr: 1.25000e-04, loss: 5.0628
Epoch [18][1335/1780], lr: 1.25000e-04, loss: 5.4422
Epoch [18][1340/1780], lr: 1.25000e-04, loss: 4.8477
Epoch [18][1345/1780], lr: 1.25000e-04, loss: 4.6382
Epoch [18][1350/1780], lr: 1.25000e-04, loss: 4.711
Epoch [18][1355/1780], lr: 1.25000e-04, loss: 5.3558
Epoch [18][1360/1780], lr: 1.25000e-04, loss: 4.5101
Epoch [18][1365/1780], lr: 1.25000e-04, loss: 4.751

Epoch [19][265/1780], lr: 1.25000e-04, loss: 4.9596
Epoch [19][270/1780], lr: 1.25000e-04, loss: 5.2533
Epoch [19][275/1780], lr: 1.25000e-04, loss: 5.266
Epoch [19][280/1780], lr: 1.25000e-04, loss: 5.0543
Epoch [19][285/1780], lr: 1.25000e-04, loss: 5.4256
Epoch [19][290/1780], lr: 1.25000e-04, loss: 4.7266
Epoch [19][295/1780], lr: 1.25000e-04, loss: 5.4415
Epoch [19][300/1780], lr: 1.25000e-04, loss: 5.1221
Epoch [19][305/1780], lr: 1.25000e-04, loss: 5.397
Epoch [19][310/1780], lr: 1.25000e-04, loss: 4.0391
Epoch [19][315/1780], lr: 1.25000e-04, loss: 4.5987
Epoch [19][320/1780], lr: 1.25000e-04, loss: 5.0148
Epoch [19][325/1780], lr: 1.25000e-04, loss: 5.0905
Epoch [19][330/1780], lr: 1.25000e-04, loss: 5.6455
Epoch [19][335/1780], lr: 1.25000e-04, loss: 5.2571
Epoch [19][340/1780], lr: 1.25000e-04, loss: 4.8521
Epoch [19][345/1780], lr: 1.25000e-04, loss: 5.7901
Epoch [19][350/1780], lr: 1.25000e-04, loss: 4.6889
Epoch [19][355/1780], lr: 1.25000e-04, loss: 5.4714
Epoch [19][360

Epoch [19][1055/1780], lr: 1.25000e-04, loss: 4.6885
Epoch [19][1060/1780], lr: 1.25000e-04, loss: 4.8461
Epoch [19][1065/1780], lr: 1.25000e-04, loss: 5.3372
Epoch [19][1070/1780], lr: 1.25000e-04, loss: 4.6408
Epoch [19][1075/1780], lr: 1.25000e-04, loss: 5.2734
Epoch [19][1080/1780], lr: 1.25000e-04, loss: 4.658
Epoch [19][1085/1780], lr: 1.25000e-04, loss: 5.2195
Epoch [19][1090/1780], lr: 1.25000e-04, loss: 4.8671
Epoch [19][1095/1780], lr: 1.25000e-04, loss: 4.8628
Epoch [19][1100/1780], lr: 1.25000e-04, loss: 4.3852
Epoch [19][1105/1780], lr: 1.25000e-04, loss: 4.5332
Epoch [19][1110/1780], lr: 1.25000e-04, loss: 5.1102
Epoch [19][1115/1780], lr: 1.25000e-04, loss: 5.1835
Epoch [19][1120/1780], lr: 1.25000e-04, loss: 5.4187
Epoch [19][1125/1780], lr: 1.25000e-04, loss: 4.4919
Epoch [19][1130/1780], lr: 1.25000e-04, loss: 4.5304
Epoch [19][1135/1780], lr: 1.25000e-04, loss: 5.2922
Epoch [19][1140/1780], lr: 1.25000e-04, loss: 5.5024
Epoch [19][1145/1780], lr: 1.25000e-04, loss: 5

Epoch [20][40/1780], lr: 1.25000e-04, loss: 5.0629
Epoch [20][45/1780], lr: 1.25000e-04, loss: 4.8363
Epoch [20][50/1780], lr: 1.25000e-04, loss: 5.1104
Epoch [20][55/1780], lr: 1.25000e-04, loss: 5.3059
Epoch [20][60/1780], lr: 1.25000e-04, loss: 5.4639
Epoch [20][65/1780], lr: 1.25000e-04, loss: 5.4795
Epoch [20][70/1780], lr: 1.25000e-04, loss: 4.8489
Epoch [20][75/1780], lr: 1.25000e-04, loss: 4.8126
Epoch [20][80/1780], lr: 1.25000e-04, loss: 4.8642
Epoch [20][85/1780], lr: 1.25000e-04, loss: 4.6619
Epoch [20][90/1780], lr: 1.25000e-04, loss: 4.8389
Epoch [20][95/1780], lr: 1.25000e-04, loss: 5.0605
Epoch [20][100/1780], lr: 1.25000e-04, loss: 5.8052
Epoch [20][105/1780], lr: 1.25000e-04, loss: 4.6718
Epoch [20][110/1780], lr: 1.25000e-04, loss: 4.6602
Epoch [20][115/1780], lr: 1.25000e-04, loss: 4.9698
Epoch [20][120/1780], lr: 1.25000e-04, loss: 5.1794
Epoch [20][125/1780], lr: 1.25000e-04, loss: 4.6435
Epoch [20][130/1780], lr: 1.25000e-04, loss: 6.0431
Epoch [20][135/1780], lr

Epoch [20][835/1780], lr: 1.25000e-04, loss: 4.9114
Epoch [20][840/1780], lr: 1.25000e-04, loss: 4.8313
Epoch [20][845/1780], lr: 1.25000e-04, loss: 4.8324
Epoch [20][850/1780], lr: 1.25000e-04, loss: 5.5216
Epoch [20][855/1780], lr: 1.25000e-04, loss: 5.4585
Epoch [20][860/1780], lr: 1.25000e-04, loss: 5.0436
Epoch [20][865/1780], lr: 1.25000e-04, loss: 5.3773
Epoch [20][870/1780], lr: 1.25000e-04, loss: 5.0206
Epoch [20][875/1780], lr: 1.25000e-04, loss: 4.8737
Epoch [20][880/1780], lr: 1.25000e-04, loss: 4.7365
Epoch [20][885/1780], lr: 1.25000e-04, loss: 4.8999
Epoch [20][890/1780], lr: 1.25000e-04, loss: 4.0768
Epoch [20][895/1780], lr: 1.25000e-04, loss: 4.8158
Epoch [20][900/1780], lr: 1.25000e-04, loss: 5.7861
Epoch [20][905/1780], lr: 1.25000e-04, loss: 4.4028
Epoch [20][910/1780], lr: 1.25000e-04, loss: 5.3344
Epoch [20][915/1780], lr: 1.25000e-04, loss: 5.4935
Epoch [20][920/1780], lr: 1.25000e-04, loss: 6.248
Epoch [20][925/1780], lr: 1.25000e-04, loss: 4.9435
Epoch [20][93

Epoch [20][1615/1780], lr: 1.25000e-04, loss: 4.1142
Epoch [20][1620/1780], lr: 1.25000e-04, loss: 5.5196
Epoch [20][1625/1780], lr: 1.25000e-04, loss: 4.9076
Epoch [20][1630/1780], lr: 1.25000e-04, loss: 4.9086
Epoch [20][1635/1780], lr: 1.25000e-04, loss: 5.2795
Epoch [20][1640/1780], lr: 1.25000e-04, loss: 4.7367
Epoch [20][1645/1780], lr: 1.25000e-04, loss: 4.9685
Epoch [20][1650/1780], lr: 1.25000e-04, loss: 5.2273
Epoch [20][1655/1780], lr: 1.25000e-04, loss: 4.9476
Epoch [20][1660/1780], lr: 1.25000e-04, loss: 5.5147
Epoch [20][1665/1780], lr: 1.25000e-04, loss: 5.2772
Epoch [20][1670/1780], lr: 1.25000e-04, loss: 5.0363
Epoch [20][1675/1780], lr: 1.25000e-04, loss: 4.4084
Epoch [20][1680/1780], lr: 1.25000e-04, loss: 4.5472
Epoch [20][1685/1780], lr: 1.25000e-04, loss: 5.1026
Epoch [20][1690/1780], lr: 1.25000e-04, loss: 5.1036
Epoch [20][1695/1780], lr: 1.25000e-04, loss: 5.2634
Epoch [20][1700/1780], lr: 1.25000e-04, loss: 5.1581
Epoch [20][1705/1780], lr: 1.25000e-04, loss: 

Epoch [21][615/1780], lr: 1.25000e-04, loss: 5.8183
Epoch [21][620/1780], lr: 1.25000e-04, loss: 5.7556
Epoch [21][625/1780], lr: 1.25000e-04, loss: 6.1659
Epoch [21][630/1780], lr: 1.25000e-04, loss: 4.4598
Epoch [21][635/1780], lr: 1.25000e-04, loss: 5.0042
Epoch [21][640/1780], lr: 1.25000e-04, loss: 4.6106
Epoch [21][645/1780], lr: 1.25000e-04, loss: 4.9639
Epoch [21][650/1780], lr: 1.25000e-04, loss: 4.6034
Epoch [21][655/1780], lr: 1.25000e-04, loss: 4.7247
Epoch [21][660/1780], lr: 1.25000e-04, loss: 4.3234
Epoch [21][665/1780], lr: 1.25000e-04, loss: 4.9548
Epoch [21][670/1780], lr: 1.25000e-04, loss: 4.7153
Epoch [21][675/1780], lr: 1.25000e-04, loss: 5.0904
Epoch [21][680/1780], lr: 1.25000e-04, loss: 5.4684
Epoch [21][685/1780], lr: 1.25000e-04, loss: 5.1527
Epoch [21][690/1780], lr: 1.25000e-04, loss: 5.4983
Epoch [21][695/1780], lr: 1.25000e-04, loss: 5.5417
Epoch [21][700/1780], lr: 1.25000e-04, loss: 4.5818
Epoch [21][705/1780], lr: 1.25000e-04, loss: 4.9002
Epoch [21][7

Epoch [21][1400/1780], lr: 1.25000e-04, loss: 4.7759
Epoch [21][1405/1780], lr: 1.25000e-04, loss: 5.119
Epoch [21][1410/1780], lr: 1.25000e-04, loss: 5.1714
Epoch [21][1415/1780], lr: 1.25000e-04, loss: 4.4488
Epoch [21][1420/1780], lr: 1.25000e-04, loss: 4.7473
Epoch [21][1425/1780], lr: 1.25000e-04, loss: 5.2194
Epoch [21][1430/1780], lr: 1.25000e-04, loss: 4.6086
Epoch [21][1435/1780], lr: 1.25000e-04, loss: 4.8146
Epoch [21][1440/1780], lr: 1.25000e-04, loss: 4.6524
Epoch [21][1445/1780], lr: 1.25000e-04, loss: 4.1022
Epoch [21][1450/1780], lr: 1.25000e-04, loss: 4.6675
Epoch [21][1455/1780], lr: 1.25000e-04, loss: 5.2052
Epoch [21][1460/1780], lr: 1.25000e-04, loss: 5.5771
Epoch [21][1465/1780], lr: 1.25000e-04, loss: 4.9716
Epoch [21][1470/1780], lr: 1.25000e-04, loss: 4.663
Epoch [21][1475/1780], lr: 1.25000e-04, loss: 3.9613
Epoch [21][1480/1780], lr: 1.25000e-04, loss: 5.03
Epoch [21][1485/1780], lr: 1.25000e-04, loss: 4.674
Epoch [21][1490/1780], lr: 1.25000e-04, loss: 4.647

Epoch [22][395/1780], lr: 1.25000e-04, loss: 4.651
Epoch [22][400/1780], lr: 1.25000e-04, loss: 5.4412
Epoch [22][405/1780], lr: 1.25000e-04, loss: 4.1101
Epoch [22][410/1780], lr: 1.25000e-04, loss: 5.6417
Epoch [22][415/1780], lr: 1.25000e-04, loss: 4.4974
Epoch [22][420/1780], lr: 1.25000e-04, loss: 5.3773
Epoch [22][425/1780], lr: 1.25000e-04, loss: 5.2357
Epoch [22][430/1780], lr: 1.25000e-04, loss: 5.5623
Epoch [22][435/1780], lr: 1.25000e-04, loss: 4.6391
Epoch [22][440/1780], lr: 1.25000e-04, loss: 4.8232
Epoch [22][445/1780], lr: 1.25000e-04, loss: 4.5241
Epoch [22][450/1780], lr: 1.25000e-04, loss: 4.6265
Epoch [22][455/1780], lr: 1.25000e-04, loss: 4.8412
Epoch [22][460/1780], lr: 1.25000e-04, loss: 4.6173
Epoch [22][465/1780], lr: 1.25000e-04, loss: 4.8871
Epoch [22][470/1780], lr: 1.25000e-04, loss: 4.5037
Epoch [22][475/1780], lr: 1.25000e-04, loss: 5.3187
Epoch [22][480/1780], lr: 1.25000e-04, loss: 5.2104
Epoch [22][485/1780], lr: 1.25000e-04, loss: 5.4701
Epoch [22][49

Epoch [22][1185/1780], lr: 1.25000e-04, loss: 3.9949
Epoch [22][1190/1780], lr: 1.25000e-04, loss: 4.2854
Epoch [22][1195/1780], lr: 1.25000e-04, loss: 4.6037
Epoch [22][1200/1780], lr: 1.25000e-04, loss: 4.6907
Epoch [22][1205/1780], lr: 1.25000e-04, loss: 4.9239
Epoch [22][1210/1780], lr: 1.25000e-04, loss: 4.8038
Epoch [22][1215/1780], lr: 1.25000e-04, loss: 5.9809
Epoch [22][1220/1780], lr: 1.25000e-04, loss: 5.2146
Epoch [22][1225/1780], lr: 1.25000e-04, loss: 5.2439
Epoch [22][1230/1780], lr: 1.25000e-04, loss: 4.5341
Epoch [22][1235/1780], lr: 1.25000e-04, loss: 5.5871
Epoch [22][1240/1780], lr: 1.25000e-04, loss: 4.6613
Epoch [22][1245/1780], lr: 1.25000e-04, loss: 5.288
Epoch [22][1250/1780], lr: 1.25000e-04, loss: 4.5004
Epoch [22][1255/1780], lr: 1.25000e-04, loss: 5.1328
Epoch [22][1260/1780], lr: 1.25000e-04, loss: 5.0056
Epoch [22][1265/1780], lr: 1.25000e-04, loss: 5.3998
Epoch [22][1270/1780], lr: 1.25000e-04, loss: 4.5793
Epoch [22][1275/1780], lr: 1.25000e-04, loss: 4

Epoch [23][175/1780], lr: 1.25000e-04, loss: 3.9964
Epoch [23][180/1780], lr: 1.25000e-04, loss: 4.7056
Epoch [23][185/1780], lr: 1.25000e-04, loss: 5.4803
Epoch [23][190/1780], lr: 1.25000e-04, loss: 5.2313
Epoch [23][195/1780], lr: 1.25000e-04, loss: 4.8321
Epoch [23][200/1780], lr: 1.25000e-04, loss: 4.7805
Epoch [23][205/1780], lr: 1.25000e-04, loss: 4.8716
Epoch [23][210/1780], lr: 1.25000e-04, loss: 5.1223
Epoch [23][215/1780], lr: 1.25000e-04, loss: 4.7133
Epoch [23][220/1780], lr: 1.25000e-04, loss: 5.2874
Epoch [23][225/1780], lr: 1.25000e-04, loss: 5.4301
Epoch [23][230/1780], lr: 1.25000e-04, loss: 4.95
Epoch [23][235/1780], lr: 1.25000e-04, loss: 5.3215
Epoch [23][240/1780], lr: 1.25000e-04, loss: 5.2207
Epoch [23][245/1780], lr: 1.25000e-04, loss: 4.6413
Epoch [23][250/1780], lr: 1.25000e-04, loss: 5.3439
Epoch [23][255/1780], lr: 1.25000e-04, loss: 4.4991
Epoch [23][260/1780], lr: 1.25000e-04, loss: 4.3336
Epoch [23][265/1780], lr: 1.25000e-04, loss: 5.1022
Epoch [23][270

Epoch [23][965/1780], lr: 1.25000e-04, loss: 4.1418
Epoch [23][970/1780], lr: 1.25000e-04, loss: 5.0579
Epoch [23][975/1780], lr: 1.25000e-04, loss: 4.7973
Epoch [23][980/1780], lr: 1.25000e-04, loss: 4.4514
Epoch [23][985/1780], lr: 1.25000e-04, loss: 5.1816
Epoch [23][990/1780], lr: 1.25000e-04, loss: 6.1475
Epoch [23][995/1780], lr: 1.25000e-04, loss: 4.9387
Epoch [23][1000/1780], lr: 1.25000e-04, loss: 4.8802
Epoch [23][1005/1780], lr: 1.25000e-04, loss: 4.6593
Epoch [23][1010/1780], lr: 1.25000e-04, loss: 4.0637
Epoch [23][1015/1780], lr: 1.25000e-04, loss: 4.6404
Epoch [23][1020/1780], lr: 1.25000e-04, loss: 5.3396
Epoch [23][1025/1780], lr: 1.25000e-04, loss: 4.7431
Epoch [23][1030/1780], lr: 1.25000e-04, loss: 5.6294
Epoch [23][1035/1780], lr: 1.25000e-04, loss: 4.3188
Epoch [23][1040/1780], lr: 1.25000e-04, loss: 4.7492
Epoch [23][1045/1780], lr: 1.25000e-04, loss: 4.4877
Epoch [23][1050/1780], lr: 1.25000e-04, loss: 5.3691
Epoch [23][1055/1780], lr: 1.25000e-04, loss: 4.6929


Epoch [23][1745/1780], lr: 1.25000e-04, loss: 3.5415
Epoch [23][1750/1780], lr: 1.25000e-04, loss: 4.3602
Epoch [23][1755/1780], lr: 1.25000e-04, loss: 4.4261
Epoch [23][1760/1780], lr: 1.25000e-04, loss: 5.686
Epoch [23][1765/1780], lr: 1.25000e-04, loss: 5.0881
Epoch [23][1770/1780], lr: 1.25000e-04, loss: 4.856
Epoch [23][1775/1780], lr: 1.25000e-04, loss: 5.2058
Epoch [23][1780/1780], lr: 1.25000e-04, loss: 4.793
Evaluating top_k_accuracy...
top1_acc: 0.01163, top5_acc: 0.06589, train_loss: 4.793, val_loss: 4.8878
Saving checkpoint at 23 epochs...
Epoch [24][5/1780], lr: 1.25000e-04, loss: 4.8411
Epoch [24][10/1780], lr: 1.25000e-04, loss: 5.1579
Epoch [24][15/1780], lr: 1.25000e-04, loss: 5.1201
Epoch [24][20/1780], lr: 1.25000e-04, loss: 5.7357
Epoch [24][25/1780], lr: 1.25000e-04, loss: 4.9431
Epoch [24][30/1780], lr: 1.25000e-04, loss: 4.6357
Epoch [24][35/1780], lr: 1.25000e-04, loss: 4.6373
Epoch [24][40/1780], lr: 1.25000e-04, loss: 4.6671
Epoch [24][45/1780], lr: 1.25000e-0

Epoch [24][745/1780], lr: 1.25000e-04, loss: 4.8744
Epoch [24][750/1780], lr: 1.25000e-04, loss: 4.811
Epoch [24][755/1780], lr: 1.25000e-04, loss: 5.3917
Epoch [24][760/1780], lr: 1.25000e-04, loss: 4.4364
Epoch [24][765/1780], lr: 1.25000e-04, loss: 4.6695
Epoch [24][770/1780], lr: 1.25000e-04, loss: 5.2665
Epoch [24][775/1780], lr: 1.25000e-04, loss: 5.7494
Epoch [24][780/1780], lr: 1.25000e-04, loss: 5.0598
Epoch [24][785/1780], lr: 1.25000e-04, loss: 4.9842
Epoch [24][790/1780], lr: 1.25000e-04, loss: 4.6785
Epoch [24][795/1780], lr: 1.25000e-04, loss: 5.0689
Epoch [24][800/1780], lr: 1.25000e-04, loss: 5.0481
Epoch [24][805/1780], lr: 1.25000e-04, loss: 5.2235
Epoch [24][810/1780], lr: 1.25000e-04, loss: 4.7675
Epoch [24][815/1780], lr: 1.25000e-04, loss: 5.3414
Epoch [24][820/1780], lr: 1.25000e-04, loss: 5.0254
Epoch [24][825/1780], lr: 1.25000e-04, loss: 4.2485
Epoch [24][830/1780], lr: 1.25000e-04, loss: 4.7076
Epoch [24][835/1780], lr: 1.25000e-04, loss: 4.9293
Epoch [24][84

Epoch [24][1525/1780], lr: 1.25000e-04, loss: 5.5848
Epoch [24][1530/1780], lr: 1.25000e-04, loss: 4.9226
Epoch [24][1535/1780], lr: 1.25000e-04, loss: 5.5049
Epoch [24][1540/1780], lr: 1.25000e-04, loss: 4.3577
Epoch [24][1545/1780], lr: 1.25000e-04, loss: 5.444
Epoch [24][1550/1780], lr: 1.25000e-04, loss: 5.5669
Epoch [24][1555/1780], lr: 1.25000e-04, loss: 5.4093
Epoch [24][1560/1780], lr: 1.25000e-04, loss: 4.0622
Epoch [24][1565/1780], lr: 1.25000e-04, loss: 4.2099
Epoch [24][1570/1780], lr: 1.25000e-04, loss: 4.9764
Epoch [24][1575/1780], lr: 1.25000e-04, loss: 4.996
Epoch [24][1580/1780], lr: 1.25000e-04, loss: 4.6057
Epoch [24][1585/1780], lr: 1.25000e-04, loss: 4.9559
Epoch [24][1590/1780], lr: 1.25000e-04, loss: 5.2458
Epoch [24][1595/1780], lr: 1.25000e-04, loss: 4.6591
Epoch [24][1600/1780], lr: 1.25000e-04, loss: 4.8782
Epoch [24][1605/1780], lr: 1.25000e-04, loss: 5.3388
Epoch [24][1610/1780], lr: 1.25000e-04, loss: 5.0581
Epoch [24][1615/1780], lr: 1.25000e-04, loss: 4.

Epoch [25][520/1780], lr: 1.25000e-04, loss: 4.3452
Epoch [25][525/1780], lr: 1.25000e-04, loss: 4.9741
Epoch [25][530/1780], lr: 1.25000e-04, loss: 4.294
Epoch [25][535/1780], lr: 1.25000e-04, loss: 4.4897
Epoch [25][540/1780], lr: 1.25000e-04, loss: 5.9125
Epoch [25][545/1780], lr: 1.25000e-04, loss: 5.5076
Epoch [25][550/1780], lr: 1.25000e-04, loss: 4.9986
Epoch [25][555/1780], lr: 1.25000e-04, loss: 4.6362
Epoch [25][560/1780], lr: 1.25000e-04, loss: 4.9967
Epoch [25][565/1780], lr: 1.25000e-04, loss: 4.6275
Epoch [25][570/1780], lr: 1.25000e-04, loss: 4.0908
Epoch [25][575/1780], lr: 1.25000e-04, loss: 4.8475
Epoch [25][580/1780], lr: 1.25000e-04, loss: 4.8662
Epoch [25][585/1780], lr: 1.25000e-04, loss: 5.6263
Epoch [25][590/1780], lr: 1.25000e-04, loss: 4.9009
Epoch [25][595/1780], lr: 1.25000e-04, loss: 4.5487
Epoch [25][600/1780], lr: 1.25000e-04, loss: 4.9025
Epoch [25][605/1780], lr: 1.25000e-04, loss: 4.8292
Epoch [25][610/1780], lr: 1.25000e-04, loss: 4.4123
Epoch [25][61

Epoch [25][1305/1780], lr: 1.25000e-04, loss: 4.2368
Epoch [25][1310/1780], lr: 1.25000e-04, loss: 4.9869
Epoch [25][1315/1780], lr: 1.25000e-04, loss: 5.2657
Epoch [25][1320/1780], lr: 1.25000e-04, loss: 5.3301
Epoch [25][1325/1780], lr: 1.25000e-04, loss: 4.616
Epoch [25][1330/1780], lr: 1.25000e-04, loss: 5.686
Epoch [25][1335/1780], lr: 1.25000e-04, loss: 4.2103
Epoch [25][1340/1780], lr: 1.25000e-04, loss: 4.2109
Epoch [25][1345/1780], lr: 1.25000e-04, loss: 4.8882
Epoch [25][1350/1780], lr: 1.25000e-04, loss: 4.3921
Epoch [25][1355/1780], lr: 1.25000e-04, loss: 4.9382
Epoch [25][1360/1780], lr: 1.25000e-04, loss: 4.1772
Epoch [25][1365/1780], lr: 1.25000e-04, loss: 5.2941
Epoch [25][1370/1780], lr: 1.25000e-04, loss: 5.4669
Epoch [25][1375/1780], lr: 1.25000e-04, loss: 4.9317
Epoch [25][1380/1780], lr: 1.25000e-04, loss: 5.0574
Epoch [25][1385/1780], lr: 1.25000e-04, loss: 5.1075
Epoch [25][1390/1780], lr: 1.25000e-04, loss: 5.099
Epoch [25][1395/1780], lr: 1.25000e-04, loss: 4.9

Epoch [26][295/1780], lr: 1.25000e-04, loss: 4.9359
Epoch [26][300/1780], lr: 1.25000e-04, loss: 4.8032
Epoch [26][305/1780], lr: 1.25000e-04, loss: 4.9072
Epoch [26][310/1780], lr: 1.25000e-04, loss: 3.9999
Epoch [26][315/1780], lr: 1.25000e-04, loss: 4.9976
Epoch [26][320/1780], lr: 1.25000e-04, loss: 5.384
Epoch [26][325/1780], lr: 1.25000e-04, loss: 5.0015
Epoch [26][330/1780], lr: 1.25000e-04, loss: 5.1943
Epoch [26][335/1780], lr: 1.25000e-04, loss: 4.9666
Epoch [26][340/1780], lr: 1.25000e-04, loss: 4.8412
Epoch [26][345/1780], lr: 1.25000e-04, loss: 4.1246
Epoch [26][350/1780], lr: 1.25000e-04, loss: 4.2617
Epoch [26][355/1780], lr: 1.25000e-04, loss: 4.9502
Epoch [26][360/1780], lr: 1.25000e-04, loss: 5.1109
Epoch [26][365/1780], lr: 1.25000e-04, loss: 4.1079
Epoch [26][370/1780], lr: 1.25000e-04, loss: 5.1489
Epoch [26][375/1780], lr: 1.25000e-04, loss: 5.2574
Epoch [26][380/1780], lr: 1.25000e-04, loss: 4.5419
Epoch [26][385/1780], lr: 1.25000e-04, loss: 5.2203
Epoch [26][39

Epoch [26][1085/1780], lr: 1.25000e-04, loss: 5.0737
Epoch [26][1090/1780], lr: 1.25000e-04, loss: 4.6718
Epoch [26][1095/1780], lr: 1.25000e-04, loss: 4.2609
Epoch [26][1100/1780], lr: 1.25000e-04, loss: 4.4397
Epoch [26][1105/1780], lr: 1.25000e-04, loss: 4.7565
Epoch [26][1110/1780], lr: 1.25000e-04, loss: 5.1149
Epoch [26][1115/1780], lr: 1.25000e-04, loss: 4.3746
Epoch [26][1120/1780], lr: 1.25000e-04, loss: 5.364
Epoch [26][1125/1780], lr: 1.25000e-04, loss: 5.3204
Epoch [26][1130/1780], lr: 1.25000e-04, loss: 4.9416
Epoch [26][1135/1780], lr: 1.25000e-04, loss: 4.7054
Epoch [26][1140/1780], lr: 1.25000e-04, loss: 5.077
Epoch [26][1145/1780], lr: 1.25000e-04, loss: 4.7684
Epoch [26][1150/1780], lr: 1.25000e-04, loss: 5.1518
Epoch [26][1155/1780], lr: 1.25000e-04, loss: 4.5358
Epoch [26][1160/1780], lr: 1.25000e-04, loss: 4.727
Epoch [26][1165/1780], lr: 1.25000e-04, loss: 5.3967
Epoch [26][1170/1780], lr: 1.25000e-04, loss: 4.5291
Epoch [26][1175/1780], lr: 1.25000e-04, loss: 4.3

Epoch [27][70/1780], lr: 1.25000e-04, loss: 5.0693
Epoch [27][75/1780], lr: 1.25000e-04, loss: 5.2844
Epoch [27][80/1780], lr: 1.25000e-04, loss: 5.0687
Epoch [27][85/1780], lr: 1.25000e-04, loss: 4.7855
Epoch [27][90/1780], lr: 1.25000e-04, loss: 4.4361
Epoch [27][95/1780], lr: 1.25000e-04, loss: 4.4465
Epoch [27][100/1780], lr: 1.25000e-04, loss: 4.7239
Epoch [27][105/1780], lr: 1.25000e-04, loss: 4.7791
Epoch [27][110/1780], lr: 1.25000e-04, loss: 4.5528
Epoch [27][115/1780], lr: 1.25000e-04, loss: 4.6022
Epoch [27][120/1780], lr: 1.25000e-04, loss: 4.6844
Epoch [27][125/1780], lr: 1.25000e-04, loss: 4.7761
Epoch [27][130/1780], lr: 1.25000e-04, loss: 4.7403
Epoch [27][135/1780], lr: 1.25000e-04, loss: 5.0753
Epoch [27][140/1780], lr: 1.25000e-04, loss: 4.7053
Epoch [27][145/1780], lr: 1.25000e-04, loss: 4.3187
Epoch [27][150/1780], lr: 1.25000e-04, loss: 3.4878
Epoch [27][155/1780], lr: 1.25000e-04, loss: 5.0431
Epoch [27][160/1780], lr: 1.25000e-04, loss: 4.3022
Epoch [27][165/178

Epoch [27][860/1780], lr: 1.25000e-04, loss: 4.7468
Epoch [27][865/1780], lr: 1.25000e-04, loss: 5.4007
Epoch [27][870/1780], lr: 1.25000e-04, loss: 4.8558
Epoch [27][875/1780], lr: 1.25000e-04, loss: 3.9329
Epoch [27][880/1780], lr: 1.25000e-04, loss: 4.4209
Epoch [27][885/1780], lr: 1.25000e-04, loss: 4.4621
Epoch [27][890/1780], lr: 1.25000e-04, loss: 5.1332
Epoch [27][895/1780], lr: 1.25000e-04, loss: 4.9437
Epoch [27][900/1780], lr: 1.25000e-04, loss: 5.3046
Epoch [27][905/1780], lr: 1.25000e-04, loss: 4.8584
Epoch [27][910/1780], lr: 1.25000e-04, loss: 4.5759
Epoch [27][915/1780], lr: 1.25000e-04, loss: 5.175
Epoch [27][920/1780], lr: 1.25000e-04, loss: 3.6684
Epoch [27][925/1780], lr: 1.25000e-04, loss: 4.2563
Epoch [27][930/1780], lr: 1.25000e-04, loss: 4.3193
Epoch [27][935/1780], lr: 1.25000e-04, loss: 4.8371
Epoch [27][940/1780], lr: 1.25000e-04, loss: 5.0432
Epoch [27][945/1780], lr: 1.25000e-04, loss: 4.5871
Epoch [27][950/1780], lr: 1.25000e-04, loss: 5.266
Epoch [27][955

Epoch [27][1640/1780], lr: 1.25000e-04, loss: 5.1018
Epoch [27][1645/1780], lr: 1.25000e-04, loss: 5.1416
Epoch [27][1650/1780], lr: 1.25000e-04, loss: 4.3196
Epoch [27][1655/1780], lr: 1.25000e-04, loss: 5.5577
Epoch [27][1660/1780], lr: 1.25000e-04, loss: 4.4074
Epoch [27][1665/1780], lr: 1.25000e-04, loss: 4.7945
Epoch [27][1670/1780], lr: 1.25000e-04, loss: 4.835
Epoch [27][1675/1780], lr: 1.25000e-04, loss: 4.725
Epoch [27][1680/1780], lr: 1.25000e-04, loss: 5.0848
Epoch [27][1685/1780], lr: 1.25000e-04, loss: 4.1248
Epoch [27][1690/1780], lr: 1.25000e-04, loss: 4.2527
Epoch [27][1695/1780], lr: 1.25000e-04, loss: 5.5695
Epoch [27][1700/1780], lr: 1.25000e-04, loss: 4.8742
Epoch [27][1705/1780], lr: 1.25000e-04, loss: 4.3435
Epoch [27][1710/1780], lr: 1.25000e-04, loss: 4.7004
Epoch [27][1715/1780], lr: 1.25000e-04, loss: 4.8644
Epoch [27][1720/1780], lr: 1.25000e-04, loss: 5.3302
Epoch [27][1725/1780], lr: 1.25000e-04, loss: 5.0845
Epoch [27][1730/1780], lr: 1.25000e-04, loss: 4.

Epoch [28][635/1780], lr: 1.25000e-04, loss: 4.9034
Epoch [28][640/1780], lr: 1.25000e-04, loss: 5.0307
Epoch [28][645/1780], lr: 1.25000e-04, loss: 4.893
Epoch [28][650/1780], lr: 1.25000e-04, loss: 4.8049
Epoch [28][655/1780], lr: 1.25000e-04, loss: 4.8742
Epoch [28][660/1780], lr: 1.25000e-04, loss: 4.755
Epoch [28][665/1780], lr: 1.25000e-04, loss: 4.7469
Epoch [28][670/1780], lr: 1.25000e-04, loss: 4.361
Epoch [28][675/1780], lr: 1.25000e-04, loss: 4.4764
Epoch [28][680/1780], lr: 1.25000e-04, loss: 4.7971
Epoch [28][685/1780], lr: 1.25000e-04, loss: 5.4704
Epoch [28][690/1780], lr: 1.25000e-04, loss: 4.1027
Epoch [28][695/1780], lr: 1.25000e-04, loss: 4.6022
Epoch [28][700/1780], lr: 1.25000e-04, loss: 5.4764
Epoch [28][705/1780], lr: 1.25000e-04, loss: 5.2317
Epoch [28][710/1780], lr: 1.25000e-04, loss: 4.5193
Epoch [28][715/1780], lr: 1.25000e-04, loss: 4.9015
Epoch [28][720/1780], lr: 1.25000e-04, loss: 4.1971
Epoch [28][725/1780], lr: 1.25000e-04, loss: 4.8758
Epoch [28][730/

Epoch [28][1420/1780], lr: 1.25000e-04, loss: 5.1025
Epoch [28][1425/1780], lr: 1.25000e-04, loss: 4.6786
Epoch [28][1430/1780], lr: 1.25000e-04, loss: 5.1474
Epoch [28][1435/1780], lr: 1.25000e-04, loss: 4.5306
Epoch [28][1440/1780], lr: 1.25000e-04, loss: 4.1891
Epoch [28][1445/1780], lr: 1.25000e-04, loss: 5.2793
Epoch [28][1450/1780], lr: 1.25000e-04, loss: 5.0751
Epoch [28][1455/1780], lr: 1.25000e-04, loss: 4.9706
Epoch [28][1460/1780], lr: 1.25000e-04, loss: 4.4817
Epoch [28][1465/1780], lr: 1.25000e-04, loss: 4.7055
Epoch [28][1470/1780], lr: 1.25000e-04, loss: 3.7892
Epoch [28][1475/1780], lr: 1.25000e-04, loss: 3.6345
Epoch [28][1480/1780], lr: 1.25000e-04, loss: 4.6285
Epoch [28][1485/1780], lr: 1.25000e-04, loss: 4.6445
Epoch [28][1490/1780], lr: 1.25000e-04, loss: 4.5908
Epoch [28][1495/1780], lr: 1.25000e-04, loss: 4.7389
Epoch [28][1500/1780], lr: 1.25000e-04, loss: 4.7617
Epoch [28][1505/1780], lr: 1.25000e-04, loss: 4.5724
Epoch [28][1510/1780], lr: 1.25000e-04, loss: 

Epoch [29][415/1780], lr: 1.25000e-04, loss: 5.1507
Epoch [29][420/1780], lr: 1.25000e-04, loss: 4.6002
Epoch [29][425/1780], lr: 1.25000e-04, loss: 5.4666
Epoch [29][430/1780], lr: 1.25000e-04, loss: 4.1041
Epoch [29][435/1780], lr: 1.25000e-04, loss: 4.6486
Epoch [29][440/1780], lr: 1.25000e-04, loss: 4.6285
Epoch [29][445/1780], lr: 1.25000e-04, loss: 4.5388
Epoch [29][450/1780], lr: 1.25000e-04, loss: 4.04
Epoch [29][455/1780], lr: 1.25000e-04, loss: 4.6039
Epoch [29][460/1780], lr: 1.25000e-04, loss: 4.6237
Epoch [29][465/1780], lr: 1.25000e-04, loss: 5.0387
Epoch [29][470/1780], lr: 1.25000e-04, loss: 4.429
Epoch [29][475/1780], lr: 1.25000e-04, loss: 4.7216
Epoch [29][480/1780], lr: 1.25000e-04, loss: 3.9544
Epoch [29][485/1780], lr: 1.25000e-04, loss: 4.4183
Epoch [29][490/1780], lr: 1.25000e-04, loss: 4.8368
Epoch [29][495/1780], lr: 1.25000e-04, loss: 5.1952
Epoch [29][500/1780], lr: 1.25000e-04, loss: 4.3057
Epoch [29][505/1780], lr: 1.25000e-04, loss: 5.8746
Epoch [29][510/

Epoch [29][1205/1780], lr: 1.25000e-04, loss: 4.4724
Epoch [29][1210/1780], lr: 1.25000e-04, loss: 4.3224
Epoch [29][1215/1780], lr: 1.25000e-04, loss: 4.7702
Epoch [29][1220/1780], lr: 1.25000e-04, loss: 4.3399
Epoch [29][1225/1780], lr: 1.25000e-04, loss: 5.2206
Epoch [29][1230/1780], lr: 1.25000e-04, loss: 4.6759
Epoch [29][1235/1780], lr: 1.25000e-04, loss: 4.4584
Epoch [29][1240/1780], lr: 1.25000e-04, loss: 4.4534
Epoch [29][1245/1780], lr: 1.25000e-04, loss: 5.2826
Epoch [29][1250/1780], lr: 1.25000e-04, loss: 4.6534
Epoch [29][1255/1780], lr: 1.25000e-04, loss: 4.4758
Epoch [29][1260/1780], lr: 1.25000e-04, loss: 4.6235
Epoch [29][1265/1780], lr: 1.25000e-04, loss: 4.4576
Epoch [29][1270/1780], lr: 1.25000e-04, loss: 4.7656
Epoch [29][1275/1780], lr: 1.25000e-04, loss: 5.2254
Epoch [29][1280/1780], lr: 1.25000e-04, loss: 4.1887
Epoch [29][1285/1780], lr: 1.25000e-04, loss: 5.4327
Epoch [29][1290/1780], lr: 1.25000e-04, loss: 4.2829
Epoch [29][1295/1780], lr: 1.25000e-04, loss: 

Epoch [30][195/1780], lr: 1.25000e-04, loss: 4.7427
Epoch [30][200/1780], lr: 1.25000e-04, loss: 4.1594
Epoch [30][205/1780], lr: 1.25000e-04, loss: 4.0724
Epoch [30][210/1780], lr: 1.25000e-04, loss: 5.0303
Epoch [30][215/1780], lr: 1.25000e-04, loss: 4.6746
Epoch [30][220/1780], lr: 1.25000e-04, loss: 3.991
Epoch [30][225/1780], lr: 1.25000e-04, loss: 4.2314
Epoch [30][230/1780], lr: 1.25000e-04, loss: 4.5562
Epoch [30][235/1780], lr: 1.25000e-04, loss: 4.9098
Epoch [30][240/1780], lr: 1.25000e-04, loss: 4.9416
Epoch [30][245/1780], lr: 1.25000e-04, loss: 5.2334
Epoch [30][250/1780], lr: 1.25000e-04, loss: 4.7606
Epoch [30][255/1780], lr: 1.25000e-04, loss: 4.2177
Epoch [30][260/1780], lr: 1.25000e-04, loss: 5.016
Epoch [30][265/1780], lr: 1.25000e-04, loss: 5.7058
Epoch [30][270/1780], lr: 1.25000e-04, loss: 4.3524
Epoch [30][275/1780], lr: 1.25000e-04, loss: 4.7896
Epoch [30][280/1780], lr: 1.25000e-04, loss: 4.6949
Epoch [30][285/1780], lr: 1.25000e-04, loss: 4.8064
Epoch [30][290

Epoch [30][985/1780], lr: 1.25000e-04, loss: 4.4898
Epoch [30][990/1780], lr: 1.25000e-04, loss: 4.6107
Epoch [30][995/1780], lr: 1.25000e-04, loss: 5.182
Epoch [30][1000/1780], lr: 1.25000e-04, loss: 5.1507
Epoch [30][1005/1780], lr: 1.25000e-04, loss: 4.0775
Epoch [30][1010/1780], lr: 1.25000e-04, loss: 4.6185
Epoch [30][1015/1780], lr: 1.25000e-04, loss: 4.1006
Epoch [30][1020/1780], lr: 1.25000e-04, loss: 4.1728
Epoch [30][1025/1780], lr: 1.25000e-04, loss: 4.4016
Epoch [30][1030/1780], lr: 1.25000e-04, loss: 5.2918
Epoch [30][1035/1780], lr: 1.25000e-04, loss: 5.1846
Epoch [30][1040/1780], lr: 1.25000e-04, loss: 5.4105
Epoch [30][1045/1780], lr: 1.25000e-04, loss: 4.4997
Epoch [30][1050/1780], lr: 1.25000e-04, loss: 4.5539
Epoch [30][1055/1780], lr: 1.25000e-04, loss: 4.7625
Epoch [30][1060/1780], lr: 1.25000e-04, loss: 5.3749
Epoch [30][1065/1780], lr: 1.25000e-04, loss: 5.2413
Epoch [30][1070/1780], lr: 1.25000e-04, loss: 4.4876
Epoch [30][1075/1780], lr: 1.25000e-04, loss: 4.01

Epoch [30][1765/1780], lr: 1.25000e-04, loss: 4.5252
Epoch [30][1770/1780], lr: 1.25000e-04, loss: 5.2365
Epoch [30][1775/1780], lr: 1.25000e-04, loss: 4.4748
Epoch [30][1780/1780], lr: 1.25000e-04, loss: 4.7627
Evaluating top_k_accuracy...
top1_acc: 0.03101, top5_acc: 0.1124, train_loss: 4.7627, val_loss: 4.6252
Saving checkpoint at 30 epochs...
Epoch [31][5/1780], lr: 1.25000e-04, loss: 4.4383
Epoch [31][10/1780], lr: 1.25000e-04, loss: 5.1811
Epoch [31][15/1780], lr: 1.25000e-04, loss: 3.8049
Epoch [31][20/1780], lr: 1.25000e-04, loss: 5.3042
Epoch [31][25/1780], lr: 1.25000e-04, loss: 4.1792
Epoch [31][30/1780], lr: 1.25000e-04, loss: 4.6765
Epoch [31][35/1780], lr: 1.25000e-04, loss: 4.4118
Epoch [31][40/1780], lr: 1.25000e-04, loss: 5.3903
Epoch [31][45/1780], lr: 1.25000e-04, loss: 3.8147
Epoch [31][50/1780], lr: 1.25000e-04, loss: 4.4092
Epoch [31][55/1780], lr: 1.25000e-04, loss: 3.8908
Epoch [31][60/1780], lr: 1.25000e-04, loss: 4.3223
Epoch [31][65/1780], lr: 1.25000e-04, lo

Epoch [31][765/1780], lr: 1.25000e-04, loss: 5.1997
Epoch [31][770/1780], lr: 1.25000e-04, loss: 4.3057
Epoch [31][775/1780], lr: 1.25000e-04, loss: 4.8793
Epoch [31][780/1780], lr: 1.25000e-04, loss: 4.2222
Epoch [31][785/1780], lr: 1.25000e-04, loss: 4.1937
Epoch [31][790/1780], lr: 1.25000e-04, loss: 4.7266
Epoch [31][795/1780], lr: 1.25000e-04, loss: 3.6021
Epoch [31][800/1780], lr: 1.25000e-04, loss: 4.6713
Epoch [31][805/1780], lr: 1.25000e-04, loss: 4.0304
Epoch [31][810/1780], lr: 1.25000e-04, loss: 4.8597
Epoch [31][815/1780], lr: 1.25000e-04, loss: 4.1676
Epoch [31][820/1780], lr: 1.25000e-04, loss: 5.4814
Epoch [31][825/1780], lr: 1.25000e-04, loss: 3.732
Epoch [31][830/1780], lr: 1.25000e-04, loss: 4.6966
Epoch [31][835/1780], lr: 1.25000e-04, loss: 4.0385
Epoch [31][840/1780], lr: 1.25000e-04, loss: 4.3339
Epoch [31][845/1780], lr: 1.25000e-04, loss: 4.0563
Epoch [31][850/1780], lr: 1.25000e-04, loss: 4.5985
Epoch [31][855/1780], lr: 1.25000e-04, loss: 4.8504
Epoch [31][86

Epoch [31][1545/1780], lr: 1.25000e-04, loss: 4.4313
Epoch [31][1550/1780], lr: 1.25000e-04, loss: 4.0808
Epoch [31][1555/1780], lr: 1.25000e-04, loss: 4.1802
Epoch [31][1560/1780], lr: 1.25000e-04, loss: 3.8638
Epoch [31][1565/1780], lr: 1.25000e-04, loss: 4.9829
Epoch [31][1570/1780], lr: 1.25000e-04, loss: 4.5801
Epoch [31][1575/1780], lr: 1.25000e-04, loss: 5.0165
Epoch [31][1580/1780], lr: 1.25000e-04, loss: 4.3959
Epoch [31][1585/1780], lr: 1.25000e-04, loss: 4.3307
Epoch [31][1590/1780], lr: 1.25000e-04, loss: 4.0228
Epoch [31][1595/1780], lr: 1.25000e-04, loss: 5.1333
Epoch [31][1600/1780], lr: 1.25000e-04, loss: 4.6486
Epoch [31][1605/1780], lr: 1.25000e-04, loss: 3.6266
Epoch [31][1610/1780], lr: 1.25000e-04, loss: 4.27
Epoch [31][1615/1780], lr: 1.25000e-04, loss: 4.8633
Epoch [31][1620/1780], lr: 1.25000e-04, loss: 4.3527
Epoch [31][1625/1780], lr: 1.25000e-04, loss: 3.7856
Epoch [31][1630/1780], lr: 1.25000e-04, loss: 4.1496
Epoch [31][1635/1780], lr: 1.25000e-04, loss: 4.

Epoch [32][540/1780], lr: 1.25000e-04, loss: 4.6465
Epoch [32][545/1780], lr: 1.25000e-04, loss: 3.7417
Epoch [32][550/1780], lr: 1.25000e-04, loss: 3.7466
Epoch [32][555/1780], lr: 1.25000e-04, loss: 5.0334
Epoch [32][560/1780], lr: 1.25000e-04, loss: 4.7949
Epoch [32][565/1780], lr: 1.25000e-04, loss: 4.4724
Epoch [32][570/1780], lr: 1.25000e-04, loss: 4.1044
Epoch [32][575/1780], lr: 1.25000e-04, loss: 4.2339
Epoch [32][580/1780], lr: 1.25000e-04, loss: 5.475
Epoch [32][585/1780], lr: 1.25000e-04, loss: 4.4686
Epoch [32][590/1780], lr: 1.25000e-04, loss: 4.0105
Epoch [32][595/1780], lr: 1.25000e-04, loss: 4.732
Epoch [32][600/1780], lr: 1.25000e-04, loss: 4.7962
Epoch [32][605/1780], lr: 1.25000e-04, loss: 5.2293
Epoch [32][610/1780], lr: 1.25000e-04, loss: 4.6828
Epoch [32][615/1780], lr: 1.25000e-04, loss: 4.7423
Epoch [32][620/1780], lr: 1.25000e-04, loss: 4.8235
Epoch [32][625/1780], lr: 1.25000e-04, loss: 4.0111
Epoch [32][630/1780], lr: 1.25000e-04, loss: 5.6892
Epoch [32][635

Epoch [32][1325/1780], lr: 1.25000e-04, loss: 4.366
Epoch [32][1330/1780], lr: 1.25000e-04, loss: 3.9091
Epoch [32][1335/1780], lr: 1.25000e-04, loss: 4.023
Epoch [32][1340/1780], lr: 1.25000e-04, loss: 4.3582
Epoch [32][1345/1780], lr: 1.25000e-04, loss: 4.5438
Epoch [32][1350/1780], lr: 1.25000e-04, loss: 4.3177
Epoch [32][1355/1780], lr: 1.25000e-04, loss: 4.5841
Epoch [32][1360/1780], lr: 1.25000e-04, loss: 3.3744
Epoch [32][1365/1780], lr: 1.25000e-04, loss: 4.9495
Epoch [32][1370/1780], lr: 1.25000e-04, loss: 4.9973
Epoch [32][1375/1780], lr: 1.25000e-04, loss: 4.4543
Epoch [32][1380/1780], lr: 1.25000e-04, loss: 4.0903
Epoch [32][1385/1780], lr: 1.25000e-04, loss: 3.6164
Epoch [32][1390/1780], lr: 1.25000e-04, loss: 5.2055
Epoch [32][1395/1780], lr: 1.25000e-04, loss: 4.0889
Epoch [32][1400/1780], lr: 1.25000e-04, loss: 3.5949
Epoch [32][1405/1780], lr: 1.25000e-04, loss: 4.779
Epoch [32][1410/1780], lr: 1.25000e-04, loss: 5.3077
Epoch [32][1415/1780], lr: 1.25000e-04, loss: 5.3

Epoch [33][315/1780], lr: 1.25000e-04, loss: 4.5898
Epoch [33][320/1780], lr: 1.25000e-04, loss: 4.047
Epoch [33][325/1780], lr: 1.25000e-04, loss: 4.3883
Epoch [33][330/1780], lr: 1.25000e-04, loss: 3.5269
Epoch [33][335/1780], lr: 1.25000e-04, loss: 4.7576
Epoch [33][340/1780], lr: 1.25000e-04, loss: 4.3955
Epoch [33][345/1780], lr: 1.25000e-04, loss: 3.8915
Epoch [33][350/1780], lr: 1.25000e-04, loss: 5.0365
Epoch [33][355/1780], lr: 1.25000e-04, loss: 4.7769
Epoch [33][360/1780], lr: 1.25000e-04, loss: 4.8067
Epoch [33][365/1780], lr: 1.25000e-04, loss: 3.9099
Epoch [33][370/1780], lr: 1.25000e-04, loss: 4.2665
Epoch [33][375/1780], lr: 1.25000e-04, loss: 4.3634
Epoch [33][380/1780], lr: 1.25000e-04, loss: 4.9645
Epoch [33][385/1780], lr: 1.25000e-04, loss: 4.4095
Epoch [33][390/1780], lr: 1.25000e-04, loss: 4.0243
Epoch [33][395/1780], lr: 1.25000e-04, loss: 4.2476
Epoch [33][400/1780], lr: 1.25000e-04, loss: 4.5164
Epoch [33][405/1780], lr: 1.25000e-04, loss: 4.1028
Epoch [33][41

Epoch [33][1105/1780], lr: 1.25000e-04, loss: 3.6812
Epoch [33][1110/1780], lr: 1.25000e-04, loss: 3.9113
Epoch [33][1115/1780], lr: 1.25000e-04, loss: 5.0894
Epoch [33][1120/1780], lr: 1.25000e-04, loss: 4.6501
Epoch [33][1125/1780], lr: 1.25000e-04, loss: 4.9304
Epoch [33][1130/1780], lr: 1.25000e-04, loss: 4.3442
Epoch [33][1135/1780], lr: 1.25000e-04, loss: 4.237
Epoch [33][1140/1780], lr: 1.25000e-04, loss: 4.255
Epoch [33][1145/1780], lr: 1.25000e-04, loss: 4.2985
Epoch [33][1150/1780], lr: 1.25000e-04, loss: 4.4589
Epoch [33][1155/1780], lr: 1.25000e-04, loss: 3.6316
Epoch [33][1160/1780], lr: 1.25000e-04, loss: 5.0104
Epoch [33][1165/1780], lr: 1.25000e-04, loss: 4.1456
Epoch [33][1170/1780], lr: 1.25000e-04, loss: 4.1414
Epoch [33][1175/1780], lr: 1.25000e-04, loss: 3.9571
Epoch [33][1180/1780], lr: 1.25000e-04, loss: 4.8576
Epoch [33][1185/1780], lr: 1.25000e-04, loss: 4.9604
Epoch [33][1190/1780], lr: 1.25000e-04, loss: 4.3748
Epoch [33][1195/1780], lr: 1.25000e-04, loss: 5.