# Squeeze-and-Excitation Network (SENet) in PyTorch and TensorFlow

For a more detailed breakdown of SENet, check out the [full tutorial on the blog](https://blog.paperspace.com/channel-attention-squeeze-and-excitation-networks/).

In [3]:
# Model parameter and FLOP counter (provides the `ptflops` package imported below); uncomment the next line to install it
# !pip install --upgrade git+https://github.com/sovrasov/flops-counter.pytorch.git

In [4]:
!nvidia-smi

Tue Sep 29 16:46:53 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 450.36.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro P6000        On   | 00000000:00:05.0 Off |                  Off |
| 26%   59C    P0   156W / 250W |   3119MiB / 24449MiB |     99%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [5]:
# Best validation precision@1 seen so far; main() updates it through `global`.
best_prec1 = 0
# Declared `global` in main() but not read anywhere in the visible code.
evaluate = True
import time
import torch
import torch.nn as nn
from ptflops import get_model_complexity_info
import torch.nn.functional as F
import torch.nn.init as init
import os
import shutil
import time
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import numpy as np
import math
from torch.nn.parameter import Parameter

def main():
    """Build an SE-ResNet20, report its complexity, and train it on CIFAR-10.

    The network building blocks (``SE``, ``BasicBlock``, ``ResNet``) are
    defined locally.  After printing the parameter/MAC report from
    ``ptflops`` the model is moved to the GPU and trained with SGD for 200
    epochs, validating after every epoch.  The latest weights are written to
    ``./vanilla_model.th`` after every epoch and an extra checkpoint is
    written to ``./vanilla_checkpoint.th`` every 20 epochs.

    Side effects: requires a CUDA device, downloads CIFAR-10 into ``./data``
    and updates the module-level ``best_prec1``.
    """
    global best_prec1, evaluate

    def _weights_init(m):
        """Apply Kaiming-normal initialisation to Linear and Conv2d weights."""
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.kaiming_normal_(m.weight)

    class LambdaLayer(nn.Module):
        """Wrap an arbitrary callable so it can be used as an ``nn.Module``."""

        def __init__(self, lambd):
            super(LambdaLayer, self).__init__()
            self.lambd = lambd

        def forward(self, x):
            return self.lambd(x)

    ### Squeeze and Excitation Class definition
    class SE(nn.Module):
        """Squeeze-and-Excitation channel attention.

        Args:
            channel: number of channels of the incoming feature map.
            reduction_ratio: divisor applied to ``channel`` to size the
                bottleneck of the MLP (``channel // reduction_ratio``).
        """

        def __init__(self, channel, reduction_ratio=16):
            super(SE, self).__init__()
            ### Global Average Pooling (the "squeeze" step)
            self.gap = nn.AdaptiveAvgPool2d(1)

            ### Fully Connected Multi-Layer Perceptron (FC-MLP, the "excitation" step)
            self.mlp = nn.Sequential(
                nn.Linear(channel, channel // reduction_ratio, bias=False),
                nn.ReLU(inplace=True),
                nn.Linear(channel // reduction_ratio, channel, bias=False),
                nn.Sigmoid()
            )

        def forward(self, x):
            b, c, _, _ = x.size()
            # Squeeze: one scalar per channel.
            y = self.gap(x).view(b, c)
            # Excite: per-channel gate in (0, 1), reshaped for broadcasting.
            y = self.mlp(y).view(b, c, 1, 1)
            return x * y.expand_as(x)

    class BasicBlock(nn.Module):
        """Two-conv residual block, optionally followed by an SE module.

        Args:
            in_planes: number of input channels.
            planes: number of output channels.
            stride: stride of the first convolution.
            option: shortcut type used when the shape changes; ``'A'`` is a
                parameter-free strided slice plus zero padding of the channel
                dimension, ``'B'`` is a 1x1 convolution with batch norm.
            use_se: when true, apply ``SE`` to the block output.
        """
        expansion = 1

        def __init__(self, in_planes, planes, stride=1, option='A', use_se=True):
            super(BasicBlock, self).__init__()
            self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(planes)
            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(planes)
            self.use_se = use_se
            if self.use_se:
                self.se = SE(planes)

            # Identity shortcut unless the spatial size or channel count changes.
            self.shortcut = nn.Sequential()
            if stride != 1 or in_planes != planes:
                if option == 'A':
                    # For CIFAR10 ResNet paper uses option A.
                    # Subsample spatially by 2 and zero-pad planes//4
                    # channels on each side of the channel dimension.
                    self.shortcut = LambdaLayer(
                        lambda x: F.pad(x[:, :, ::2, ::2],
                                        (0, 0, 0, 0, planes // 4, planes // 4),
                                        "constant", 0))
                elif option == 'B':
                    self.shortcut = nn.Sequential(
                        nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                        nn.BatchNorm2d(self.expansion * planes)
                    )

        def forward(self, x):
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.bn2(self.conv2(out))
            out += self.shortcut(x)
            out = F.relu(out)
            # The SE gate is applied after the residual addition and ReLU.
            if self.use_se:
                out = self.se(out)
            return out

    class ResNet(nn.Module):
        """Three-stage ResNet (16/32/64 channels) with a linear classifier.

        Args:
            block: block class used to build each stage.
            num_blocks: sequence of three ints, the block count per stage.
            num_classes: size of the classifier output.
        """

        def __init__(self, block, num_blocks, num_classes=10):
            super(ResNet, self).__init__()
            self.in_planes = 16

            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(16)
            self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
            self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
            self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
            self.linear = nn.Linear(64, num_classes)

            self.apply(_weights_init)

        def _make_layer(self, block, planes, num_blocks, stride):
            """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
            strides = [stride] + [1] * (num_blocks - 1)
            layers = []
            for block_stride in strides:
                layers.append(block(self.in_planes, planes, block_stride))
                self.in_planes = planes * block.expansion

            return nn.Sequential(*layers)

        def forward(self, x):
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.layer1(out)
            out = self.layer2(out)
            out = self.layer3(out)
            # Global average pool: kernel size equals the remaining width.
            out = F.avg_pool2d(out, out.size()[3])
            out = out.view(out.size(0), -1)
            out = self.linear(out)
            return out

    def resnet20():
        return ResNet(BasicBlock, [3, 3, 3])

    def resnet32():
        return ResNet(BasicBlock, [5, 5, 5])

    def resnet44():
        return ResNet(BasicBlock, [7, 7, 7])

    def resnet56():
        return ResNet(BasicBlock, [9, 9, 9])

    def resnet110():
        return ResNet(BasicBlock, [18, 18, 18])

    def resnet1202():
        return ResNet(BasicBlock, [200, 200, 200])

    model = resnet20()

    # NOTE(review): complexity is measured for a 3x224x224 input, while the
    # training pipeline below crops CIFAR-10 images to 32x32 - confirm that
    # this resolution is the one intended for the report.
    with torch.cuda.device(0):
        flops, params = get_model_complexity_info(model, (3, 224, 224), as_strings=True, print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    model.cuda()

    cudnn.benchmark = True

    # NOTE(review): these look like the commonly used ImageNet statistics
    # rather than CIFAR-10 specific ones - confirm.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=128, shuffle=True,
        num_workers=4, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=128, shuffle=False,
        num_workers=4, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), 0.1,
                                momentum=0.9,
                                weight_decay=5e-4)

    # Learning-rate decay (scheduler default factor) at epochs 100 and 150.
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=[100, 150], last_epoch=-1)

    for epoch in range(0, 200):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint.  This must be computed
        # exactly once per epoch: repeating it after best_prec1 has been
        # raised would always yield is_best == False.
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % 20 == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, filename=os.path.join('./', 'vanilla_checkpoint.th'))

        save_checkpoint({
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, filename=os.path.join('./', 'vanilla_model.th'))



def train(train_loader, model, criterion, optimizer, epoch):
    """
        Train ``model`` for one full pass over ``train_loader``.

        Every batch is moved to the GPU, a forward/backward pass and an
        optimizer step are performed, and running averages of the data
        loading time, batch time, loss and precision@1 are maintained.
        A progress line is printed every 20 batches; ``epoch`` is only used
        in that line.
    """
    batch_timer = AverageMeter()
    load_timer = AverageMeter()
    loss_meter = AverageMeter()
    prec_meter = AverageMeter()

    # put the network into training mode
    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):

        # time spent waiting for the loader
        load_timer.update(time.time() - tick)

        labels = labels.cuda()
        images_gpu = images.cuda()

        # forward pass
        logits = model(images_gpu)
        batch_loss = criterion(logits, labels)

        # backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        logits = logits.float()
        batch_loss = batch_loss.float()

        # bookkeeping, weighted by the number of samples in the batch
        n_samples = images.size(0)
        batch_prec = accuracy(logits.data, labels)[0]
        loss_meter.update(batch_loss.item(), n_samples)
        prec_meter.update(batch_prec.item(), n_samples)

        # total time for this batch
        batch_timer.update(time.time() - tick)
        tick = time.time()

        if step % 20 != 0:
            continue

        template = ('Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')
        print(template.format(
            epoch, step, len(train_loader), batch_time=batch_timer,
            data_time=load_timer, loss=loss_meter, top1=prec_meter))


def validate(val_loader, model, criterion):
    """
    Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Prints a progress line every 20 batches and a final summary line, and
    returns the sample-weighted average precision@1 over the whole loader.
    """
    batch_timer = AverageMeter()
    loss_meter = AverageMeter()
    prec_meter = AverageMeter()

    # put the network into evaluation mode
    model.eval()

    progress = ('Test: [{0}/{1}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    tick = time.time()
    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            labels = labels.cuda()
            images_gpu = images.cuda()
            labels_gpu = labels.cuda()

            # forward pass
            logits = model(images_gpu)
            batch_loss = criterion(logits, labels_gpu)

            logits = logits.float()
            batch_loss = batch_loss.float()

            # bookkeeping, weighted by the number of samples in the batch
            n_samples = images.size(0)
            batch_prec = accuracy(logits.data, labels)[0]
            loss_meter.update(batch_loss.item(), n_samples)
            prec_meter.update(batch_prec.item(), n_samples)

            # total time for this batch
            batch_timer.update(time.time() - tick)
            tick = time.time()

            if step % 20 != 0:
                continue

            print(progress.format(
                step, len(val_loader), batch_time=batch_timer,
                loss=loss_meter, top1=prec_meter))

    summary = ' * Prec@1 {top1.avg:.3f}'
    print(summary.format(top1=prec_meter))

    return prec_meter.avg

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """
    Serialize ``state`` to ``filename`` using ``torch.save``.

    ``is_best`` is accepted for the caller's convenience but is not used
    here: the same file is written regardless of its value.
    """
    torch.save(obj=state, f=filename)

class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes set by ``reset``/``update``:
        val:   the most recent value passed to ``update``.
        sum:   the weighted sum of all values seen since the last reset.
        count: the total weight (e.g. number of samples) seen so far.
        avg:   ``sum / count``, or 0 while ``count`` is still 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` and refresh the average.

        Args:
            val: the new measurement (typically a per-batch mean).
            n: the weight of the measurement, e.g. the batch size.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # An update with zero total weight (e.g. an empty batch right after
        # reset) must not raise ZeroDivisionError; the average stays at 0.
        self.avg = self.sum / self.count if self.count else 0


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of scores, one row per sample and one column per
            class (``topk`` is taken along dimension 1).
        target: 1-D tensor holding the correct class index of every sample.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``: the percentage of
        samples whose target is among the ``k`` highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to
    # (maxk, batch) so that row i holds every sample's (i+1)-th best guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` derives from a transposed tensor and is not contiguous,
        # so `.view(-1)` raises for k > 1; `.reshape(-1)` copies when needed.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res



In [6]:
# Start the full build/report/train pipeline only when this code is run
# directly (as a script or notebook cell), not when it is imported.
if __name__ == '__main__':
    main()

ResNet(
  0.272 M, 100.000% Params, 2.01 GMac, 100.000% MACs, 
  (conv1): Conv2d(0.0 M, 0.159% Params, 0.022 GMac, 1.079% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    0.014 M, 5.193% Params, 0.706 GMac, 35.114% MACs, 
    (0): BasicBlock(
      0.005 M, 1.731% Params, 0.235 GMac, 11.705% MACs, 
      (conv1): Conv2d(0.002 M, 0.848% Params, 0.116 GMac, 5.753% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(0.002 M, 0.848% Params, 0.116 GMac, 5.753% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05

Files already downloaded and verified
current lr 1.00000e-01
Epoch: [0][0/391]	Time 0.798 (0.798)	Data 0.240 (0.240)	Loss 2.3965 (2.3965)	Prec@1 13.281 (13.281)
Epoch: [0][20/391]	Time 0.064 (0.093)	Data 0.000 (0.012)	Loss 2.0916 (2.2359)	Prec@1 21.875 (14.174)
Epoch: [0][40/391]	Time 0.068 (0.076)	Data 0.000 (0.006)	Loss 1.9182 (2.1109)	Prec@1 20.312 (18.788)
Epoch: [0][60/391]	Time 0.063 (0.070)	Data 0.000 (0.004)	Loss 1.8620 (2.0432)	Prec@1 27.344 (20.761)
Epoch: [0][80/391]	Time 0.066 (0.067)	Data 0.000 (0.003)	Loss 1.8958 (1.9928)	Prec@1 21.094 (22.512)
Epoch: [0][100/391]	Time 0.055 (0.065)	Data 0.000 (0.003)	Loss 1.8057 (1.9547)	Prec@1 29.688 (24.080)
Epoch: [0][120/391]	Time 0.054 (0.063)	Data 0.000 (0.002)	Loss 1.7386 (1.9199)	Prec@1 30.469 (25.400)
Epoch: [0][140/391]	Time 0.057 (0.063)	Data 0.000 (0.002)	Loss 1.7962 (1.8953)	Prec@1 36.719 (26.585)
Epoch: [0][160/391]	Time 0.048 (0.062)	Data 0.000 (0.002)	Loss 1.5293 (1.8785)	Prec@1 40.625 (27.417)
Epoch: [0][180/391]	Time 0.

Epoch: [3][200/391]	Time 0.060 (0.060)	Data 0.000 (0.002)	Loss 0.9488 (0.9014)	Prec@1 68.750 (68.404)
Epoch: [3][220/391]	Time 0.050 (0.059)	Data 0.000 (0.002)	Loss 0.8739 (0.9006)	Prec@1 71.094 (68.375)
Epoch: [3][240/391]	Time 0.054 (0.059)	Data 0.000 (0.001)	Loss 0.7579 (0.8971)	Prec@1 73.438 (68.539)
Epoch: [3][260/391]	Time 0.052 (0.059)	Data 0.000 (0.001)	Loss 0.9361 (0.8983)	Prec@1 64.062 (68.502)
Epoch: [3][280/391]	Time 0.059 (0.059)	Data 0.000 (0.001)	Loss 0.9618 (0.8958)	Prec@1 67.969 (68.611)
Epoch: [3][300/391]	Time 0.059 (0.059)	Data 0.000 (0.001)	Loss 0.9098 (0.8946)	Prec@1 66.406 (68.651)
Epoch: [3][320/391]	Time 0.053 (0.059)	Data 0.000 (0.001)	Loss 0.9566 (0.8922)	Prec@1 67.188 (68.740)
Epoch: [3][340/391]	Time 0.065 (0.059)	Data 0.000 (0.001)	Loss 0.7931 (0.8895)	Prec@1 71.094 (68.871)
Epoch: [3][360/391]	Time 0.048 (0.059)	Data 0.000 (0.001)	Loss 0.7112 (0.8854)	Prec@1 77.344 (69.005)
Epoch: [3][380/391]	Time 0.052 (0.059)	Data 0.000 (0.001)	Loss 0.6907 (0.8809)	Pre

Test: [20/79]	Time 0.020 (0.026)	Loss 0.8012 (0.8714)	Prec@1 70.312 (69.234)
Test: [40/79]	Time 0.014 (0.020)	Loss 0.9485 (0.8407)	Prec@1 66.406 (71.113)
Test: [60/79]	Time 0.026 (0.019)	Loss 0.9097 (0.8440)	Prec@1 65.625 (70.761)
 * Prec@1 70.680
current lr 1.00000e-01
Epoch: [7][0/391]	Time 0.323 (0.323)	Data 0.249 (0.249)	Loss 0.5539 (0.5539)	Prec@1 78.906 (78.906)
Epoch: [7][20/391]	Time 0.064 (0.070)	Data 0.000 (0.012)	Loss 0.5711 (0.6499)	Prec@1 79.688 (77.753)
Epoch: [7][40/391]	Time 0.062 (0.064)	Data 0.000 (0.006)	Loss 0.6677 (0.6712)	Prec@1 78.125 (77.401)
Epoch: [7][60/391]	Time 0.050 (0.062)	Data 0.000 (0.004)	Loss 0.7037 (0.6695)	Prec@1 80.469 (77.177)
Epoch: [7][80/391]	Time 0.056 (0.061)	Data 0.000 (0.003)	Loss 0.5819 (0.6736)	Prec@1 77.344 (76.968)
Epoch: [7][100/391]	Time 0.048 (0.060)	Data 0.000 (0.003)	Loss 0.6271 (0.6735)	Prec@1 77.344 (76.903)
Epoch: [7][120/391]	Time 0.047 (0.059)	Data 0.000 (0.002)	Loss 0.7328 (0.6780)	Prec@1 75.781 (76.756)
Epoch: [7][140/391]	T

Epoch: [10][160/391]	Time 0.046 (0.054)	Data 0.000 (0.002)	Loss 0.4433 (0.6230)	Prec@1 84.375 (78.547)
Epoch: [10][180/391]	Time 0.048 (0.054)	Data 0.000 (0.002)	Loss 0.4874 (0.6257)	Prec@1 81.250 (78.479)
Epoch: [10][200/391]	Time 0.055 (0.054)	Data 0.000 (0.001)	Loss 0.6581 (0.6272)	Prec@1 80.469 (78.490)
Epoch: [10][220/391]	Time 0.064 (0.055)	Data 0.000 (0.001)	Loss 0.4831 (0.6274)	Prec@1 84.375 (78.468)
Epoch: [10][240/391]	Time 0.063 (0.055)	Data 0.000 (0.001)	Loss 0.7708 (0.6302)	Prec@1 75.781 (78.404)
Epoch: [10][260/391]	Time 0.061 (0.055)	Data 0.000 (0.001)	Loss 0.7403 (0.6299)	Prec@1 71.094 (78.397)
Epoch: [10][280/391]	Time 0.069 (0.055)	Data 0.000 (0.001)	Loss 0.6595 (0.6301)	Prec@1 76.562 (78.397)
Epoch: [10][300/391]	Time 0.060 (0.055)	Data 0.000 (0.001)	Loss 0.5780 (0.6314)	Prec@1 82.812 (78.395)
Epoch: [10][320/391]	Time 0.055 (0.055)	Data 0.000 (0.001)	Loss 0.6877 (0.6287)	Prec@1 75.000 (78.466)
Epoch: [10][340/391]	Time 0.051 (0.056)	Data 0.000 (0.001)	Loss 0.6325 (0

Epoch: [13][360/391]	Time 0.049 (0.056)	Data 0.000 (0.001)	Loss 0.7811 (0.5886)	Prec@1 71.875 (79.662)
Epoch: [13][380/391]	Time 0.055 (0.056)	Data 0.000 (0.001)	Loss 0.8405 (0.5890)	Prec@1 71.875 (79.640)
Test: [0/79]	Time 0.216 (0.216)	Loss 0.8866 (0.8866)	Prec@1 72.656 (72.656)
Test: [20/79]	Time 0.015 (0.027)	Loss 0.8135 (0.8631)	Prec@1 71.875 (71.838)
Test: [40/79]	Time 0.010 (0.021)	Loss 1.0686 (0.8566)	Prec@1 66.406 (71.723)
Test: [60/79]	Time 0.018 (0.020)	Loss 0.9046 (0.8583)	Prec@1 67.969 (71.542)
 * Prec@1 71.650
current lr 1.00000e-01
Epoch: [14][0/391]	Time 0.332 (0.332)	Data 0.262 (0.262)	Loss 0.5334 (0.5334)	Prec@1 80.469 (80.469)
Epoch: [14][20/391]	Time 0.064 (0.071)	Data 0.000 (0.013)	Loss 0.6220 (0.5685)	Prec@1 79.688 (80.060)
Epoch: [14][40/391]	Time 0.052 (0.064)	Data 0.000 (0.007)	Loss 0.4419 (0.5732)	Prec@1 83.594 (80.335)
Epoch: [14][60/391]	Time 0.054 (0.062)	Data 0.000 (0.005)	Loss 0.6001 (0.5873)	Prec@1 80.469 (79.956)
Epoch: [14][80/391]	Time 0.064 (0.061)	D

Epoch: [17][100/391]	Time 0.056 (0.060)	Data 0.000 (0.003)	Loss 0.5085 (0.5624)	Prec@1 82.812 (80.631)
Epoch: [17][120/391]	Time 0.063 (0.059)	Data 0.000 (0.002)	Loss 0.5291 (0.5579)	Prec@1 80.469 (80.727)
Epoch: [17][140/391]	Time 0.052 (0.059)	Data 0.000 (0.002)	Loss 0.3553 (0.5541)	Prec@1 86.719 (80.868)
Epoch: [17][160/391]	Time 0.051 (0.059)	Data 0.000 (0.002)	Loss 0.5572 (0.5529)	Prec@1 81.250 (80.808)
Epoch: [17][180/391]	Time 0.049 (0.059)	Data 0.000 (0.002)	Loss 0.7044 (0.5555)	Prec@1 75.781 (80.754)
Epoch: [17][200/391]	Time 0.059 (0.059)	Data 0.000 (0.002)	Loss 0.5737 (0.5557)	Prec@1 78.906 (80.710)
Epoch: [17][220/391]	Time 0.060 (0.058)	Data 0.000 (0.001)	Loss 0.7361 (0.5590)	Prec@1 78.125 (80.603)
Epoch: [17][240/391]	Time 0.063 (0.058)	Data 0.000 (0.001)	Loss 0.6430 (0.5603)	Prec@1 75.781 (80.550)
Epoch: [17][260/391]	Time 0.046 (0.058)	Data 0.000 (0.001)	Loss 0.7035 (0.5597)	Prec@1 79.688 (80.594)
Epoch: [17][280/391]	Time 0.048 (0.058)	Data 0.000 (0.001)	Loss 0.5574 (0

Epoch: [20][300/391]	Time 0.062 (0.056)	Data 0.000 (0.001)	Loss 0.3141 (0.5383)	Prec@1 90.625 (81.494)
Epoch: [20][320/391]	Time 0.060 (0.056)	Data 0.000 (0.001)	Loss 0.5330 (0.5369)	Prec@1 82.812 (81.527)
Epoch: [20][340/391]	Time 0.062 (0.057)	Data 0.000 (0.001)	Loss 0.6176 (0.5371)	Prec@1 77.344 (81.520)
Epoch: [20][360/391]	Time 0.049 (0.057)	Data 0.000 (0.001)	Loss 0.6100 (0.5383)	Prec@1 77.344 (81.484)
Epoch: [20][380/391]	Time 0.066 (0.057)	Data 0.000 (0.001)	Loss 0.5365 (0.5401)	Prec@1 81.250 (81.406)
Test: [0/79]	Time 0.212 (0.212)	Loss 0.7248 (0.7248)	Prec@1 75.781 (75.781)
Test: [20/79]	Time 0.015 (0.025)	Loss 0.7718 (0.7178)	Prec@1 73.438 (76.562)
Test: [40/79]	Time 0.016 (0.021)	Loss 0.8041 (0.7263)	Prec@1 73.438 (76.391)
Test: [60/79]	Time 0.018 (0.019)	Loss 0.6612 (0.7132)	Prec@1 79.688 (76.755)
 * Prec@1 76.770
current lr 1.00000e-01
Epoch: [21][0/391]	Time 0.302 (0.302)	Data 0.244 (0.244)	Loss 0.5607 (0.5607)	Prec@1 75.781 (75.781)
Epoch: [21][20/391]	Time 0.057 (0.068

Epoch: [24][40/391]	Time 0.045 (0.070)	Data 0.000 (0.019)	Loss 0.5325 (0.5262)	Prec@1 77.344 (81.593)
Epoch: [24][60/391]	Time 0.063 (0.066)	Data 0.000 (0.013)	Loss 0.5274 (0.5171)	Prec@1 82.812 (82.057)
Epoch: [24][80/391]	Time 0.060 (0.064)	Data 0.000 (0.010)	Loss 0.5322 (0.5253)	Prec@1 80.469 (81.761)
Epoch: [24][100/391]	Time 0.061 (0.062)	Data 0.000 (0.008)	Loss 0.6385 (0.5226)	Prec@1 76.562 (81.861)
Epoch: [24][120/391]	Time 0.057 (0.061)	Data 0.000 (0.007)	Loss 0.6121 (0.5231)	Prec@1 79.688 (81.934)
Epoch: [24][140/391]	Time 0.064 (0.061)	Data 0.000 (0.006)	Loss 0.5341 (0.5220)	Prec@1 84.375 (82.087)
Epoch: [24][160/391]	Time 0.063 (0.060)	Data 0.000 (0.005)	Loss 0.5478 (0.5257)	Prec@1 76.562 (81.910)
Epoch: [24][180/391]	Time 0.062 (0.060)	Data 0.000 (0.005)	Loss 0.6052 (0.5246)	Prec@1 82.031 (82.079)
Epoch: [24][200/391]	Time 0.059 (0.060)	Data 0.000 (0.004)	Loss 0.5959 (0.5245)	Prec@1 81.250 (82.121)
Epoch: [24][220/391]	Time 0.059 (0.059)	Data 0.000 (0.004)	Loss 0.4421 (0.52

Epoch: [27][240/391]	Time 0.055 (0.056)	Data 0.000 (0.001)	Loss 0.5609 (0.5016)	Prec@1 80.469 (82.628)
Epoch: [27][260/391]	Time 0.049 (0.056)	Data 0.000 (0.001)	Loss 0.6583 (0.5025)	Prec@1 75.781 (82.621)
Epoch: [27][280/391]	Time 0.054 (0.056)	Data 0.000 (0.001)	Loss 0.5504 (0.5070)	Prec@1 82.031 (82.490)
Epoch: [27][300/391]	Time 0.057 (0.056)	Data 0.000 (0.001)	Loss 0.4868 (0.5077)	Prec@1 82.812 (82.470)
Epoch: [27][320/391]	Time 0.043 (0.056)	Data 0.000 (0.001)	Loss 0.4878 (0.5076)	Prec@1 83.594 (82.467)
Epoch: [27][340/391]	Time 0.068 (0.056)	Data 0.000 (0.001)	Loss 0.5912 (0.5085)	Prec@1 77.344 (82.432)
Epoch: [27][360/391]	Time 0.062 (0.056)	Data 0.000 (0.001)	Loss 0.5077 (0.5084)	Prec@1 78.906 (82.477)
Epoch: [27][380/391]	Time 0.068 (0.056)	Data 0.000 (0.001)	Loss 0.4479 (0.5078)	Prec@1 83.594 (82.476)
Test: [0/79]	Time 0.262 (0.262)	Loss 0.8854 (0.8854)	Prec@1 73.438 (73.438)
Test: [20/79]	Time 0.014 (0.030)	Loss 0.9020 (0.9792)	Prec@1 71.875 (70.833)
Test: [40/79]	Time 0.01

Test: [60/79]	Time 0.030 (0.024)	Loss 0.9269 (0.8026)	Prec@1 70.312 (73.476)
 * Prec@1 73.860
current lr 1.00000e-01
Epoch: [31][0/391]	Time 0.260 (0.260)	Data 0.209 (0.209)	Loss 0.4777 (0.4777)	Prec@1 83.594 (83.594)
Epoch: [31][20/391]	Time 0.050 (0.071)	Data 0.000 (0.014)	Loss 0.3796 (0.4696)	Prec@1 87.500 (83.780)
Epoch: [31][40/391]	Time 0.066 (0.065)	Data 0.000 (0.007)	Loss 0.5161 (0.4653)	Prec@1 80.469 (84.013)
Epoch: [31][60/391]	Time 0.059 (0.062)	Data 0.000 (0.005)	Loss 0.4057 (0.4662)	Prec@1 84.375 (83.850)
Epoch: [31][80/391]	Time 0.065 (0.061)	Data 0.000 (0.004)	Loss 0.5396 (0.4839)	Prec@1 82.812 (83.140)
Epoch: [31][100/391]	Time 0.055 (0.060)	Data 0.000 (0.003)	Loss 0.4682 (0.4869)	Prec@1 84.375 (82.959)
Epoch: [31][120/391]	Time 0.059 (0.060)	Data 0.000 (0.003)	Loss 0.6358 (0.4945)	Prec@1 79.688 (82.858)
Epoch: [31][140/391]	Time 0.055 (0.060)	Data 0.000 (0.002)	Loss 0.4994 (0.4980)	Prec@1 82.031 (82.796)
Epoch: [31][160/391]	Time 0.062 (0.060)	Data 0.000 (0.002)	Loss 0

Epoch: [34][180/391]	Time 0.052 (0.058)	Data 0.000 (0.002)	Loss 0.6417 (0.4699)	Prec@1 76.562 (83.654)
Epoch: [34][200/391]	Time 0.043 (0.057)	Data 0.000 (0.001)	Loss 0.4736 (0.4752)	Prec@1 82.031 (83.559)
Epoch: [34][220/391]	Time 0.051 (0.056)	Data 0.000 (0.001)	Loss 0.5903 (0.4798)	Prec@1 79.688 (83.495)
Epoch: [34][240/391]	Time 0.056 (0.056)	Data 0.000 (0.001)	Loss 0.4449 (0.4786)	Prec@1 82.812 (83.591)
Epoch: [34][260/391]	Time 0.053 (0.055)	Data 0.000 (0.001)	Loss 0.5554 (0.4800)	Prec@1 82.031 (83.609)
Epoch: [34][280/391]	Time 0.051 (0.055)	Data 0.000 (0.001)	Loss 0.5114 (0.4813)	Prec@1 81.250 (83.588)
Epoch: [34][300/391]	Time 0.059 (0.056)	Data 0.000 (0.001)	Loss 0.4546 (0.4834)	Prec@1 87.500 (83.531)
Epoch: [34][320/391]	Time 0.060 (0.056)	Data 0.000 (0.001)	Loss 0.3340 (0.4854)	Prec@1 89.844 (83.477)
Epoch: [34][340/391]	Time 0.062 (0.056)	Data 0.000 (0.001)	Loss 0.3449 (0.4858)	Prec@1 88.281 (83.452)
Epoch: [34][360/391]	Time 0.053 (0.056)	Data 0.000 (0.001)	Loss 0.4729 (0

Epoch: [37][380/391]	Time 0.057 (0.057)	Data 0.000 (0.001)	Loss 0.5259 (0.4825)	Prec@1 82.031 (83.448)
Test: [0/79]	Time 0.228 (0.228)	Loss 0.7395 (0.7395)	Prec@1 78.906 (78.906)
Test: [20/79]	Time 0.013 (0.029)	Loss 0.8846 (0.8952)	Prec@1 70.312 (73.586)
Test: [40/79]	Time 0.014 (0.022)	Loss 0.9266 (0.8786)	Prec@1 71.875 (73.914)
Test: [60/79]	Time 0.007 (0.019)	Loss 0.9049 (0.8850)	Prec@1 74.219 (73.322)
 * Prec@1 73.510
current lr 1.00000e-01
Epoch: [38][0/391]	Time 0.366 (0.366)	Data 0.323 (0.323)	Loss 0.4326 (0.4326)	Prec@1 85.938 (85.938)
Epoch: [38][20/391]	Time 0.048 (0.064)	Data 0.000 (0.016)	Loss 0.3492 (0.4263)	Prec@1 87.500 (85.342)
Epoch: [38][40/391]	Time 0.063 (0.058)	Data 0.000 (0.008)	Loss 0.4133 (0.4707)	Prec@1 85.938 (84.032)
Epoch: [38][60/391]	Time 0.050 (0.056)	Data 0.000 (0.006)	Loss 0.4569 (0.4647)	Prec@1 85.938 (84.311)
Epoch: [38][80/391]	Time 0.050 (0.056)	Data 0.000 (0.004)	Loss 0.4984 (0.4550)	Prec@1 82.812 (84.520)
Epoch: [38][100/391]	Time 0.056 (0.056)	D

Test: [20/79]	Time 0.013 (0.027)	Loss 0.5504 (0.7004)	Prec@1 81.250 (77.232)
Test: [40/79]	Time 0.016 (0.022)	Loss 0.9783 (0.7198)	Prec@1 62.500 (76.791)
Test: [60/79]	Time 0.018 (0.020)	Loss 0.6735 (0.7229)	Prec@1 78.125 (76.998)
 * Prec@1 76.980
current lr 1.00000e-01
Epoch: [46][0/391]	Time 0.295 (0.295)	Data 0.237 (0.237)	Loss 0.7432 (0.7432)	Prec@1 76.562 (76.562)
Epoch: [46][20/391]	Time 0.055 (0.068)	Data 0.000 (0.012)	Loss 0.3991 (0.4687)	Prec@1 85.156 (83.594)
Epoch: [46][40/391]	Time 0.056 (0.059)	Data 0.000 (0.006)	Loss 0.5421 (0.4662)	Prec@1 78.906 (83.784)
Epoch: [46][60/391]	Time 0.060 (0.057)	Data 0.004 (0.004)	Loss 0.4283 (0.4640)	Prec@1 83.594 (83.773)
Epoch: [46][80/391]	Time 0.051 (0.056)	Data 0.000 (0.003)	Loss 0.4138 (0.4517)	Prec@1 85.156 (84.211)
Epoch: [46][100/391]	Time 0.056 (0.055)	Data 0.000 (0.003)	Loss 0.4667 (0.4496)	Prec@1 83.594 (84.244)
Epoch: [46][120/391]	Time 0.046 (0.054)	Data 0.000 (0.002)	Loss 0.3663 (0.4563)	Prec@1 89.062 (84.097)
Epoch: [46][14

Epoch: [49][160/391]	Time 0.062 (0.060)	Data 0.000 (0.003)	Loss 0.4544 (0.4601)	Prec@1 87.500 (84.137)
Epoch: [49][180/391]	Time 0.057 (0.059)	Data 0.000 (0.002)	Loss 0.3965 (0.4634)	Prec@1 88.281 (84.034)
Epoch: [49][200/391]	Time 0.054 (0.059)	Data 0.000 (0.002)	Loss 0.4867 (0.4657)	Prec@1 82.031 (83.846)
Epoch: [49][220/391]	Time 0.054 (0.059)	Data 0.000 (0.002)	Loss 0.4362 (0.4658)	Prec@1 85.938 (83.884)
Epoch: [49][240/391]	Time 0.050 (0.059)	Data 0.000 (0.002)	Loss 0.4852 (0.4652)	Prec@1 80.469 (83.873)
Epoch: [49][260/391]	Time 0.066 (0.059)	Data 0.000 (0.002)	Loss 0.5858 (0.4661)	Prec@1 77.344 (83.869)
Epoch: [49][280/391]	Time 0.062 (0.059)	Data 0.000 (0.002)	Loss 0.3978 (0.4668)	Prec@1 85.156 (83.869)
Epoch: [49][300/391]	Time 0.056 (0.059)	Data 0.000 (0.002)	Loss 0.4586 (0.4679)	Prec@1 83.594 (83.833)
Epoch: [49][320/391]	Time 0.053 (0.059)	Data 0.000 (0.001)	Loss 0.4778 (0.4672)	Prec@1 84.375 (83.869)
Epoch: [49][340/391]	Time 0.048 (0.058)	Data 0.000 (0.001)	Loss 0.5829 (0

Epoch: [52][360/391]	Time 0.069 (0.056)	Data 0.000 (0.001)	Loss 0.4024 (0.4648)	Prec@1 85.156 (83.877)
Epoch: [52][380/391]	Time 0.054 (0.056)	Data 0.000 (0.001)	Loss 0.4750 (0.4642)	Prec@1 78.906 (83.901)
Test: [0/79]	Time 0.207 (0.207)	Loss 0.5877 (0.5877)	Prec@1 80.469 (80.469)
Test: [20/79]	Time 0.024 (0.028)	Loss 0.5634 (0.6674)	Prec@1 81.250 (78.237)
Test: [40/79]	Time 0.011 (0.022)	Loss 0.6991 (0.6497)	Prec@1 79.688 (78.316)
Test: [60/79]	Time 0.012 (0.020)	Loss 0.7488 (0.6384)	Prec@1 75.000 (78.535)
 * Prec@1 78.370
current lr 1.00000e-01
Epoch: [53][0/391]	Time 0.287 (0.287)	Data 0.220 (0.220)	Loss 0.6313 (0.6313)	Prec@1 77.344 (77.344)
Epoch: [53][20/391]	Time 0.066 (0.068)	Data 0.000 (0.011)	Loss 0.4155 (0.4508)	Prec@1 85.156 (84.115)
Epoch: [53][40/391]	Time 0.056 (0.062)	Data 0.000 (0.006)	Loss 0.3905 (0.4396)	Prec@1 82.812 (84.413)
Epoch: [53][60/391]	Time 0.063 (0.061)	Data 0.000 (0.004)	Loss 0.4393 (0.4414)	Prec@1 85.156 (84.477)
Epoch: [53][80/391]	Time 0.054 (0.060)	D

Epoch: [56][100/391]	Time 0.063 (0.056)	Data 0.000 (0.003)	Loss 0.3735 (0.4484)	Prec@1 88.281 (84.677)
Epoch: [56][120/391]	Time 0.065 (0.056)	Data 0.000 (0.003)	Loss 0.4341 (0.4551)	Prec@1 84.375 (84.472)
Epoch: [56][140/391]	Time 0.055 (0.056)	Data 0.000 (0.002)	Loss 0.4895 (0.4565)	Prec@1 82.812 (84.425)
Epoch: [56][160/391]	Time 0.054 (0.056)	Data 0.000 (0.002)	Loss 0.4484 (0.4569)	Prec@1 87.500 (84.467)
Epoch: [56][180/391]	Time 0.048 (0.056)	Data 0.000 (0.002)	Loss 0.3214 (0.4551)	Prec@1 89.844 (84.582)
Epoch: [56][200/391]	Time 0.062 (0.057)	Data 0.000 (0.002)	Loss 0.7339 (0.4572)	Prec@1 78.906 (84.507)
Epoch: [56][220/391]	Time 0.060 (0.057)	Data 0.000 (0.002)	Loss 0.4900 (0.4590)	Prec@1 85.938 (84.481)
Epoch: [56][240/391]	Time 0.052 (0.057)	Data 0.000 (0.001)	Loss 0.4422 (0.4558)	Prec@1 85.938 (84.579)
Epoch: [56][260/391]	Time 0.056 (0.057)	Data 0.000 (0.001)	Loss 0.4156 (0.4581)	Prec@1 82.812 (84.480)
Epoch: [56][280/391]	Time 0.054 (0.057)	Data 0.000 (0.001)	Loss 0.5964 (0

Epoch: [59][300/391]	Time 0.045 (0.057)	Data 0.000 (0.001)	Loss 0.4578 (0.4444)	Prec@1 81.250 (84.520)
Epoch: [59][320/391]	Time 0.070 (0.056)	Data 0.000 (0.001)	Loss 0.5160 (0.4457)	Prec@1 83.594 (84.458)
Epoch: [59][340/391]	Time 0.051 (0.056)	Data 0.000 (0.001)	Loss 0.5231 (0.4453)	Prec@1 82.031 (84.510)
Epoch: [59][360/391]	Time 0.055 (0.056)	Data 0.000 (0.001)	Loss 0.4425 (0.4461)	Prec@1 83.594 (84.470)
Epoch: [59][380/391]	Time 0.067 (0.057)	Data 0.000 (0.001)	Loss 0.3995 (0.4486)	Prec@1 90.625 (84.400)
Test: [0/79]	Time 0.231 (0.231)	Loss 0.9041 (0.9041)	Prec@1 71.094 (71.094)
Test: [20/79]	Time 0.016 (0.024)	Loss 0.6329 (0.8426)	Prec@1 79.688 (74.144)
Test: [40/79]	Time 0.010 (0.020)	Loss 0.9158 (0.8336)	Prec@1 68.750 (74.676)
Test: [60/79]	Time 0.012 (0.020)	Loss 0.7822 (0.8341)	Prec@1 73.438 (74.360)
 * Prec@1 74.070
current lr 1.00000e-01
Epoch: [60][0/391]	Time 0.456 (0.456)	Data 0.404 (0.404)	Loss 0.3284 (0.3284)	Prec@1 85.156 (85.156)
Epoch: [60][20/391]	Time 0.050 (0.069

Epoch: [63][40/391]	Time 0.047 (0.064)	Data 0.000 (0.006)	Loss 0.4576 (0.4570)	Prec@1 83.594 (84.146)
Epoch: [63][60/391]	Time 0.046 (0.059)	Data 0.000 (0.004)	Loss 0.3944 (0.4530)	Prec@1 84.375 (84.426)
Epoch: [63][80/391]	Time 0.043 (0.057)	Data 0.000 (0.003)	Loss 0.4614 (0.4544)	Prec@1 83.594 (84.356)
Epoch: [63][100/391]	Time 0.043 (0.056)	Data 0.000 (0.003)	Loss 0.3501 (0.4502)	Prec@1 90.625 (84.483)
Epoch: [63][120/391]	Time 0.054 (0.055)	Data 0.000 (0.002)	Loss 0.5199 (0.4558)	Prec@1 77.344 (84.317)
Epoch: [63][140/391]	Time 0.056 (0.054)	Data 0.000 (0.002)	Loss 0.3826 (0.4530)	Prec@1 85.938 (84.464)
Epoch: [63][160/391]	Time 0.047 (0.054)	Data 0.000 (0.002)	Loss 0.4585 (0.4550)	Prec@1 85.938 (84.419)
Epoch: [63][180/391]	Time 0.047 (0.054)	Data 0.000 (0.002)	Loss 0.4393 (0.4511)	Prec@1 83.594 (84.634)
Epoch: [63][200/391]	Time 0.060 (0.055)	Data 0.000 (0.001)	Loss 0.6400 (0.4525)	Prec@1 78.125 (84.550)
Epoch: [63][220/391]	Time 0.056 (0.055)	Data 0.000 (0.001)	Loss 0.4365 (0.45

Epoch: [66][240/391]	Time 0.065 (0.059)	Data 0.000 (0.001)	Loss 0.4024 (0.4405)	Prec@1 89.062 (84.803)
Epoch: [66][260/391]	Time 0.044 (0.059)	Data 0.000 (0.001)	Loss 0.5133 (0.4438)	Prec@1 85.938 (84.731)
Epoch: [66][280/391]	Time 0.048 (0.059)	Data 0.000 (0.001)	Loss 0.4924 (0.4452)	Prec@1 83.594 (84.698)
Epoch: [66][300/391]	Time 0.042 (0.058)	Data 0.000 (0.001)	Loss 0.3636 (0.4450)	Prec@1 89.062 (84.673)
Epoch: [66][320/391]	Time 0.086 (0.058)	Data 0.000 (0.001)	Loss 0.3937 (0.4449)	Prec@1 89.062 (84.684)
Epoch: [66][340/391]	Time 0.047 (0.057)	Data 0.000 (0.001)	Loss 0.4079 (0.4444)	Prec@1 87.500 (84.652)
Epoch: [66][360/391]	Time 0.047 (0.057)	Data 0.000 (0.001)	Loss 0.4808 (0.4450)	Prec@1 84.375 (84.674)
Epoch: [66][380/391]	Time 0.045 (0.057)	Data 0.000 (0.001)	Loss 0.3949 (0.4444)	Prec@1 87.500 (84.691)
Test: [0/79]	Time 0.240 (0.240)	Loss 0.5503 (0.5503)	Prec@1 79.688 (79.688)
Test: [20/79]	Time 0.029 (0.028)	Loss 0.5021 (0.6408)	Prec@1 83.594 (78.534)
Test: [40/79]	Time 0.01

Test: [60/79]	Time 0.014 (0.020)	Loss 0.5810 (0.6660)	Prec@1 78.125 (78.740)
 * Prec@1 79.100
current lr 1.00000e-01
Epoch: [70][0/391]	Time 0.323 (0.323)	Data 0.260 (0.260)	Loss 0.5261 (0.5261)	Prec@1 84.375 (84.375)
Epoch: [70][20/391]	Time 0.044 (0.069)	Data 0.000 (0.013)	Loss 0.3642 (0.4325)	Prec@1 86.719 (85.640)
Epoch: [70][40/391]	Time 0.052 (0.063)	Data 0.000 (0.007)	Loss 0.6120 (0.4314)	Prec@1 78.906 (85.518)
Epoch: [70][60/391]	Time 0.056 (0.061)	Data 0.000 (0.005)	Loss 0.4612 (0.4344)	Prec@1 82.812 (85.259)
Epoch: [70][80/391]	Time 0.065 (0.060)	Data 0.000 (0.004)	Loss 0.6159 (0.4304)	Prec@1 78.906 (85.233)
Epoch: [70][100/391]	Time 0.060 (0.060)	Data 0.000 (0.003)	Loss 0.4956 (0.4393)	Prec@1 81.250 (84.932)
Epoch: [70][120/391]	Time 0.046 (0.058)	Data 0.000 (0.002)	Loss 0.6517 (0.4455)	Prec@1 79.688 (84.756)
Epoch: [70][140/391]	Time 0.046 (0.057)	Data 0.000 (0.002)	Loss 0.4245 (0.4460)	Prec@1 85.156 (84.608)
Epoch: [70][160/391]	Time 0.048 (0.056)	Data 0.000 (0.002)	Loss 0

Epoch: [73][180/391]	Time 0.054 (0.060)	Data 0.000 (0.002)	Loss 0.3442 (0.4426)	Prec@1 85.938 (84.867)
Epoch: [73][200/391]	Time 0.049 (0.060)	Data 0.000 (0.002)	Loss 0.4286 (0.4431)	Prec@1 84.375 (84.876)
Epoch: [73][220/391]	Time 0.069 (0.059)	Data 0.000 (0.002)	Loss 0.4102 (0.4432)	Prec@1 83.594 (84.810)
Epoch: [73][240/391]	Time 0.057 (0.059)	Data 0.000 (0.002)	Loss 0.4320 (0.4427)	Prec@1 87.500 (84.796)
Epoch: [73][260/391]	Time 0.048 (0.059)	Data 0.000 (0.002)	Loss 0.4277 (0.4434)	Prec@1 86.719 (84.767)
Epoch: [73][280/391]	Time 0.069 (0.059)	Data 0.000 (0.002)	Loss 0.3928 (0.4423)	Prec@1 88.281 (84.742)
Epoch: [73][300/391]	Time 0.057 (0.059)	Data 0.000 (0.001)	Loss 0.4175 (0.4418)	Prec@1 85.156 (84.780)
Epoch: [73][320/391]	Time 0.056 (0.059)	Data 0.000 (0.001)	Loss 0.4034 (0.4438)	Prec@1 85.156 (84.711)
Epoch: [73][340/391]	Time 0.050 (0.058)	Data 0.000 (0.001)	Loss 0.5173 (0.4440)	Prec@1 83.594 (84.682)
Epoch: [73][360/391]	Time 0.047 (0.058)	Data 0.000 (0.001)	Loss 0.3453 (0

Epoch: [76][380/391]	Time 0.055 (0.056)	Data 0.000 (0.001)	Loss 0.3477 (0.4389)	Prec@1 90.625 (84.863)
Test: [0/79]	Time 0.224 (0.224)	Loss 0.6020 (0.6020)	Prec@1 77.344 (77.344)
Test: [20/79]	Time 0.016 (0.029)	Loss 0.6019 (0.6042)	Prec@1 80.469 (79.204)
Test: [40/79]	Time 0.018 (0.022)	Loss 0.7373 (0.5970)	Prec@1 76.562 (79.840)
Test: [60/79]	Time 0.012 (0.020)	Loss 0.4761 (0.5837)	Prec@1 81.250 (80.225)
 * Prec@1 80.100
current lr 1.00000e-01
Epoch: [77][0/391]	Time 0.299 (0.299)	Data 0.241 (0.241)	Loss 0.2536 (0.2536)	Prec@1 92.969 (92.969)
Epoch: [77][20/391]	Time 0.059 (0.068)	Data 0.000 (0.012)	Loss 0.4127 (0.4113)	Prec@1 85.938 (86.049)
Epoch: [77][40/391]	Time 0.052 (0.063)	Data 0.000 (0.006)	Loss 0.3101 (0.4260)	Prec@1 89.062 (85.652)
Epoch: [77][60/391]	Time 0.068 (0.062)	Data 0.000 (0.004)	Loss 0.4386 (0.4346)	Prec@1 83.594 (85.489)
Epoch: [77][80/391]	Time 0.060 (0.060)	Data 0.000 (0.003)	Loss 0.4607 (0.4424)	Prec@1 84.375 (85.233)
Epoch: [77][100/391]	Time 0.052 (0.060)	D

Epoch: [80][120/391]	Time 0.064 (0.060)	Data 0.000 (0.003)	Loss 0.5242 (0.4309)	Prec@1 82.031 (85.240)
Epoch: [80][140/391]	Time 0.050 (0.060)	Data 0.000 (0.003)	Loss 0.3636 (0.4269)	Prec@1 89.062 (85.439)
Epoch: [80][160/391]	Time 0.056 (0.059)	Data 0.000 (0.002)	Loss 0.5696 (0.4288)	Prec@1 83.594 (85.365)
Epoch: [80][180/391]	Time 0.046 (0.059)	Data 0.000 (0.002)	Loss 0.4127 (0.4310)	Prec@1 86.719 (85.256)
Epoch: [80][200/391]	Time 0.066 (0.059)	Data 0.000 (0.002)	Loss 0.4748 (0.4334)	Prec@1 84.375 (85.137)
Epoch: [80][220/391]	Time 0.060 (0.059)	Data 0.000 (0.002)	Loss 0.5020 (0.4339)	Prec@1 83.594 (85.114)
Epoch: [80][240/391]	Time 0.051 (0.059)	Data 0.000 (0.002)	Loss 0.4483 (0.4352)	Prec@1 82.812 (85.030)
Epoch: [80][260/391]	Time 0.054 (0.059)	Data 0.000 (0.002)	Loss 0.4247 (0.4366)	Prec@1 85.156 (85.004)
Epoch: [80][280/391]	Time 0.052 (0.059)	Data 0.000 (0.001)	Loss 0.4154 (0.4375)	Prec@1 84.375 (84.976)
Epoch: [80][300/391]	Time 0.057 (0.059)	Data 0.000 (0.001)	Loss 0.4655 (0

Epoch: [83][320/391]	Time 0.059 (0.058)	Data 0.000 (0.001)	Loss 0.4608 (0.4331)	Prec@1 82.031 (85.198)
Epoch: [83][340/391]	Time 0.063 (0.058)	Data 0.000 (0.001)	Loss 0.4031 (0.4328)	Prec@1 84.375 (85.209)
Epoch: [83][360/391]	Time 0.068 (0.058)	Data 0.000 (0.001)	Loss 0.3935 (0.4325)	Prec@1 84.375 (85.193)
Epoch: [83][380/391]	Time 0.048 (0.058)	Data 0.000 (0.001)	Loss 0.4498 (0.4334)	Prec@1 85.938 (85.167)
Test: [0/79]	Time 0.283 (0.283)	Loss 0.5063 (0.5063)	Prec@1 81.250 (81.250)
Test: [20/79]	Time 0.020 (0.028)	Loss 0.6786 (0.6078)	Prec@1 78.125 (79.799)
Test: [40/79]	Time 0.011 (0.022)	Loss 0.8262 (0.6021)	Prec@1 74.219 (80.354)
Test: [60/79]	Time 0.017 (0.020)	Loss 0.5882 (0.5891)	Prec@1 78.906 (80.853)
 * Prec@1 80.890
current lr 1.00000e-01
Epoch: [84][0/391]	Time 0.374 (0.374)	Data 0.314 (0.314)	Loss 0.4414 (0.4414)	Prec@1 85.938 (85.938)
Epoch: [84][20/391]	Time 0.068 (0.073)	Data 0.000 (0.015)	Loss 0.4914 (0.4178)	Prec@1 82.031 (85.417)
Epoch: [84][40/391]	Time 0.046 (0.065)

Epoch: [87][60/391]	Time 0.061 (0.062)	Data 0.000 (0.004)	Loss 0.3932 (0.4208)	Prec@1 86.719 (85.694)
Epoch: [87][80/391]	Time 0.062 (0.061)	Data 0.000 (0.003)	Loss 0.3792 (0.4171)	Prec@1 85.156 (85.783)
Epoch: [87][100/391]	Time 0.055 (0.060)	Data 0.000 (0.003)	Loss 0.4312 (0.4207)	Prec@1 85.156 (85.605)
Epoch: [87][120/391]	Time 0.057 (0.060)	Data 0.000 (0.002)	Loss 0.3276 (0.4144)	Prec@1 90.625 (85.705)
Epoch: [87][140/391]	Time 0.058 (0.060)	Data 0.000 (0.002)	Loss 0.4009 (0.4167)	Prec@1 85.938 (85.516)
Epoch: [87][160/391]	Time 0.058 (0.059)	Data 0.000 (0.002)	Loss 0.4662 (0.4148)	Prec@1 83.594 (85.569)
Epoch: [87][180/391]	Time 0.060 (0.059)	Data 0.000 (0.002)	Loss 0.3825 (0.4137)	Prec@1 90.625 (85.597)
Epoch: [87][200/391]	Time 0.066 (0.059)	Data 0.000 (0.001)	Loss 0.4470 (0.4159)	Prec@1 85.156 (85.549)
Epoch: [87][220/391]	Time 0.056 (0.059)	Data 0.000 (0.001)	Loss 0.4607 (0.4198)	Prec@1 80.469 (85.382)
Epoch: [87][240/391]	Time 0.050 (0.058)	Data 0.000 (0.001)	Loss 0.4509 (0.4

Epoch: [90][260/391]	Time 0.053 (0.055)	Data 0.000 (0.001)	Loss 0.4693 (0.4324)	Prec@1 82.031 (85.162)
Epoch: [90][280/391]	Time 0.046 (0.055)	Data 0.000 (0.001)	Loss 0.4077 (0.4332)	Prec@1 85.156 (85.162)
Epoch: [90][300/391]	Time 0.072 (0.055)	Data 0.000 (0.001)	Loss 0.4622 (0.4340)	Prec@1 81.250 (85.112)
Epoch: [90][320/391]	Time 0.068 (0.055)	Data 0.000 (0.001)	Loss 0.5346 (0.4354)	Prec@1 85.938 (85.069)
Epoch: [90][340/391]	Time 0.046 (0.056)	Data 0.000 (0.001)	Loss 0.3489 (0.4364)	Prec@1 88.281 (84.991)
Epoch: [90][360/391]	Time 0.055 (0.056)	Data 0.000 (0.001)	Loss 0.3659 (0.4353)	Prec@1 89.062 (85.050)
Epoch: [90][380/391]	Time 0.056 (0.056)	Data 0.000 (0.001)	Loss 0.4822 (0.4363)	Prec@1 89.062 (85.017)
Test: [0/79]	Time 0.232 (0.232)	Loss 0.8425 (0.8425)	Prec@1 77.344 (77.344)
Test: [20/79]	Time 0.014 (0.026)	Loss 0.9787 (0.8337)	Prec@1 76.562 (74.442)
Test: [40/79]	Time 0.014 (0.021)	Loss 1.0252 (0.8209)	Prec@1 71.875 (75.019)
Test: [60/79]	Time 0.014 (0.019)	Loss 0.7100 (0.8

 * Prec@1 77.360
current lr 1.00000e-01
Epoch: [94][0/391]	Time 0.369 (0.369)	Data 0.312 (0.312)	Loss 0.3547 (0.3547)	Prec@1 83.594 (83.594)
Epoch: [94][20/391]	Time 0.049 (0.072)	Data 0.000 (0.015)	Loss 0.4455 (0.4112)	Prec@1 89.844 (85.342)
Epoch: [94][40/391]	Time 0.048 (0.065)	Data 0.000 (0.008)	Loss 0.5533 (0.4166)	Prec@1 81.250 (85.366)
Epoch: [94][60/391]	Time 0.056 (0.062)	Data 0.000 (0.005)	Loss 0.4584 (0.4202)	Prec@1 84.375 (85.374)
Epoch: [94][80/391]	Time 0.062 (0.061)	Data 0.000 (0.004)	Loss 0.4169 (0.4199)	Prec@1 87.500 (85.619)
Epoch: [94][100/391]	Time 0.064 (0.060)	Data 0.000 (0.003)	Loss 0.5253 (0.4268)	Prec@1 84.375 (85.466)
Epoch: [94][120/391]	Time 0.057 (0.060)	Data 0.000 (0.003)	Loss 0.6281 (0.4318)	Prec@1 81.250 (85.324)
Epoch: [94][140/391]	Time 0.059 (0.059)	Data 0.000 (0.003)	Loss 0.4894 (0.4298)	Prec@1 86.719 (85.278)
Epoch: [94][160/391]	Time 0.057 (0.059)	Data 0.000 (0.002)	Loss 0.3359 (0.4277)	Prec@1 89.844 (85.321)
Epoch: [94][180/391]	Time 0.061 (0.059)

Epoch: [97][200/391]	Time 0.065 (0.055)	Data 0.000 (0.001)	Loss 0.4062 (0.4206)	Prec@1 85.938 (85.479)
Epoch: [97][220/391]	Time 0.062 (0.055)	Data 0.000 (0.001)	Loss 0.4421 (0.4222)	Prec@1 88.281 (85.453)
Epoch: [97][240/391]	Time 0.059 (0.056)	Data 0.000 (0.001)	Loss 0.3986 (0.4244)	Prec@1 85.156 (85.403)
Epoch: [97][260/391]	Time 0.061 (0.056)	Data 0.000 (0.001)	Loss 0.6039 (0.4258)	Prec@1 77.344 (85.387)
Epoch: [97][280/391]	Time 0.061 (0.056)	Data 0.000 (0.001)	Loss 0.4370 (0.4286)	Prec@1 85.156 (85.292)
Epoch: [97][300/391]	Time 0.063 (0.056)	Data 0.000 (0.001)	Loss 0.5042 (0.4286)	Prec@1 83.594 (85.294)
Epoch: [97][320/391]	Time 0.059 (0.056)	Data 0.000 (0.001)	Loss 0.5756 (0.4289)	Prec@1 82.031 (85.295)
Epoch: [97][340/391]	Time 0.044 (0.056)	Data 0.000 (0.001)	Loss 0.5089 (0.4295)	Prec@1 85.156 (85.321)
Epoch: [97][360/391]	Time 0.049 (0.055)	Data 0.000 (0.001)	Loss 0.4986 (0.4306)	Prec@1 82.031 (85.264)
Epoch: [97][380/391]	Time 0.039 (0.054)	Data 0.000 (0.001)	Loss 0.4233 (0

Test: [0/79]	Time 0.221 (0.221)	Loss 0.2099 (0.2099)	Prec@1 91.406 (91.406)
Test: [20/79]	Time 0.011 (0.024)	Loss 0.2564 (0.3006)	Prec@1 89.844 (89.583)
Test: [40/79]	Time 0.014 (0.019)	Loss 0.3860 (0.2996)	Prec@1 89.062 (89.653)
Test: [60/79]	Time 0.016 (0.017)	Loss 0.2392 (0.2935)	Prec@1 92.188 (89.780)
 * Prec@1 89.920
current lr 1.00000e-02
Epoch: [101][0/391]	Time 0.354 (0.354)	Data 0.295 (0.295)	Loss 0.4191 (0.4191)	Prec@1 83.594 (83.594)
Epoch: [101][20/391]	Time 0.032 (0.058)	Data 0.000 (0.014)	Loss 0.2127 (0.2556)	Prec@1 90.625 (90.960)
Epoch: [101][40/391]	Time 0.038 (0.050)	Data 0.000 (0.007)	Loss 0.3280 (0.2457)	Prec@1 91.406 (91.578)
Epoch: [101][60/391]	Time 0.041 (0.047)	Data 0.000 (0.005)	Loss 0.2423 (0.2503)	Prec@1 91.406 (91.534)
Epoch: [101][80/391]	Time 0.045 (0.045)	Data 0.002 (0.004)	Loss 0.2843 (0.2509)	Prec@1 91.406 (91.474)
Epoch: [101][100/391]	Time 0.036 (0.045)	Data 0.000 (0.003)	Loss 0.2695 (0.2502)	Prec@1 90.625 (91.491)
Epoch: [101][120/391]	Time 0.042 (0

KeyboardInterrupt: 