# Efficient Channel Attention for Deep Convolutional Neural Networks

For a more detailed breakdown of ECA-Net, check out the [full tutorial on the blog](https://blog.paperspace.com/attention-mechanisms-in-computer-vision-ecanet/).

In [2]:
# Model Parameter and FLOP counter
# !pip install --upgrade git+https://github.com/sovrasov/flops-counter.pytorch.git

In [3]:
!nvidia-smi

Tue Sep 29 16:43:34 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 450.36.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro P6000        On   | 00000000:00:05.0 Off |                  Off |
| 26%   28C    P8     9W / 250W |      1MiB / 24449MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import time
import torch
import torch.nn as nn
from ptflops import get_model_complexity_info
import torch.nn.functional as F
import torch.nn.init as init
import os
import shutil
import time
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import numpy as np
import math
from torch.nn.parameter import Parameter

In [5]:
# Best validation top-1 precision seen so far; main() updates it through `global`.
best_prec1 = 0
# Declared `global` in main() but not read anywhere in the visible code.
evaluate = True

In [6]:
def main():
    """Build an ECA-augmented ResNet-20, report its complexity and train it on CIFAR-10.

    The model classes are defined locally. The network is profiled with
    ``ptflops``, moved to the GPU and trained for 200 epochs with SGD and a
    multi-step learning-rate schedule. After every epoch the model is
    validated and its weights are written to ``./vanilla_model.th``; a
    checkpoint that also records the epoch is written to
    ``./vanilla_checkpoint.th`` on every 20th epoch (epoch 0 excluded).

    Side effects:
        Updates the module-level ``best_prec1`` with the best validation
        top-1 precision seen so far, downloads CIFAR-10 into ``./data`` when
        it is not already there, and writes checkpoint files to the current
        directory.
    """
    global best_prec1, evaluate

    def _weights_init(m):
        """Apply Kaiming-normal initialisation to linear and 2-D convolution weights."""
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.kaiming_normal_(m.weight)

    class LambdaLayer(nn.Module):
        """Wrap an arbitrary callable so it can be used as an ``nn.Module``."""

        def __init__(self, lambd):
            super(LambdaLayer, self).__init__()
            self.lambd = lambd

        def forward(self, x):
            return self.lambd(x)

    ### Efficient Channel Attention Class definition

    class ECA(nn.Module):
        """Constructs a ECA module.

        Args:
            channel: Number of channels of the input feature map. It is not
                used by the module itself; the 1-D convolution slides over
                however many channels the input has.
            k_size: Kernel size of the 1-D convolution applied across the
                channel dimension.
        """

        def __init__(self, channel, k_size=3):
            super(ECA, self).__init__()
            self.avg_pool = nn.AdaptiveAvgPool2d(1)
            self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)

        def forward(self, x):
            # Global average pooling: one descriptor per channel, (N, C, 1, 1).
            y = self.avg_pool(x)

            # Treat the channel descriptors as a length-C sequence:
            # (N, C, 1, 1) -> (N, 1, C) for the 1-D convolution, then back to
            # (N, C, 1, 1).
            y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)

            # Squash to (0, 1) so the result acts as a per-channel gate.
            y = torch.sigmoid(y)

            return x * y.expand_as(x)

    class BasicBlock(nn.Module):
        """Two-convolution residual block, optionally followed by an ECA gate.

        Args:
            in_planes: Number of input channels.
            planes: Number of output channels.
            stride: Stride of the first convolution.
            option: Shortcut used when the shape changes: ``'A'`` subsamples
                and zero-pads the channels, ``'B'`` uses a strided 1x1
                convolution followed by batch normalisation.
            use_eca: When true, an ``ECA`` module is applied to the block output.
        """

        expansion = 1

        def __init__(self, in_planes, planes, stride=1, option='A', use_eca=True):
            super(BasicBlock, self).__init__()
            self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(planes)
            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(planes)
            self.use_eca = use_eca
            if self.use_eca:
                self.eca = ECA(planes)

            # Identity shortcut unless the spatial size or channel count changes.
            self.shortcut = nn.Sequential()
            if stride != 1 or in_planes != planes:
                if option == 'A':
                    # For CIFAR10 ResNet paper uses option A: take every
                    # second pixel and zero-pad planes // 4 channels on each
                    # side of the channel dimension (parameter free).
                    self.shortcut = LambdaLayer(
                        lambda x: F.pad(x[:, :, ::2, ::2],
                                        (0, 0, 0, 0, planes // 4, planes // 4),
                                        "constant", 0))
                elif option == 'B':
                    self.shortcut = nn.Sequential(
                        nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                        nn.BatchNorm2d(self.expansion * planes)
                    )

        def forward(self, x):
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.bn2(self.conv2(out))
            out += self.shortcut(x)
            out = F.relu(out)
            # The attention gate is applied after the residual sum and ReLU.
            if self.use_eca:
                out = self.eca(out)
            return out

    class ResNet(nn.Module):
        """Three-stage ResNet with 16/32/64 channels and a linear classifier.

        Args:
            block: Residual block class; must expose an ``expansion`` attribute.
            num_blocks: Number of blocks in each of the three stages.
            num_classes: Size of the classifier output.
        """

        def __init__(self, block, num_blocks, num_classes=10):
            super(ResNet, self).__init__()
            self.in_planes = 16

            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(16)
            self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
            self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
            self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
            self.linear = nn.Linear(64, num_classes)

            self.apply(_weights_init)

        def _make_layer(self, block, planes, num_blocks, stride):
            """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
            strides = [stride] + [1] * (num_blocks - 1)
            layers = []
            for stride in strides:
                layers.append(block(self.in_planes, planes, stride))
                self.in_planes = planes * block.expansion

            return nn.Sequential(*layers)

        def forward(self, x):
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.layer1(out)
            out = self.layer2(out)
            out = self.layer3(out)
            # Global average pooling: the kernel spans the whole feature map width.
            out = F.avg_pool2d(out, out.size()[3])
            out = out.view(out.size(0), -1)
            out = self.linear(out)
            return out

    def resnet20():
        return ResNet(BasicBlock, [3, 3, 3])

    def resnet32():
        return ResNet(BasicBlock, [5, 5, 5])

    def resnet44():
        return ResNet(BasicBlock, [7, 7, 7])

    def resnet56():
        return ResNet(BasicBlock, [9, 9, 9])

    def resnet110():
        return ResNet(BasicBlock, [18, 18, 18])

    def resnet1202():
        return ResNet(BasicBlock, [200, 200, 200])

    model = resnet20()

    # NOTE(review): complexity is measured for a 3x224x224 input, whereas the
    # training pipeline below feeds 32x32 crops -- confirm this is intended.
    with torch.cuda.device(0):
        flops, params = get_model_complexity_info(model, (3, 224, 224), as_strings=True, print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    model.cuda()

    cudnn.benchmark = True

    # NOTE(review): these look like the commonly used ImageNet statistics --
    # confirm whether CIFAR-10 statistics were intended.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=128, shuffle=True,
        num_workers=4, pin_memory=True)

    # No download here: the training dataset above already fetched ./data.
    val_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=128, shuffle=False,
        num_workers=4, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), 0.1,
                                momentum=0.9,
                                weight_decay=5e-4)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=[100, 150], last_epoch=-1)

    for epoch in range(0, 200):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint; is_best must be computed
        # exactly once, BEFORE best_prec1 is raised, or it can never be True.
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % 20 == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, filename=os.path.join('./', 'vanilla_checkpoint.th'))

        save_checkpoint({
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, filename=os.path.join('./', 'vanilla_model.th'))



def train(train_loader, model, criterion, optimizer, epoch):
    """
        Train the model for a single pass over ``train_loader``.

        Each batch is moved to the GPU, a forward/backward pass and one
        optimizer step are performed, and running loss / top-1 precision /
        timing statistics are printed every 20 batches.  ``epoch`` is only
        used to label the printed progress lines.
    """
    step_timer = AverageMeter()
    load_timer = AverageMeter()
    loss_meter = AverageMeter()
    prec_meter = AverageMeter()

    progress = ('Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    # enable training-time behaviour of the model
    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # time spent waiting for the loader to hand over this batch
        load_timer.update(time.time() - tick)

        labels = labels.cuda()
        gpu_images = images.cuda()

        # forward pass
        logits = model(gpu_images)
        batch_loss = criterion(logits, labels)

        # backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        logits = logits.float()
        batch_loss = batch_loss.float()

        # bookkeeping, weighted by the number of samples in the batch
        n_samples = images.size(0)
        batch_prec = accuracy(logits.data, labels)[0]
        loss_meter.update(batch_loss.item(), n_samples)
        prec_meter.update(batch_prec.item(), n_samples)

        # wall-clock time of the whole iteration
        step_timer.update(time.time() - tick)
        tick = time.time()

        if step % 20 != 0:
            continue
        print(progress.format(
            epoch, step, len(train_loader), batch_time=step_timer,
            data_time=load_timer, loss=loss_meter, top1=prec_meter))


def validate(val_loader, model, criterion):
    """
    Run evaluation

    Puts the model in eval mode and iterates over ``val_loader`` with
    gradients disabled, accumulating loss and top-1 precision weighted by
    batch size.  Progress is printed every 20 batches and a summary line at
    the end.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; called on the GPU copy of each batch.
        criterion: loss function called as ``criterion(output, target)``.

    Returns:
        The running average top-1 precision over all batches (``top1.avg``).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # Move the batch to the GPU once (the labels used to be moved twice).
            images = images.cuda()
            target = target.cuda()

            # compute output
            output = model(images)
            loss = criterion(output, target)

            output = output.float()
            loss = loss.float()

            # measure accuracy and record loss; no `.data` needed because
            # nothing tracks gradients inside torch.no_grad()
            batch_size = images.size(0)
            prec1 = accuracy(output, target)[0]
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 20 == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'
          .format(top1=top1))

    return top1.avg

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """
    Save the training model

    Args:
        state: object to serialise with ``torch.save`` (the callers in this
            file pass a dict holding the model ``state_dict``).
        is_best: when true, the file just written is additionally copied to
            ``best_<name>`` in the same directory, so the best model so far
            survives later overwrites of ``filename``.
        filename: path of the checkpoint file.  It has to be a filesystem
            path (not a file object) when ``is_best`` is true.
    """
    torch.save(state, filename)
    if is_best:
        directory, basename = os.path.split(filename)
        shutil.copyfile(filename, os.path.join(directory, 'best_' + basename))

class AverageMeter(object):
    """Keeps the most recent value of a metric together with its running weighted mean"""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, mean, weighted sum and sample count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted ``n`` times, and refresh the mean."""
        weighted = val * n
        self.val = val
        self.count += n
        self.sum += weighted
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k

    Args:
        output: 2-D tensor of scores; the top-k search runs along dim 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of k values to report.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``: the percentage of
        rows whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch)
    # so that row r holds every sample's (r + 1)-th best guess.
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: the slice comes from a transposed tensor
        # and may not be contiguous, in which case view(-1) raises for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res



In [7]:
# Start the training pipeline only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

ResNet(
  0.27 M, 100.000% Params, 2.01 GMac, 100.000% MACs, 
  (conv1): Conv2d(0.0 M, 0.160% Params, 0.022 GMac, 1.079% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    0.014 M, 5.199% Params, 0.706 GMac, 35.114% MACs, 
    (0): BasicBlock(
      0.005 M, 1.733% Params, 0.235 GMac, 11.705% MACs, 
      (conv1): Conv2d(0.002 M, 0.854% Params, 0.116 GMac, 5.753% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(0.002 M, 0.854% Params, 0.116 GMac, 5.753% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(0.0 M, 0.012% Params, 0.002 GMac, 0.080% MACs, 16, eps=1e-05,

Files already downloaded and verified
current lr 1.00000e-01
Epoch: [0][0/391]	Time 0.378 (0.378)	Data 0.255 (0.255)	Loss 2.4339 (2.4339)	Prec@1 8.594 (8.594)
Epoch: [0][20/391]	Time 0.021 (0.042)	Data 0.000 (0.012)	Loss 2.0308 (2.1952)	Prec@1 25.000 (16.295)
Epoch: [0][40/391]	Time 0.021 (0.032)	Data 0.000 (0.006)	Loss 1.8924 (2.0718)	Prec@1 25.781 (20.255)
Epoch: [0][60/391]	Time 0.022 (0.029)	Data 0.000 (0.004)	Loss 1.7715 (2.0103)	Prec@1 34.375 (22.720)
Epoch: [0][80/391]	Time 0.022 (0.028)	Data 0.000 (0.003)	Loss 1.8657 (1.9650)	Prec@1 28.125 (24.132)
Epoch: [0][100/391]	Time 0.021 (0.027)	Data 0.000 (0.003)	Loss 1.6365 (1.9258)	Prec@1 34.375 (25.541)
Epoch: [0][120/391]	Time 0.022 (0.026)	Data 0.000 (0.002)	Loss 1.7019 (1.9023)	Prec@1 31.250 (26.575)
Epoch: [0][140/391]	Time 0.022 (0.026)	Data 0.000 (0.002)	Loss 1.6815 (1.8738)	Prec@1 32.812 (27.826)
Epoch: [0][160/391]	Time 0.022 (0.025)	Data 0.000 (0.002)	Loss 1.7697 (1.8504)	Prec@1 32.812 (28.571)
Epoch: [0][180/391]	Time 0.02

Epoch: [3][200/391]	Time 0.022 (0.025)	Data 0.000 (0.002)	Loss 0.7276 (0.8512)	Prec@1 73.438 (70.083)
Epoch: [3][220/391]	Time 0.022 (0.025)	Data 0.000 (0.001)	Loss 0.7757 (0.8464)	Prec@1 71.094 (70.093)
Epoch: [3][240/391]	Time 0.025 (0.025)	Data 0.000 (0.001)	Loss 0.9748 (0.8441)	Prec@1 66.406 (70.199)
Epoch: [3][260/391]	Time 0.022 (0.025)	Data 0.000 (0.001)	Loss 0.7466 (0.8430)	Prec@1 76.562 (70.301)
Epoch: [3][280/391]	Time 0.021 (0.025)	Data 0.000 (0.001)	Loss 0.8581 (0.8438)	Prec@1 73.438 (70.360)
Epoch: [3][300/391]	Time 0.022 (0.025)	Data 0.000 (0.001)	Loss 0.6902 (0.8399)	Prec@1 76.562 (70.525)
Epoch: [3][320/391]	Time 0.020 (0.024)	Data 0.000 (0.001)	Loss 0.9517 (0.8379)	Prec@1 64.062 (70.522)
Epoch: [3][340/391]	Time 0.024 (0.024)	Data 0.000 (0.001)	Loss 0.7552 (0.8352)	Prec@1 75.000 (70.645)
Epoch: [3][360/391]	Time 0.021 (0.024)	Data 0.000 (0.001)	Loss 0.8480 (0.8351)	Prec@1 70.312 (70.704)
Epoch: [3][380/391]	Time 0.021 (0.024)	Data 0.000 (0.001)	Loss 0.8613 (0.8327)	Pre

Test: [40/79]	Time 0.006 (0.014)	Loss 0.8925 (0.8743)	Prec@1 67.188 (70.941)
Test: [60/79]	Time 0.015 (0.013)	Loss 0.9379 (0.8846)	Prec@1 74.219 (70.978)
 * Prec@1 71.310
current lr 1.00000e-01
Epoch: [7][0/391]	Time 0.273 (0.273)	Data 0.245 (0.245)	Loss 0.6776 (0.6776)	Prec@1 75.781 (75.781)
Epoch: [7][20/391]	Time 0.023 (0.035)	Data 0.000 (0.012)	Loss 0.4902 (0.6414)	Prec@1 85.156 (77.753)
Epoch: [7][40/391]	Time 0.026 (0.029)	Data 0.000 (0.006)	Loss 0.5918 (0.6201)	Prec@1 82.031 (78.125)
Epoch: [7][60/391]	Time 0.021 (0.027)	Data 0.000 (0.004)	Loss 0.6444 (0.6222)	Prec@1 78.125 (78.291)
Epoch: [7][80/391]	Time 0.022 (0.026)	Data 0.000 (0.003)	Loss 0.7014 (0.6208)	Prec@1 77.344 (78.540)
Epoch: [7][100/391]	Time 0.029 (0.025)	Data 0.000 (0.003)	Loss 0.7566 (0.6259)	Prec@1 73.438 (78.349)
Epoch: [7][120/391]	Time 0.022 (0.025)	Data 0.000 (0.002)	Loss 0.7111 (0.6307)	Prec@1 79.688 (78.190)
Epoch: [7][140/391]	Time 0.022 (0.024)	Data 0.000 (0.002)	Loss 0.6947 (0.6381)	Prec@1 77.344 (77.8

Epoch: [10][180/391]	Time 0.047 (0.052)	Data 0.000 (0.002)	Loss 0.7501 (0.5960)	Prec@1 75.000 (79.437)
Epoch: [10][200/391]	Time 0.050 (0.052)	Data 0.000 (0.002)	Loss 0.3927 (0.5969)	Prec@1 84.375 (79.345)
Epoch: [10][220/391]	Time 0.058 (0.052)	Data 0.000 (0.002)	Loss 0.4517 (0.5948)	Prec@1 85.156 (79.486)
Epoch: [10][240/391]	Time 0.051 (0.052)	Data 0.000 (0.001)	Loss 0.5507 (0.5921)	Prec@1 81.250 (79.545)
Epoch: [10][260/391]	Time 0.038 (0.052)	Data 0.000 (0.001)	Loss 0.6213 (0.5957)	Prec@1 78.125 (79.442)
Epoch: [10][280/391]	Time 0.054 (0.051)	Data 0.000 (0.001)	Loss 0.6590 (0.5947)	Prec@1 78.125 (79.440)
Epoch: [10][300/391]	Time 0.046 (0.051)	Data 0.000 (0.001)	Loss 0.5054 (0.5937)	Prec@1 80.469 (79.498)
Epoch: [10][320/391]	Time 0.035 (0.051)	Data 0.000 (0.001)	Loss 0.5170 (0.5943)	Prec@1 84.375 (79.468)
Epoch: [10][340/391]	Time 0.045 (0.050)	Data 0.000 (0.001)	Loss 0.7630 (0.5954)	Prec@1 75.781 (79.447)
Epoch: [10][360/391]	Time 0.040 (0.050)	Data 0.000 (0.001)	Loss 0.6007 (0

Epoch: [13][380/391]	Time 0.141 (0.125)	Data 0.000 (0.001)	Loss 0.6289 (0.5558)	Prec@1 80.469 (81.051)
Test: [0/79]	Time 0.271 (0.271)	Loss 0.7842 (0.7842)	Prec@1 68.750 (68.750)
Test: [20/79]	Time 0.022 (0.036)	Loss 0.7045 (0.7997)	Prec@1 78.125 (72.545)
Test: [40/79]	Time 0.024 (0.029)	Loss 0.8845 (0.8105)	Prec@1 68.750 (72.923)
Test: [60/79]	Time 0.022 (0.027)	Loss 0.8269 (0.8250)	Prec@1 68.750 (72.503)
 * Prec@1 72.550
current lr 1.00000e-01
Epoch: [14][0/391]	Time 0.478 (0.478)	Data 0.332 (0.332)	Loss 0.5280 (0.5280)	Prec@1 81.250 (81.250)
Epoch: [14][20/391]	Time 0.144 (0.152)	Data 0.000 (0.016)	Loss 0.6125 (0.5569)	Prec@1 75.781 (80.506)
Epoch: [14][40/391]	Time 0.136 (0.143)	Data 0.000 (0.008)	Loss 0.6714 (0.5554)	Prec@1 77.344 (80.736)
Epoch: [14][60/391]	Time 0.144 (0.141)	Data 0.000 (0.006)	Loss 0.4979 (0.5529)	Prec@1 81.250 (80.866)
Epoch: [14][80/391]	Time 0.131 (0.140)	Data 0.000 (0.004)	Loss 0.4240 (0.5485)	Prec@1 84.375 (81.182)
Epoch: [14][100/391]	Time 0.106 (0.136)	D

Epoch: [17][120/391]	Time 0.142 (0.127)	Data 0.000 (0.003)	Loss 0.6342 (0.5327)	Prec@1 75.000 (81.650)
Epoch: [17][140/391]	Time 0.147 (0.128)	Data 0.000 (0.003)	Loss 0.5484 (0.5310)	Prec@1 82.031 (81.704)
Epoch: [17][160/391]	Time 0.141 (0.129)	Data 0.000 (0.003)	Loss 0.5022 (0.5286)	Prec@1 78.906 (81.764)
Epoch: [17][180/391]	Time 0.121 (0.129)	Data 0.000 (0.002)	Loss 0.3584 (0.5278)	Prec@1 90.625 (81.872)
Epoch: [17][200/391]	Time 0.138 (0.128)	Data 0.000 (0.002)	Loss 0.5031 (0.5278)	Prec@1 83.594 (81.868)
Epoch: [17][220/391]	Time 0.134 (0.129)	Data 0.000 (0.002)	Loss 0.4929 (0.5292)	Prec@1 81.250 (81.851)
Epoch: [17][240/391]	Time 0.115 (0.127)	Data 0.000 (0.002)	Loss 0.5237 (0.5290)	Prec@1 82.031 (81.830)
Epoch: [17][260/391]	Time 0.072 (0.126)	Data 0.000 (0.002)	Loss 0.4191 (0.5303)	Prec@1 86.719 (81.780)
Epoch: [17][280/391]	Time 0.144 (0.126)	Data 0.000 (0.002)	Loss 0.4936 (0.5310)	Prec@1 83.594 (81.748)
Epoch: [17][300/391]	Time 0.140 (0.127)	Data 0.000 (0.002)	Loss 0.4656 (0

Epoch: [20][320/391]	Time 0.145 (0.129)	Data 0.000 (0.001)	Loss 0.4125 (0.5202)	Prec@1 85.156 (82.387)
Epoch: [20][340/391]	Time 0.141 (0.130)	Data 0.000 (0.001)	Loss 0.3729 (0.5199)	Prec@1 89.062 (82.375)
Epoch: [20][360/391]	Time 0.100 (0.127)	Data 0.000 (0.001)	Loss 0.6150 (0.5220)	Prec@1 78.906 (82.271)
Epoch: [20][380/391]	Time 0.100 (0.125)	Data 0.000 (0.001)	Loss 0.6205 (0.5232)	Prec@1 73.438 (82.189)
Test: [0/79]	Time 0.316 (0.316)	Loss 0.7534 (0.7534)	Prec@1 70.312 (70.312)
Test: [20/79]	Time 0.017 (0.035)	Loss 0.8342 (0.8012)	Prec@1 71.875 (72.396)
Test: [40/79]	Time 0.017 (0.028)	Loss 0.8356 (0.7957)	Prec@1 72.656 (72.732)
Test: [60/79]	Time 0.016 (0.025)	Loss 0.7171 (0.7993)	Prec@1 72.656 (72.554)
 * Prec@1 72.160
current lr 1.00000e-01
Epoch: [21][0/391]	Time 0.491 (0.491)	Data 0.380 (0.380)	Loss 0.5809 (0.5809)	Prec@1 83.594 (83.594)
Epoch: [21][20/391]	Time 0.134 (0.151)	Data 0.000 (0.018)	Loss 0.3551 (0.4979)	Prec@1 87.500 (83.371)
Epoch: [21][40/391]	Time 0.134 (0.143)

Epoch: [24][60/391]	Time 0.121 (0.135)	Data 0.000 (0.006)	Loss 0.4451 (0.4781)	Prec@1 79.688 (83.837)
Epoch: [24][80/391]	Time 0.107 (0.130)	Data 0.000 (0.005)	Loss 0.4625 (0.4791)	Prec@1 85.156 (83.748)
Epoch: [24][100/391]	Time 0.140 (0.126)	Data 0.000 (0.004)	Loss 0.4536 (0.4738)	Prec@1 83.594 (83.787)
Epoch: [24][120/391]	Time 0.131 (0.128)	Data 0.000 (0.003)	Loss 0.4663 (0.4859)	Prec@1 82.812 (83.348)
Epoch: [24][140/391]	Time 0.090 (0.128)	Data 0.000 (0.003)	Loss 0.4743 (0.4889)	Prec@1 82.812 (83.283)
Epoch: [24][160/391]	Time 0.140 (0.126)	Data 0.000 (0.003)	Loss 0.5849 (0.4935)	Prec@1 82.031 (83.070)
Epoch: [24][180/391]	Time 0.142 (0.127)	Data 0.000 (0.002)	Loss 0.4240 (0.4978)	Prec@1 86.719 (83.002)
Epoch: [24][200/391]	Time 0.134 (0.128)	Data 0.000 (0.002)	Loss 0.4067 (0.4965)	Prec@1 85.156 (82.991)
Epoch: [24][220/391]	Time 0.135 (0.129)	Data 0.000 (0.002)	Loss 0.4952 (0.4980)	Prec@1 82.812 (82.947)
Epoch: [24][240/391]	Time 0.125 (0.129)	Data 0.000 (0.002)	Loss 0.5203 (0.4

Epoch: [27][260/391]	Time 0.110 (0.122)	Data 0.000 (0.002)	Loss 0.5163 (0.5036)	Prec@1 85.938 (82.827)
Epoch: [27][280/391]	Time 0.139 (0.123)	Data 0.000 (0.002)	Loss 0.4102 (0.5000)	Prec@1 89.844 (82.957)
Epoch: [27][300/391]	Time 0.137 (0.124)	Data 0.000 (0.001)	Loss 0.5296 (0.5004)	Prec@1 80.469 (82.906)
Epoch: [27][320/391]	Time 0.138 (0.125)	Data 0.000 (0.001)	Loss 0.4818 (0.5019)	Prec@1 82.031 (82.839)
Epoch: [27][340/391]	Time 0.140 (0.125)	Data 0.000 (0.001)	Loss 0.4906 (0.5022)	Prec@1 78.125 (82.790)
Epoch: [27][360/391]	Time 0.140 (0.126)	Data 0.000 (0.001)	Loss 0.6190 (0.5032)	Prec@1 78.125 (82.748)
Epoch: [27][380/391]	Time 0.141 (0.126)	Data 0.000 (0.001)	Loss 0.5539 (0.5027)	Prec@1 81.250 (82.755)
Test: [0/79]	Time 0.285 (0.285)	Loss 0.6626 (0.6626)	Prec@1 81.250 (81.250)
Test: [20/79]	Time 0.019 (0.036)	Loss 0.6799 (0.6614)	Prec@1 80.469 (77.976)
Test: [40/79]	Time 0.021 (0.030)	Loss 0.7043 (0.6563)	Prec@1 75.000 (77.915)
Test: [60/79]	Time 0.014 (0.027)	Loss 0.6084 (0.6

Epoch: [33][40/391]	Time 0.117 (0.138)	Data 0.000 (0.008)	Loss 0.5033 (0.4649)	Prec@1 85.938 (83.613)
Epoch: [33][60/391]	Time 0.134 (0.131)	Data 0.000 (0.005)	Loss 0.4900 (0.4655)	Prec@1 84.375 (83.735)
Epoch: [33][80/391]	Time 0.085 (0.130)	Data 0.000 (0.004)	Loss 0.3498 (0.4656)	Prec@1 89.844 (83.709)
Epoch: [33][100/391]	Time 0.110 (0.126)	Data 0.000 (0.003)	Loss 0.5218 (0.4636)	Prec@1 82.812 (83.888)
Epoch: [33][120/391]	Time 0.127 (0.122)	Data 0.000 (0.003)	Loss 0.4661 (0.4666)	Prec@1 82.031 (83.826)
Epoch: [33][140/391]	Time 0.140 (0.124)	Data 0.000 (0.002)	Loss 0.4392 (0.4682)	Prec@1 82.031 (83.804)
Epoch: [33][160/391]	Time 0.133 (0.126)	Data 0.000 (0.002)	Loss 0.5126 (0.4724)	Prec@1 79.688 (83.671)
Epoch: [33][180/391]	Time 0.135 (0.127)	Data 0.000 (0.002)	Loss 0.4824 (0.4713)	Prec@1 85.938 (83.805)
Epoch: [33][200/391]	Time 0.141 (0.128)	Data 0.000 (0.002)	Loss 0.4834 (0.4726)	Prec@1 82.031 (83.761)
Epoch: [33][220/391]	Time 0.088 (0.128)	Data 0.000 (0.002)	Loss 0.3812 (0.47

Epoch: [36][240/391]	Time 0.069 (0.124)	Data 0.000 (0.002)	Loss 0.4905 (0.4666)	Prec@1 81.250 (83.947)
Epoch: [36][260/391]	Time 0.135 (0.125)	Data 0.000 (0.002)	Loss 0.4437 (0.4686)	Prec@1 84.375 (83.872)
Epoch: [36][280/391]	Time 0.137 (0.125)	Data 0.000 (0.002)	Loss 0.4226 (0.4711)	Prec@1 85.156 (83.755)
Epoch: [36][300/391]	Time 0.139 (0.126)	Data 0.000 (0.002)	Loss 0.3335 (0.4745)	Prec@1 90.625 (83.669)
Epoch: [36][320/391]	Time 0.080 (0.125)	Data 0.000 (0.001)	Loss 0.5380 (0.4733)	Prec@1 79.688 (83.732)
Epoch: [36][340/391]	Time 0.135 (0.126)	Data 0.000 (0.001)	Loss 0.4636 (0.4733)	Prec@1 84.375 (83.763)
Epoch: [36][360/391]	Time 0.140 (0.126)	Data 0.000 (0.001)	Loss 0.4272 (0.4723)	Prec@1 86.719 (83.845)
Epoch: [36][380/391]	Time 0.144 (0.127)	Data 0.000 (0.001)	Loss 0.6004 (0.4724)	Prec@1 76.562 (83.860)
Test: [0/79]	Time 0.316 (0.316)	Loss 0.5599 (0.5599)	Prec@1 80.469 (80.469)
Test: [20/79]	Time 0.023 (0.038)	Loss 0.6557 (0.6380)	Prec@1 80.469 (78.051)
Test: [40/79]	Time 0.02

Test: [60/79]	Time 0.032 (0.024)	Loss 0.5627 (0.7032)	Prec@1 82.812 (76.972)
 * Prec@1 76.610
current lr 1.00000e-01
Epoch: [40][0/391]	Time 0.485 (0.485)	Data 0.398 (0.398)	Loss 0.4009 (0.4009)	Prec@1 88.281 (88.281)
Epoch: [40][20/391]	Time 0.131 (0.136)	Data 0.000 (0.019)	Loss 0.4806 (0.4339)	Prec@1 85.156 (85.342)
Epoch: [40][40/391]	Time 0.142 (0.137)	Data 0.000 (0.010)	Loss 0.4692 (0.4323)	Prec@1 85.156 (85.347)
Epoch: [40][60/391]	Time 0.142 (0.136)	Data 0.000 (0.007)	Loss 0.5920 (0.4539)	Prec@1 75.781 (84.465)
Epoch: [40][80/391]	Time 0.144 (0.136)	Data 0.000 (0.005)	Loss 0.5483 (0.4646)	Prec@1 78.125 (84.105)
Epoch: [40][100/391]	Time 0.143 (0.136)	Data 0.000 (0.004)	Loss 0.5366 (0.4667)	Prec@1 79.688 (84.073)
Epoch: [40][120/391]	Time 0.139 (0.136)	Data 0.000 (0.004)	Loss 0.4685 (0.4632)	Prec@1 85.938 (84.317)
Epoch: [40][140/391]	Time 0.136 (0.136)	Data 0.000 (0.003)	Loss 0.4612 (0.4596)	Prec@1 83.594 (84.314)
Epoch: [40][160/391]	Time 0.137 (0.136)	Data 0.000 (0.003)	Loss 0

Epoch: [43][180/391]	Time 0.135 (0.127)	Data 0.000 (0.002)	Loss 0.5553 (0.4543)	Prec@1 81.250 (84.556)
Epoch: [43][200/391]	Time 0.140 (0.127)	Data 0.000 (0.002)	Loss 0.4913 (0.4537)	Prec@1 80.469 (84.499)
Epoch: [43][220/391]	Time 0.134 (0.128)	Data 0.000 (0.002)	Loss 0.4271 (0.4574)	Prec@1 83.594 (84.329)
Epoch: [43][240/391]	Time 0.132 (0.129)	Data 0.000 (0.002)	Loss 0.4024 (0.4568)	Prec@1 82.812 (84.330)
Epoch: [43][260/391]	Time 0.109 (0.128)	Data 0.000 (0.002)	Loss 0.4154 (0.4575)	Prec@1 82.812 (84.285)
Epoch: [43][280/391]	Time 0.134 (0.127)	Data 0.000 (0.002)	Loss 0.3714 (0.4563)	Prec@1 82.031 (84.292)
Epoch: [43][300/391]	Time 0.119 (0.127)	Data 0.000 (0.001)	Loss 0.4136 (0.4566)	Prec@1 85.156 (84.284)
Epoch: [43][320/391]	Time 0.088 (0.127)	Data 0.000 (0.001)	Loss 0.4460 (0.4577)	Prec@1 85.156 (84.248)
Epoch: [43][340/391]	Time 0.123 (0.126)	Data 0.000 (0.001)	Loss 0.5492 (0.4580)	Prec@1 83.594 (84.242)
Epoch: [43][360/391]	Time 0.049 (0.125)	Data 0.000 (0.001)	Loss 0.4535 (0

Epoch: [46][380/391]	Time 0.135 (0.124)	Data 0.000 (0.001)	Loss 0.5181 (0.4622)	Prec@1 83.594 (84.086)
Test: [0/79]	Time 0.319 (0.319)	Loss 0.6417 (0.6417)	Prec@1 78.906 (78.906)
Test: [20/79]	Time 0.024 (0.036)	Loss 0.6155 (0.6777)	Prec@1 78.906 (76.972)
Test: [40/79]	Time 0.023 (0.030)	Loss 0.7053 (0.6489)	Prec@1 76.562 (77.973)
Test: [60/79]	Time 0.018 (0.027)	Loss 0.5490 (0.6463)	Prec@1 82.031 (78.176)
 * Prec@1 78.100
current lr 1.00000e-01
Epoch: [47][0/391]	Time 0.510 (0.510)	Data 0.361 (0.361)	Loss 0.3976 (0.3976)	Prec@1 87.500 (87.500)
Epoch: [47][20/391]	Time 0.136 (0.154)	Data 0.000 (0.017)	Loss 0.4342 (0.4406)	Prec@1 84.375 (84.933)
Epoch: [47][40/391]	Time 0.145 (0.146)	Data 0.000 (0.009)	Loss 0.4998 (0.4389)	Prec@1 85.156 (85.194)
Epoch: [47][60/391]	Time 0.141 (0.143)	Data 0.001 (0.006)	Loss 0.4048 (0.4454)	Prec@1 89.062 (85.272)
Epoch: [47][80/391]	Time 0.112 (0.137)	Data 0.000 (0.005)	Loss 0.4188 (0.4486)	Prec@1 84.375 (84.877)
Epoch: [47][100/391]	Time 0.105 (0.132)	D

Epoch: [50][120/391]	Time 0.133 (0.119)	Data 0.000 (0.003)	Loss 0.4721 (0.4434)	Prec@1 82.031 (84.672)
Epoch: [50][140/391]	Time 0.135 (0.122)	Data 0.000 (0.002)	Loss 0.3638 (0.4448)	Prec@1 85.156 (84.719)
Epoch: [50][160/391]	Time 0.126 (0.124)	Data 0.000 (0.002)	Loss 0.4090 (0.4464)	Prec@1 86.719 (84.729)
Epoch: [50][180/391]	Time 0.136 (0.125)	Data 0.000 (0.002)	Loss 0.3624 (0.4513)	Prec@1 88.281 (84.595)
Epoch: [50][200/391]	Time 0.113 (0.124)	Data 0.000 (0.002)	Loss 0.5371 (0.4497)	Prec@1 80.469 (84.577)
Epoch: [50][220/391]	Time 0.120 (0.123)	Data 0.000 (0.002)	Loss 0.2997 (0.4482)	Prec@1 91.406 (84.615)
Epoch: [50][240/391]	Time 0.119 (0.120)	Data 0.000 (0.002)	Loss 0.5147 (0.4513)	Prec@1 80.469 (84.514)
Epoch: [50][260/391]	Time 0.137 (0.121)	Data 0.000 (0.001)	Loss 0.3843 (0.4506)	Prec@1 83.594 (84.558)
Epoch: [50][280/391]	Time 0.136 (0.122)	Data 0.000 (0.001)	Loss 0.3409 (0.4539)	Prec@1 88.281 (84.414)
Epoch: [50][300/391]	Time 0.118 (0.123)	Data 0.000 (0.001)	Loss 0.3855 (0

Epoch: [53][320/391]	Time 0.100 (0.131)	Data 0.000 (0.001)	Loss 0.6177 (0.4490)	Prec@1 80.469 (84.514)
Epoch: [53][340/391]	Time 0.114 (0.129)	Data 0.000 (0.001)	Loss 0.5084 (0.4502)	Prec@1 82.812 (84.496)
Epoch: [53][360/391]	Time 0.139 (0.128)	Data 0.000 (0.001)	Loss 0.4623 (0.4520)	Prec@1 82.031 (84.412)
Epoch: [53][380/391]	Time 0.132 (0.128)	Data 0.000 (0.001)	Loss 0.4330 (0.4518)	Prec@1 84.375 (84.434)
Test: [0/79]	Time 0.378 (0.378)	Loss 0.5488 (0.5488)	Prec@1 81.250 (81.250)
Test: [20/79]	Time 0.023 (0.037)	Loss 0.4173 (0.6061)	Prec@1 83.594 (79.948)
Test: [40/79]	Time 0.018 (0.029)	Loss 0.5716 (0.5842)	Prec@1 80.469 (80.431)
Test: [60/79]	Time 0.018 (0.026)	Loss 0.5199 (0.5793)	Prec@1 82.031 (80.418)
 * Prec@1 80.340
current lr 1.00000e-01
Epoch: [54][0/391]	Time 0.388 (0.388)	Data 0.314 (0.314)	Loss 0.3868 (0.3868)	Prec@1 87.500 (87.500)
Epoch: [54][20/391]	Time 0.129 (0.132)	Data 0.000 (0.015)	Loss 0.3936 (0.4108)	Prec@1 84.375 (85.975)
Epoch: [54][40/391]	Time 0.133 (0.124)

Epoch: [57][60/391]	Time 0.053 (0.115)	Data 0.000 (0.006)	Loss 0.5523 (0.4364)	Prec@1 82.031 (84.772)
Epoch: [57][80/391]	Time 0.134 (0.120)	Data 0.000 (0.005)	Loss 0.3848 (0.4386)	Prec@1 84.375 (84.626)
Epoch: [57][100/391]	Time 0.102 (0.122)	Data 0.000 (0.004)	Loss 0.3437 (0.4410)	Prec@1 89.062 (84.553)
Epoch: [57][120/391]	Time 0.129 (0.125)	Data 0.000 (0.003)	Loss 0.5307 (0.4442)	Prec@1 82.812 (84.601)
Epoch: [57][140/391]	Time 0.139 (0.126)	Data 0.000 (0.003)	Loss 0.5095 (0.4508)	Prec@1 81.250 (84.403)
Epoch: [57][160/391]	Time 0.094 (0.125)	Data 0.000 (0.002)	Loss 0.3691 (0.4487)	Prec@1 84.375 (84.448)
Epoch: [57][180/391]	Time 0.087 (0.120)	Data 0.000 (0.002)	Loss 0.5225 (0.4455)	Prec@1 82.812 (84.612)
Epoch: [57][200/391]	Time 0.082 (0.115)	Data 0.000 (0.002)	Loss 0.3052 (0.4462)	Prec@1 89.844 (84.573)
Epoch: [57][220/391]	Time 0.087 (0.110)	Data 0.000 (0.002)	Loss 0.3693 (0.4457)	Prec@1 89.844 (84.665)
Epoch: [57][240/391]	Time 0.092 (0.108)	Data 0.000 (0.002)	Loss 0.3624 (0.4

Epoch: [60][260/391]	Time 0.022 (0.040)	Data 0.000 (0.001)	Loss 0.4456 (0.4432)	Prec@1 86.719 (84.812)
Epoch: [60][280/391]	Time 0.022 (0.039)	Data 0.000 (0.001)	Loss 0.5033 (0.4430)	Prec@1 84.375 (84.828)
Epoch: [60][300/391]	Time 0.023 (0.038)	Data 0.000 (0.001)	Loss 0.5300 (0.4426)	Prec@1 84.375 (84.847)
Epoch: [60][320/391]	Time 0.023 (0.037)	Data 0.000 (0.001)	Loss 0.3191 (0.4418)	Prec@1 89.062 (84.903)
Epoch: [60][340/391]	Time 0.030 (0.036)	Data 0.000 (0.001)	Loss 0.4131 (0.4437)	Prec@1 85.156 (84.819)
Epoch: [60][360/391]	Time 0.023 (0.036)	Data 0.000 (0.001)	Loss 0.3547 (0.4439)	Prec@1 86.719 (84.821)
Epoch: [60][380/391]	Time 0.021 (0.035)	Data 0.000 (0.001)	Loss 0.6393 (0.4460)	Prec@1 78.125 (84.746)
Test: [0/79]	Time 0.240 (0.240)	Loss 0.5157 (0.5157)	Prec@1 80.469 (80.469)
Test: [20/79]	Time 0.011 (0.020)	Loss 0.5273 (0.6299)	Prec@1 81.250 (78.460)
Test: [40/79]	Time 0.008 (0.014)	Loss 0.6823 (0.6187)	Prec@1 72.656 (78.716)
Test: [60/79]	Time 0.016 (0.012)	Loss 0.4401 (0.6

KeyboardInterrupt: 