from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# NOTE(review): this path is relative and spells the folder "My Drive", whereas
# the unzip command below uses the absolute "/content/drive/MyDrive" spelling.
# `datapath` is not referenced by any code visible here — confirm it is needed.
datapath="drive/My Drive/MLVC_competition"

# IPython shell escape: extract the competition archive stored on Drive
# (no -d option is given, so unzip writes into the current working directory).
!unzip '/content/drive/MyDrive/MLVC_competition/dataset.zip'

# Define Model

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet_STL10(nn.Module):
    """LeNet-style classifier: two conv + max-pool stages, then three linear layers.

    ``fc1`` takes 400 input features, i.e. 16 channels of 5x5 maps, which is
    what the two 5x5 convolutions and 2x2 poolings below produce for a
    3-channel 32x32 image. The network outputs ``num_classes`` (10) logits.
    """

    def __init__(self):
        super().__init__()
        self.num_classes = 10

        # Attribute names and creation order are part of the state_dict layout.
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.fc1 = nn.Linear(in_features=400, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=self.num_classes)

        # Re-initialise only the convolution weights (Kaiming normal, fan-out);
        # linear layers keep PyTorch's default initialisation.
        conv_layers = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # Convolutional feature extractor.
        feats = self.pool1(self.relu(self.conv1(x)))
        feats = self.pool2(self.relu(self.conv2(feats)))

        # Collapse everything except the batch dimension.
        hidden = feats.flatten(1)

        # Two hidden linear layers with ReLU, then the un-activated output layer.
        for fc in (self.fc1, self.fc2):
            hidden = self.relu(fc(hidden))
        return self.fc3(hidden)


def Model():
    """Build and return the custom model (a new ``LeNet_STL10`` instance)."""
    net = LeNet_STL10()
    return net

In [4]:
# Instantiate the model, show its structure, and report parameter counts.
_net = Model()
print(_net)
_params = list(_net.parameters())
pytorch_total_params = sum(map(torch.numel, _params))
print(f"Number of parameters: {pytorch_total_params}")
# One line per parameter tensor: its index and element count.
for idx, p in enumerate(_params):
    print(idx, p.numel())

LeNet_STL10(
  (relu): ReLU(inplace=True)
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Number of parameters: 62006
0 450
1 6
2 2400
3 16
4 48000
5 120
6 10080
7 84
8 840
9 10


In [2]:
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''



class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The skip path is the identity unless the block changes resolution
    (``stride != 1``) or channel count, in which case it is a strided 1x1
    convolution followed by batch norm.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: only the first convolution may downsample.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: project only when the shapes of input and output differ.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.bn2(self.conv2(F.relu(self.bn1(self.conv1(x)))))
        main += self.shortcut(x)
        return F.relu(main)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions, each with batch norm.

    The last convolution widens the output to ``expansion * planes`` channels.
    The skip path is the identity unless the stride or channel count changes,
    in which case it is a strided 1x1 convolution followed by batch norm.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: the 3x3 convolution carries the stride.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: project only when the shapes of input and output differ.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = F.relu(self.bn2(self.conv2(main)))
        main = self.bn3(self.conv3(main))
        main += self.shortcut(x)
        return F.relu(main)


class ResNet(nn.Module):
    """ResNet backbone: a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: residual block class; it must expose an ``expansion`` class
            attribute and be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output logit vector.

    The stem takes 3-channel input. Stages 2-4 each halve the resolution, and
    ``forward`` then applies a fixed 4x4 average pool, so the classifier's
    ``512 * block.expansion`` input features line up when the final feature map
    pools down to 1x1 (as it does for 32x32 inputs).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # (width, stride of the first block) for layer1 .. layer4.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for number, (width, first_stride) in enumerate(stage_specs, start=1):
            stage = self._make_layer(block, width, num_blocks[number - 1],
                                     stride=first_stride)
            setattr(self, 'layer{}'.format(number), stage)

        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        stage_blocks = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage_blocks.append(block(self.in_planes, planes, block_stride))
            # The next block consumes this block's (possibly expanded) output.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)
        # Flatten to (batch, features) for the classifier.
        out = out.view(out.size(0), -1)
        return self.linear(out)


def ResNet18():
    """Return a ``ResNet`` of ``BasicBlock``s with 2, 2, 2, 2 blocks per stage."""
    return ResNet(block=BasicBlock, num_blocks=[2, 2, 2, 2])


def ResNet34():
    """Return a ``ResNet`` of ``BasicBlock``s with 3, 4, 6, 3 blocks per stage."""
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3])


def ResNet50():
    """Return a ``ResNet`` of ``Bottleneck`` blocks with 3, 4, 6, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3])


def ResNet101():
    """Return a ``ResNet`` of ``Bottleneck`` blocks with 3, 4, 23, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3])


def ResNet152():
    """Return a ``ResNet`` of ``Bottleneck`` blocks with 3, 8, 36, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3])


def test():
    """Smoke test: push one random 3x32x32 image through ResNet18 and print the output size."""
    model = ResNet18()
    dummy_batch = torch.randn(1, 3, 32, 32)
    logits = model(dummy_batch)
    print(logits.size())

# test()

# Utils

In [3]:
class AverageMeter(object):
    r"""Track the most recent value of a metric and its running weighted average.

    Args:
        name: label used when the meter is rendered with ``str()``.
        fmt: format spec (including the leading ``:``) applied to both the
            current value and the average, e.g. ``':6.2f'``.

    Attributes:
        val: last value passed to ``update``.
        sum: weighted sum of all values since the last ``reset``.
        count: total weight (sum of ``n``) since the last ``reset``.
        avg: ``sum / count``, or 0 while ``count`` is zero.
    """
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` (e.g. the batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. an empty batch reported with n=0) used to
        # raise ZeroDivisionError; report an average of 0 in that case instead.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        # Renders as "<name> <val> (<avg>)" with both numbers using self.fmt.
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Print one tab-separated progress line for a group of meters.

    Args:
        num_batches: total number of batches; sets the width of the
            ``[current/total]`` counter.
        *meters: objects rendered with ``str()`` on every printed line.
        prefix: text placed directly before the batch counter.
    """
    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        """Write the prefix, the batch counter and every meter to stdout."""
        fields = [self.prefix + self.batch_fmtstr.format(batch)]
        fields.extend(str(meter) for meter in self.meters)
        print('\t'.join(fields))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as ``'[{:3d}/352]'`` for the batch counter."""
        # Pad the current index to as many digits as the total has.
        width = len(str(num_batches // 1))
        slot = '{:' + str(width) + 'd}'
        return ''.join(['[', slot, '/', slot.format(num_batches), ']'])


def accuracy(output, target, topk=(1,)):
    r"""Return the top-$k$ accuracy, in percent, for every k in ``topk``.

    Args:
        output: 2-D score tensor, one row per sample and one column per class.
        target: tensor holding the true class index of each sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element float tensor per entry of ``topk``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)

        # A full descending sort plus a slice is used in place of output.topk
        # (ref: https://github.com/pytorch/pytorch/issues/22812).
        _, ranked = output.sort(descending=True)
        top_preds = ranked[:, :largest_k].t()
        # hits[r, i] is True when the rank-r prediction for sample i is correct.
        hits = top_preds.eq(target.view(1, -1).expand_as(top_preds))

        to_percent = 100.0 / n_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(to_percent)
            for k in topk
        ]

# Hyperparameters

In [9]:
# Prefix of the checkpoint file written by main().
# NOTE(review): main() builds the file name as SAVEPATH + 'model_weight.pth'
# with no path separator, so the file is './weightmodel_weight.pth' — confirm
# whether a trailing '/' was intended.
SAVEPATH = './weight'
# SGD weight decay (L2 penalty) passed to the optimizer in main().
WEIGHTDECAY = 5e-4
# SGD momentum passed to the optimizer in main().
MOMENTUM = 0.9
# Mini-batch size of the training DataLoader.
BATCHSIZE = 256
# Initial learning rate; main() schedules it with MultiStepLR.
LR = 0.1
# Number of training epochs run by main().
EPOCHS = 200
# train() prints a progress line every PRINTFREQ batches.
PRINTFREQ = 10

# Train Model

In [10]:
import time

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader


def main():
    """Train ``ResNet18`` on the images under ``./train`` and checkpoint every epoch.

    Builds the model, an SGD optimizer with Nesterov momentum, a
    ``MultiStepLR`` schedule (learning rate x0.1 at epochs 100 and 150) and a
    cross-entropy loss, then runs ``EPOCHS`` epochs through ``train``. After
    each epoch the model ``state_dict`` is saved to
    ``SAVEPATH + 'model_weight.pth'``, overwriting the previous file.

    Requires a CUDA device. Returns ``None``; progress is reported via
    ``print``. The accuracy printed at the end is the training accuracy of the
    last epoch, as returned by ``train``.
    """
    model = ResNet18()

    # Move the model and the loss to the GPU *before* constructing the
    # optimizer, so the optimizer is built over the parameters at their final
    # location (as the torch.optim documentation recommends).
    model = model.cuda()
    criterion = torch.nn.CrossEntropyLoss().cuda()

    ##### optimizer / learning rate scheduler #####
    optimizer = torch.optim.SGD(model.parameters(), lr=LR,
                                momentum=MOMENTUM, weight_decay=WEIGHTDECAY,
                                nesterov=True)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 150],
                                                     gamma=0.1)
    ###############################################

    # Check number of parameters your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")
    # Parameter-budget check, currently disabled:
    # if int(pytorch_total_params) > 2000000:
    #     print('Your model has the number of parameters more than 2 millions..')
    #     return

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Training augmentation: random 32x32 crop of the 4-pixel padded image
    # plus a random horizontal flip.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])

    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = torchvision.datasets.ImageFolder(
        './train', transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCHSIZE, shuffle=True,
                              num_workers=4, pin_memory=True)

    # NOTE(review): the validation set is constructed but never evaluated in
    # this function — confirm whether a validation pass was intended.
    val_dataset = torchvision.datasets.ImageFolder('./valid', transform=valid_transform)

    last_top1_acc = 0
    for epoch in range(EPOCHS):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        last_top1_acc = train(train_loader, epoch, model, optimizer, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(
            elapsed_time))

        # learning rate scheduling (stepped once per epoch, after training)
        scheduler.step()

        # Save model each epoch
        # NOTE(review): plain concatenation yields './weightmodel_weight.pth'
        # with the current SAVEPATH; left unchanged because code outside this
        # function may load from the same path — confirm the intended location.
        torch.save(model.state_dict(), SAVEPATH+'model_weight.pth')

    print(f"Last Top-1 Accuracy: {last_top1_acc}")
    print(f"Number of parameters: {pytorch_total_params}")



def train(train_loader, epoch, model, optimizer, criterion):
    """Run one training epoch and return the epoch-average top-1 accuracy.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        epoch: epoch index, used only in the progress prefix.
        model: network to optimise; switched to train mode here.
        optimizer: optimizer stepped once per batch.
        criterion: loss function called as ``criterion(output, target)``.

    Returns:
        The sample-weighted average top-1 accuracy (percent) over the epoch.
    """
    batch_timer = AverageMeter('Time', ':6.3f')
    load_timer = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc1_meter = AverageMeter('Acc@1', ':6.2f')
    acc5_meter = AverageMeter('Acc@5', ':6.2f')
    reporter = ProgressMeter(len(train_loader), batch_timer, load_timer,
                             loss_meter, acc1_meter, acc5_meter,
                             prefix="Epoch: [{}]".format(epoch))

    # Enable training behaviour (e.g. batch-norm statistics updates).
    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting for the loader to deliver this batch.
        load_timer.update(time.time() - tick)

        images = images.cuda()
        labels = labels.cuda()
        n_in_batch = images.size(0)

        # Forward pass and loss.
        logits = model(images)
        loss = criterion(logits, labels)

        # Record loss and accuracies, weighted by the batch size.
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        loss_meter.update(loss.item(), n_in_batch)
        acc1_meter.update(acc1[0].item(), n_in_batch)
        acc5_meter.update(acc5[0].item(), n_in_batch)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Wall-clock time for the whole iteration, data loading included.
        batch_timer.update(time.time() - tick)
        tick = time.time()

        if step % PRINTFREQ == 0:
            reporter.print(step)

    print('=> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=acc1_meter, top5=acc5_meter))
    return acc1_meter.avg


# Start training when this code is run as the main module.
if __name__ == "__main__":
    main()

Number of parameters: 11173962

----- epoch: 0, lr: 0.1 -----
Epoch: [0][  0/352]	Time  0.364 ( 0.364)	Data  0.261 ( 0.261)	Loss 2.3800e+00 (2.3800e+00)	Acc@1   8.20 (  8.20)	Acc@5  50.78 ( 50.78)
Epoch: [0][ 10/352]	Time  0.241 ( 0.261)	Data  0.000 ( 0.024)	Loss 3.0662e+00 (4.8805e+00)	Acc@1   8.20 (  8.77)	Acc@5  47.66 ( 48.93)
Epoch: [0][ 20/352]	Time  0.243 ( 0.254)	Data  0.000 ( 0.013)	Loss 2.3759e+00 (3.7215e+00)	Acc@1  12.50 ( 10.12)	Acc@5  51.95 ( 50.99)
Epoch: [0][ 30/352]	Time  0.248 ( 0.251)	Data  0.000 ( 0.009)	Loss 2.2686e+00 (3.2591e+00)	Acc@1  13.67 ( 11.49)	Acc@5  68.36 ( 53.77)
Epoch: [0][ 40/352]	Time  0.239 ( 0.249)	Data  0.000 ( 0.007)	Loss 2.1406e+00 (3.0007e+00)	Acc@1  25.39 ( 12.98)	Acc@5  73.44 ( 56.90)
Epoch: [0][ 50/352]	Time  0.242 ( 0.248)	Data  0.000 ( 0.005)	Loss 2.0763e+00 (2.8308e+00)	Acc@1  20.31 ( 14.37)	Acc@5  77.73 ( 59.96)
Epoch: [0][ 60/352]	Time  0.246 ( 0.248)	Data  0.000 ( 0.004)	Loss 2.0512e+00 (2.7103e+00)	Acc@1  21.88 ( 15.36)	Acc@5  76.56 ( 

Epoch: [1][240/352]	Time  0.248 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.4329e+00 (1.4886e+00)	Acc@1  48.44 ( 44.64)	Acc@5  92.58 ( 91.98)
Epoch: [1][250/352]	Time  0.246 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.3426e+00 (1.4854e+00)	Acc@1  53.52 ( 44.83)	Acc@5  92.19 ( 92.01)
Epoch: [1][260/352]	Time  0.246 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.4832e+00 (1.4823e+00)	Acc@1  42.97 ( 44.91)	Acc@5  91.02 ( 92.06)
Epoch: [1][270/352]	Time  0.250 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.3037e+00 (1.4783e+00)	Acc@1  54.69 ( 45.05)	Acc@5  94.14 ( 92.11)
Epoch: [1][280/352]	Time  0.252 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.4434e+00 (1.4735e+00)	Acc@1  49.22 ( 45.28)	Acc@5  92.19 ( 92.17)
Epoch: [1][290/352]	Time  0.244 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.4144e+00 (1.4686e+00)	Acc@1  47.27 ( 45.45)	Acc@5  93.36 ( 92.24)
Epoch: [1][300/352]	Time  0.248 ( 0.251)	Data  0.000 ( 0.001)	Loss 1.2835e+00 (1.4642e+00)	Acc@1  55.08 ( 45.64)	Acc@5  94.53 ( 92.30)
Epoch: [1][310/352]	Time  0.239 ( 0.251)	Data  0.000 ( 

Epoch: [3][120/352]	Time  0.251 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.1795e+00 (1.1802e+00)	Acc@1  59.77 ( 57.10)	Acc@5  95.31 ( 95.53)
Epoch: [3][130/352]	Time  0.245 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.2133e+00 (1.1783e+00)	Acc@1  56.25 ( 57.17)	Acc@5  94.92 ( 95.53)
Epoch: [3][140/352]	Time  0.241 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.1934e+00 (1.1761e+00)	Acc@1  56.64 ( 57.26)	Acc@5  96.48 ( 95.57)
Epoch: [3][150/352]	Time  0.242 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.1728e+00 (1.1721e+00)	Acc@1  60.16 ( 57.36)	Acc@5  93.75 ( 95.61)
Epoch: [3][160/352]	Time  0.253 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.2039e+00 (1.1717e+00)	Acc@1  55.86 ( 57.38)	Acc@5  96.48 ( 95.65)
Epoch: [3][170/352]	Time  0.238 ( 0.248)	Data  0.000 ( 0.002)	Loss 1.0715e+00 (1.1695e+00)	Acc@1  57.42 ( 57.41)	Acc@5  96.88 ( 95.62)
Epoch: [3][180/352]	Time  0.253 ( 0.248)	Data  0.000 ( 0.001)	Loss 1.1629e+00 (1.1679e+00)	Acc@1  56.25 ( 57.43)	Acc@5  95.70 ( 95.64)
Epoch: [3][190/352]	Time  0.245 ( 0.248)	Data  0.000 ( 

=> Acc@1 62.078 Acc@5 96.516
==> 84.43 seconds to train this epoch


----- epoch: 5, lr: 0.1 -----
Epoch: [5][  0/352]	Time  0.298 ( 0.298)	Data  0.212 ( 0.212)	Loss 1.0134e+00 (1.0134e+00)	Acc@1  62.89 ( 62.89)	Acc@5  96.48 ( 96.48)
Epoch: [5][ 10/352]	Time  0.237 ( 0.230)	Data  0.000 ( 0.019)	Loss 1.0596e+00 (1.0322e+00)	Acc@1  57.81 ( 63.10)	Acc@5  96.88 ( 96.59)
Epoch: [5][ 20/352]	Time  0.228 ( 0.225)	Data  0.000 ( 0.010)	Loss 1.0958e+00 (1.0310e+00)	Acc@1  58.20 ( 62.69)	Acc@5  96.88 ( 96.84)
Epoch: [5][ 30/352]	Time  0.222 ( 0.224)	Data  0.000 ( 0.007)	Loss 9.4771e-01 (1.0115e+00)	Acc@1  66.80 ( 63.63)	Acc@5  96.48 ( 96.82)
Epoch: [5][ 40/352]	Time  0.222 ( 0.224)	Data  0.000 ( 0.005)	Loss 1.0849e+00 (1.0025e+00)	Acc@1  59.77 ( 63.97)	Acc@5  96.09 ( 96.88)
Epoch: [5][ 50/352]	Time  0.220 ( 0.223)	Data  0.000 ( 0.004)	Loss 1.0700e+00 (1.0026e+00)	Acc@1  64.84 ( 63.78)	Acc@5  95.31 ( 96.88)
Epoch: [5][ 60/352]	Time  0.214 ( 0.223)	Data  0.000 ( 0.004)	Loss 9.4027e-01 (9.9669e-01)	

Epoch: [6][240/352]	Time  0.225 ( 0.222)	Data  0.000 ( 0.001)	Loss 8.2915e-01 (9.0779e-01)	Acc@1  70.70 ( 67.36)	Acc@5  96.88 ( 97.38)
Epoch: [6][250/352]	Time  0.220 ( 0.222)	Data  0.000 ( 0.001)	Loss 9.6750e-01 (9.0774e-01)	Acc@1  67.97 ( 67.38)	Acc@5  94.92 ( 97.37)
Epoch: [6][260/352]	Time  0.229 ( 0.222)	Data  0.000 ( 0.001)	Loss 9.2116e-01 (9.0799e-01)	Acc@1  65.62 ( 67.38)	Acc@5  98.44 ( 97.39)
Epoch: [6][270/352]	Time  0.217 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.6377e-01 (9.0745e-01)	Acc@1  71.88 ( 67.40)	Acc@5  98.83 ( 97.40)
Epoch: [6][280/352]	Time  0.224 ( 0.222)	Data  0.000 ( 0.001)	Loss 1.0242e+00 (9.0626e-01)	Acc@1  65.62 ( 67.45)	Acc@5  93.36 ( 97.41)
Epoch: [6][290/352]	Time  0.229 ( 0.222)	Data  0.000 ( 0.001)	Loss 8.5438e-01 (9.0524e-01)	Acc@1  70.31 ( 67.51)	Acc@5  97.66 ( 97.41)
Epoch: [6][300/352]	Time  0.230 ( 0.222)	Data  0.000 ( 0.001)	Loss 8.5688e-01 (9.0452e-01)	Acc@1  69.92 ( 67.58)	Acc@5  97.27 ( 97.41)
Epoch: [6][310/352]	Time  0.211 ( 0.223)	Data  0.000 ( 

Epoch: [8][120/352]	Time  0.225 ( 0.222)	Data  0.000 ( 0.002)	Loss 7.4022e-01 (7.8831e-01)	Acc@1  73.44 ( 71.80)	Acc@5  98.05 ( 97.98)
Epoch: [8][130/352]	Time  0.218 ( 0.222)	Data  0.000 ( 0.002)	Loss 7.9274e-01 (7.8644e-01)	Acc@1  73.05 ( 71.84)	Acc@5  98.83 ( 98.01)
Epoch: [8][140/352]	Time  0.219 ( 0.222)	Data  0.000 ( 0.002)	Loss 8.0312e-01 (7.8574e-01)	Acc@1  71.09 ( 71.88)	Acc@5  96.88 ( 97.97)
Epoch: [8][150/352]	Time  0.222 ( 0.222)	Data  0.000 ( 0.002)	Loss 8.6363e-01 (7.8899e-01)	Acc@1  68.75 ( 71.77)	Acc@5  98.83 ( 97.97)
Epoch: [8][160/352]	Time  0.222 ( 0.222)	Data  0.000 ( 0.002)	Loss 8.2880e-01 (7.8749e-01)	Acc@1  71.88 ( 71.85)	Acc@5  96.88 ( 97.99)
Epoch: [8][170/352]	Time  0.209 ( 0.222)	Data  0.000 ( 0.002)	Loss 7.5629e-01 (7.8893e-01)	Acc@1  69.14 ( 71.80)	Acc@5  99.22 ( 97.96)
Epoch: [8][180/352]	Time  0.219 ( 0.222)	Data  0.000 ( 0.001)	Loss 9.0745e-01 (7.9021e-01)	Acc@1  65.23 ( 71.75)	Acc@5  98.05 ( 97.96)
Epoch: [8][190/352]	Time  0.221 ( 0.222)	Data  0.000 ( 

=> Acc@1 73.364 Acc@5 98.077
==> 79.19 seconds to train this epoch


----- epoch: 10, lr: 0.1 -----
Epoch: [10][  0/352]	Time  0.327 ( 0.327)	Data  0.236 ( 0.236)	Loss 6.6187e-01 (6.6187e-01)	Acc@1  74.61 ( 74.61)	Acc@5  99.22 ( 99.22)
Epoch: [10][ 10/352]	Time  0.223 ( 0.231)	Data  0.000 ( 0.022)	Loss 7.6940e-01 (7.2113e-01)	Acc@1  73.05 ( 74.25)	Acc@5  98.44 ( 98.40)
Epoch: [10][ 20/352]	Time  0.220 ( 0.226)	Data  0.000 ( 0.011)	Loss 7.2416e-01 (7.1439e-01)	Acc@1  74.61 ( 74.72)	Acc@5  98.05 ( 98.34)
Epoch: [10][ 30/352]	Time  0.222 ( 0.225)	Data  0.000 ( 0.008)	Loss 7.0671e-01 (7.1734e-01)	Acc@1  74.22 ( 74.71)	Acc@5  98.44 ( 98.25)
Epoch: [10][ 40/352]	Time  0.219 ( 0.224)	Data  0.000 ( 0.006)	Loss 6.8340e-01 (7.0674e-01)	Acc@1  75.39 ( 75.29)	Acc@5  96.88 ( 98.31)
Epoch: [10][ 50/352]	Time  0.224 ( 0.223)	Data  0.000 ( 0.005)	Loss 6.8195e-01 (7.1391e-01)	Acc@1  76.17 ( 74.84)	Acc@5  98.83 ( 98.31)
Epoch: [10][ 60/352]	Time  0.228 ( 0.224)	Data  0.000 ( 0.004)	Loss 7.2363e-01 (7.14

Epoch: [11][230/352]	Time  0.224 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.5659e-01 (7.0576e-01)	Acc@1  78.12 ( 74.99)	Acc@5  98.83 ( 98.30)
Epoch: [11][240/352]	Time  0.219 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.5366e-01 (7.0560e-01)	Acc@1  73.05 ( 74.99)	Acc@5  98.05 ( 98.31)
Epoch: [11][250/352]	Time  0.217 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.8633e-01 (7.0634e-01)	Acc@1  73.05 ( 74.95)	Acc@5  97.66 ( 98.29)
Epoch: [11][260/352]	Time  0.227 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.0152e-01 (7.0757e-01)	Acc@1  76.17 ( 74.94)	Acc@5  97.66 ( 98.28)
Epoch: [11][270/352]	Time  0.226 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.5647e-01 (7.0801e-01)	Acc@1  72.66 ( 74.91)	Acc@5  96.48 ( 98.27)
Epoch: [11][280/352]	Time  0.227 ( 0.222)	Data  0.000 ( 0.001)	Loss 7.0066e-01 (7.0779e-01)	Acc@1  76.17 ( 74.91)	Acc@5  98.05 ( 98.27)
Epoch: [11][290/352]	Time  0.216 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.6543e-01 (7.0713e-01)	Acc@1  75.78 ( 74.93)	Acc@5  97.66 ( 98.28)
Epoch: [11][300/352]	Time  0.224 ( 0.222)	Data  

Epoch: [13][100/352]	Time  0.217 ( 0.225)	Data  0.000 ( 0.003)	Loss 5.6695e-01 (6.6298e-01)	Acc@1  78.91 ( 76.47)	Acc@5  99.61 ( 98.45)
Epoch: [13][110/352]	Time  0.212 ( 0.224)	Data  0.000 ( 0.002)	Loss 6.6069e-01 (6.6337e-01)	Acc@1  78.52 ( 76.43)	Acc@5  99.61 ( 98.47)
Epoch: [13][120/352]	Time  0.225 ( 0.224)	Data  0.000 ( 0.002)	Loss 8.2573e-01 (6.6515e-01)	Acc@1  70.70 ( 76.41)	Acc@5  99.22 ( 98.49)
Epoch: [13][130/352]	Time  0.217 ( 0.224)	Data  0.000 ( 0.002)	Loss 6.1107e-01 (6.6500e-01)	Acc@1  77.34 ( 76.40)	Acc@5  99.61 ( 98.53)
Epoch: [13][140/352]	Time  0.222 ( 0.224)	Data  0.000 ( 0.002)	Loss 6.5304e-01 (6.6547e-01)	Acc@1  77.34 ( 76.35)	Acc@5  98.83 ( 98.55)
Epoch: [13][150/352]	Time  0.219 ( 0.224)	Data  0.000 ( 0.002)	Loss 5.6573e-01 (6.6845e-01)	Acc@1  78.52 ( 76.25)	Acc@5  99.22 ( 98.54)
Epoch: [13][160/352]	Time  0.226 ( 0.224)	Data  0.000 ( 0.002)	Loss 6.8766e-01 (6.6747e-01)	Acc@1  75.00 ( 76.28)	Acc@5  98.44 ( 98.54)
Epoch: [13][170/352]	Time  0.217 ( 0.224)	Data  

Epoch: [14][340/352]	Time  0.233 ( 0.223)	Data  0.000 ( 0.001)	Loss 6.6941e-01 (6.5635e-01)	Acc@1  77.34 ( 76.73)	Acc@5  97.27 ( 98.51)
Epoch: [14][350/352]	Time  0.222 ( 0.223)	Data  0.000 ( 0.001)	Loss 7.0227e-01 (6.5636e-01)	Acc@1  75.39 ( 76.75)	Acc@5  98.83 ( 98.51)
=> Acc@1 76.743 Acc@5 98.509
==> 78.34 seconds to train this epoch


----- epoch: 15, lr: 0.1 -----
Epoch: [15][  0/352]	Time  0.328 ( 0.328)	Data  0.236 ( 0.236)	Loss 6.0160e-01 (6.0160e-01)	Acc@1  80.47 ( 80.47)	Acc@5  98.05 ( 98.05)
Epoch: [15][ 10/352]	Time  0.221 ( 0.231)	Data  0.000 ( 0.022)	Loss 5.5578e-01 (6.1743e-01)	Acc@1  80.86 ( 78.62)	Acc@5  99.61 ( 98.47)
Epoch: [15][ 20/352]	Time  0.219 ( 0.226)	Data  0.000 ( 0.011)	Loss 5.5964e-01 (6.1879e-01)	Acc@1  76.95 ( 78.29)	Acc@5  98.44 ( 98.51)
Epoch: [15][ 30/352]	Time  0.221 ( 0.225)	Data  0.000 ( 0.008)	Loss 6.7660e-01 (6.2657e-01)	Acc@1  77.34 ( 77.97)	Acc@5  98.05 ( 98.60)
Epoch: [15][ 40/352]	Time  0.223 ( 0.224)	Data  0.000 ( 0.006)	Loss 5.5231e-01 (6.14

Epoch: [16][210/352]	Time  0.226 ( 0.224)	Data  0.000 ( 0.001)	Loss 5.9533e-01 (6.2505e-01)	Acc@1  78.91 ( 77.96)	Acc@5  99.22 ( 98.57)
Epoch: [16][220/352]	Time  0.216 ( 0.224)	Data  0.000 ( 0.001)	Loss 7.2877e-01 (6.2845e-01)	Acc@1  73.83 ( 77.83)	Acc@5  98.44 ( 98.55)
Epoch: [16][230/352]	Time  0.219 ( 0.224)	Data  0.000 ( 0.001)	Loss 6.3734e-01 (6.2826e-01)	Acc@1  79.69 ( 77.84)	Acc@5  98.44 ( 98.56)
Epoch: [16][240/352]	Time  0.216 ( 0.224)	Data  0.000 ( 0.001)	Loss 6.9005e-01 (6.3027e-01)	Acc@1  73.44 ( 77.75)	Acc@5  98.44 ( 98.56)
Epoch: [16][250/352]	Time  0.225 ( 0.224)	Data  0.000 ( 0.001)	Loss 7.6612e-01 (6.2976e-01)	Acc@1  72.66 ( 77.79)	Acc@5  98.05 ( 98.57)
Epoch: [16][260/352]	Time  0.217 ( 0.223)	Data  0.000 ( 0.001)	Loss 5.6411e-01 (6.2870e-01)	Acc@1  79.30 ( 77.80)	Acc@5  98.83 ( 98.56)
Epoch: [16][270/352]	Time  0.213 ( 0.223)	Data  0.000 ( 0.001)	Loss 7.5254e-01 (6.2943e-01)	Acc@1  73.44 ( 77.74)	Acc@5  98.05 ( 98.57)
Epoch: [16][280/352]	Time  0.221 ( 0.223)	Data  

Epoch: [18][ 80/352]	Time  0.215 ( 0.222)	Data  0.000 ( 0.003)	Loss 5.7524e-01 (5.9064e-01)	Acc@1  78.91 ( 79.09)	Acc@5 100.00 ( 98.89)
Epoch: [18][ 90/352]	Time  0.226 ( 0.222)	Data  0.000 ( 0.003)	Loss 5.7770e-01 (5.9096e-01)	Acc@1  78.91 ( 79.11)	Acc@5 100.00 ( 98.90)
Epoch: [18][100/352]	Time  0.233 ( 0.223)	Data  0.000 ( 0.002)	Loss 5.7146e-01 (5.9190e-01)	Acc@1  78.91 ( 79.05)	Acc@5  99.61 ( 98.92)
Epoch: [18][110/352]	Time  0.214 ( 0.223)	Data  0.000 ( 0.002)	Loss 6.4402e-01 (5.9219e-01)	Acc@1  75.39 ( 79.05)	Acc@5  99.22 ( 98.89)
Epoch: [18][120/352]	Time  0.236 ( 0.223)	Data  0.000 ( 0.002)	Loss 6.5921e-01 (5.9460e-01)	Acc@1  78.12 ( 78.93)	Acc@5  97.66 ( 98.88)
Epoch: [18][130/352]	Time  0.226 ( 0.223)	Data  0.000 ( 0.002)	Loss 6.1297e-01 (5.9350e-01)	Acc@1  79.69 ( 79.00)	Acc@5  98.44 ( 98.85)
Epoch: [18][140/352]	Time  0.228 ( 0.223)	Data  0.000 ( 0.002)	Loss 5.6372e-01 (5.9375e-01)	Acc@1  77.34 ( 78.87)	Acc@5  98.83 ( 98.84)
Epoch: [18][150/352]	Time  0.228 ( 0.223)	Data  

Epoch: [19][320/352]	Time  0.229 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.7756e-01 (6.0224e-01)	Acc@1  80.47 ( 78.78)	Acc@5  98.83 ( 98.76)
Epoch: [19][330/352]	Time  0.225 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.8555e-01 (6.0244e-01)	Acc@1  75.78 ( 78.76)	Acc@5  98.83 ( 98.76)
Epoch: [19][340/352]	Time  0.223 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.0769e-01 (6.0237e-01)	Acc@1  82.03 ( 78.77)	Acc@5  99.22 ( 98.76)
Epoch: [19][350/352]	Time  0.223 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.9871e-01 (6.0236e-01)	Acc@1  76.95 ( 78.76)	Acc@5  98.83 ( 98.76)
=> Acc@1 78.756 Acc@5 98.753
==> 78.12 seconds to train this epoch


----- epoch: 20, lr: 0.1 -----
Epoch: [20][  0/352]	Time  0.328 ( 0.328)	Data  0.243 ( 0.243)	Loss 6.2474e-01 (6.2474e-01)	Acc@1  78.52 ( 78.52)	Acc@5  98.44 ( 98.44)
Epoch: [20][ 10/352]	Time  0.225 ( 0.231)	Data  0.000 ( 0.022)	Loss 7.1118e-01 (5.6941e-01)	Acc@1  76.17 ( 80.79)	Acc@5  99.22 ( 98.65)
Epoch: [20][ 20/352]	Time  0.224 ( 0.226)	Data  0.000 ( 0.012)	Loss 5.3856e-01 (5.75

Epoch: [21][190/352]	Time  0.222 ( 0.222)	Data  0.000 ( 0.001)	Loss 4.7402e-01 (5.8057e-01)	Acc@1  85.55 ( 79.27)	Acc@5  98.83 ( 98.76)
Epoch: [21][200/352]	Time  0.207 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.9613e-01 (5.8090e-01)	Acc@1  73.83 ( 79.27)	Acc@5  98.05 ( 98.77)
Epoch: [21][210/352]	Time  0.221 ( 0.222)	Data  0.000 ( 0.001)	Loss 4.9529e-01 (5.8222e-01)	Acc@1  84.38 ( 79.21)	Acc@5  98.05 ( 98.76)
Epoch: [21][220/352]	Time  0.225 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.5211e-01 (5.8248e-01)	Acc@1  81.25 ( 79.22)	Acc@5 100.00 ( 98.76)
Epoch: [21][230/352]	Time  0.223 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.7983e-01 (5.8278e-01)	Acc@1  83.59 ( 79.24)	Acc@5  99.61 ( 98.76)
Epoch: [21][240/352]	Time  0.223 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.8804e-01 (5.8455e-01)	Acc@1  74.22 ( 79.18)	Acc@5  98.05 ( 98.75)
Epoch: [21][250/352]	Time  0.219 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.8084e-01 (5.8572e-01)	Acc@1  81.25 ( 79.18)	Acc@5  97.66 ( 98.74)
Epoch: [21][260/352]	Time  0.219 ( 0.222)	Data  

Epoch: [23][ 60/352]	Time  0.220 ( 0.223)	Data  0.000 ( 0.004)	Loss 6.9384e-01 (5.6960e-01)	Acc@1  75.78 ( 79.86)	Acc@5  98.44 ( 98.96)
Epoch: [23][ 70/352]	Time  0.221 ( 0.223)	Data  0.000 ( 0.003)	Loss 5.1989e-01 (5.6934e-01)	Acc@1  80.47 ( 79.77)	Acc@5  98.83 ( 98.93)
Epoch: [23][ 80/352]	Time  0.217 ( 0.223)	Data  0.000 ( 0.003)	Loss 5.1346e-01 (5.7014e-01)	Acc@1  82.03 ( 79.77)	Acc@5  98.05 ( 98.88)
Epoch: [23][ 90/352]	Time  0.217 ( 0.223)	Data  0.000 ( 0.003)	Loss 5.7325e-01 (5.6957e-01)	Acc@1  80.47 ( 79.80)	Acc@5  98.05 ( 98.88)
Epoch: [23][100/352]	Time  0.212 ( 0.223)	Data  0.000 ( 0.003)	Loss 5.5403e-01 (5.7042e-01)	Acc@1  79.69 ( 79.79)	Acc@5 100.00 ( 98.91)
Epoch: [23][110/352]	Time  0.220 ( 0.222)	Data  0.000 ( 0.002)	Loss 7.4761e-01 (5.7501e-01)	Acc@1  71.48 ( 79.65)	Acc@5  98.44 ( 98.89)
Epoch: [23][120/352]	Time  0.218 ( 0.222)	Data  0.000 ( 0.002)	Loss 5.4968e-01 (5.7409e-01)	Acc@1  81.64 ( 79.65)	Acc@5  96.88 ( 98.87)
Epoch: [23][130/352]	Time  0.219 ( 0.222)	Data  

Epoch: [24][300/352]	Time  0.222 ( 0.228)	Data  0.000 ( 0.001)	Loss 7.2524e-01 (5.7115e-01)	Acc@1  75.00 ( 79.78)	Acc@5  97.27 ( 98.83)
Epoch: [24][310/352]	Time  0.236 ( 0.228)	Data  0.000 ( 0.001)	Loss 5.8190e-01 (5.7181e-01)	Acc@1  80.08 ( 79.74)	Acc@5  98.83 ( 98.83)
Epoch: [24][320/352]	Time  0.219 ( 0.228)	Data  0.000 ( 0.001)	Loss 5.7113e-01 (5.7200e-01)	Acc@1  82.81 ( 79.74)	Acc@5  98.44 ( 98.84)
Epoch: [24][330/352]	Time  0.219 ( 0.228)	Data  0.000 ( 0.001)	Loss 5.6980e-01 (5.7186e-01)	Acc@1  82.42 ( 79.72)	Acc@5  98.44 ( 98.84)
Epoch: [24][340/352]	Time  0.220 ( 0.228)	Data  0.000 ( 0.001)	Loss 5.8349e-01 (5.7202e-01)	Acc@1  81.64 ( 79.73)	Acc@5  98.44 ( 98.84)
Epoch: [24][350/352]	Time  0.254 ( 0.228)	Data  0.000 ( 0.001)	Loss 4.3716e-01 (5.7133e-01)	Acc@1  81.25 ( 79.77)	Acc@5 100.00 ( 98.84)
=> Acc@1 79.782 Acc@5 98.844
==> 80.32 seconds to train this epoch


----- epoch: 25, lr: 0.1 -----
Epoch: [25][  0/352]	Time  0.316 ( 0.316)	Data  0.218 ( 0.218)	Loss 5.4947e-01 (5.49

Epoch: [26][170/352]	Time  0.237 ( 0.236)	Data  0.000 ( 0.001)	Loss 6.0206e-01 (5.5048e-01)	Acc@1  80.86 ( 80.42)	Acc@5  98.05 ( 98.87)
Epoch: [26][180/352]	Time  0.236 ( 0.236)	Data  0.000 ( 0.001)	Loss 6.1121e-01 (5.5027e-01)	Acc@1  78.52 ( 80.46)	Acc@5  98.44 ( 98.88)
Epoch: [26][190/352]	Time  0.242 ( 0.235)	Data  0.000 ( 0.001)	Loss 6.7301e-01 (5.5084e-01)	Acc@1  74.61 ( 80.44)	Acc@5  98.83 ( 98.88)
Epoch: [26][200/352]	Time  0.250 ( 0.236)	Data  0.000 ( 0.001)	Loss 5.8557e-01 (5.5215e-01)	Acc@1  78.52 ( 80.41)	Acc@5  99.61 ( 98.86)
Epoch: [26][210/352]	Time  0.239 ( 0.236)	Data  0.000 ( 0.001)	Loss 5.8672e-01 (5.5338e-01)	Acc@1  74.61 ( 80.38)	Acc@5  98.44 ( 98.85)
Epoch: [26][220/352]	Time  0.241 ( 0.236)	Data  0.000 ( 0.001)	Loss 5.0170e-01 (5.5301e-01)	Acc@1  80.86 ( 80.37)	Acc@5  98.44 ( 98.86)
Epoch: [26][230/352]	Time  0.237 ( 0.236)	Data  0.000 ( 0.001)	Loss 5.8788e-01 (5.5491e-01)	Acc@1  78.91 ( 80.29)	Acc@5  98.83 ( 98.85)
Epoch: [26][240/352]	Time  0.252 ( 0.237)	Data  

Epoch: [28][ 40/352]	Time  0.227 ( 0.224)	Data  0.000 ( 0.006)	Loss 4.8736e-01 (5.3204e-01)	Acc@1  81.25 ( 81.30)	Acc@5  99.61 ( 98.95)
Epoch: [28][ 50/352]	Time  0.221 ( 0.223)	Data  0.000 ( 0.004)	Loss 5.4884e-01 (5.3087e-01)	Acc@1  79.30 ( 81.27)	Acc@5  98.05 ( 98.92)
Epoch: [28][ 60/352]	Time  0.224 ( 0.223)	Data  0.000 ( 0.004)	Loss 4.7382e-01 (5.3214e-01)	Acc@1  81.25 ( 81.26)	Acc@5  99.22 ( 98.93)
Epoch: [28][ 70/352]	Time  0.223 ( 0.223)	Data  0.000 ( 0.003)	Loss 6.1764e-01 (5.3627e-01)	Acc@1  79.30 ( 81.05)	Acc@5  96.88 ( 98.90)
Epoch: [28][ 80/352]	Time  0.225 ( 0.223)	Data  0.000 ( 0.003)	Loss 4.8566e-01 (5.3242e-01)	Acc@1  83.20 ( 81.14)	Acc@5  98.44 ( 98.93)
Epoch: [28][ 90/352]	Time  0.226 ( 0.223)	Data  0.000 ( 0.003)	Loss 5.1288e-01 (5.3504e-01)	Acc@1  80.86 ( 81.10)	Acc@5  99.61 ( 98.94)
Epoch: [28][100/352]	Time  0.217 ( 0.222)	Data  0.000 ( 0.002)	Loss 5.3713e-01 (5.3457e-01)	Acc@1  79.30 ( 81.14)	Acc@5  99.22 ( 98.96)
Epoch: [28][110/352]	Time  0.228 ( 0.222)	Data  

Epoch: [29][280/352]	Time  0.222 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.3806e-01 (5.4094e-01)	Acc@1  82.03 ( 80.77)	Acc@5  98.83 ( 98.97)
Epoch: [29][290/352]	Time  0.227 ( 0.222)	Data  0.000 ( 0.001)	Loss 6.5390e-01 (5.4282e-01)	Acc@1  76.56 ( 80.72)	Acc@5  98.83 ( 98.96)
Epoch: [29][300/352]	Time  0.221 ( 0.222)	Data  0.000 ( 0.001)	Loss 4.4392e-01 (5.4339e-01)	Acc@1  83.98 ( 80.69)	Acc@5  99.22 ( 98.96)
Epoch: [29][310/352]	Time  0.219 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.7341e-01 (5.4369e-01)	Acc@1  80.47 ( 80.69)	Acc@5  98.44 ( 98.96)
Epoch: [29][320/352]	Time  0.234 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.4909e-01 (5.4391e-01)	Acc@1  81.25 ( 80.69)	Acc@5  99.22 ( 98.95)
Epoch: [29][330/352]	Time  0.225 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.4063e-01 (5.4312e-01)	Acc@1  80.47 ( 80.70)	Acc@5  99.22 ( 98.95)
Epoch: [29][340/352]	Time  0.216 ( 0.222)	Data  0.000 ( 0.001)	Loss 5.3832e-01 (5.4353e-01)	Acc@1  80.08 ( 80.69)	Acc@5  99.22 ( 98.95)
Epoch: [29][350/352]	Time  0.220 ( 0.222)	Data  

Epoch: [31][150/352]	Time  0.234 ( 0.233)	Data  0.000 ( 0.002)	Loss 5.3601e-01 (5.3308e-01)	Acc@1  81.64 ( 81.15)	Acc@5  98.83 ( 99.02)
Epoch: [31][160/352]	Time  0.228 ( 0.233)	Data  0.000 ( 0.002)	Loss 6.3058e-01 (5.3585e-01)	Acc@1  78.91 ( 81.08)	Acc@5  98.44 ( 99.02)
Epoch: [31][170/352]	Time  0.228 ( 0.233)	Data  0.000 ( 0.002)	Loss 5.1861e-01 (5.3854e-01)	Acc@1  77.73 ( 80.97)	Acc@5 100.00 ( 99.01)
Epoch: [31][180/352]	Time  0.237 ( 0.233)	Data  0.000 ( 0.002)	Loss 5.5400e-01 (5.3617e-01)	Acc@1  80.47 ( 81.04)	Acc@5  98.83 ( 99.03)
Epoch: [31][190/352]	Time  0.222 ( 0.232)	Data  0.000 ( 0.002)	Loss 5.4995e-01 (5.3413e-01)	Acc@1  82.81 ( 81.13)	Acc@5  98.44 ( 99.02)
Epoch: [31][200/352]	Time  0.230 ( 0.232)	Data  0.000 ( 0.002)	Loss 4.7775e-01 (5.3420e-01)	Acc@1  84.77 ( 81.13)	Acc@5  99.22 ( 99.01)
Epoch: [31][210/352]	Time  0.231 ( 0.232)	Data  0.000 ( 0.001)	Loss 5.8407e-01 (5.3471e-01)	Acc@1  81.25 ( 81.12)	Acc@5  99.61 ( 99.00)
Epoch: [31][220/352]	Time  0.224 ( 0.231)	Data  

Epoch: [33][ 20/352]	Time  0.221 ( 0.227)	Data  0.000 ( 0.010)	Loss 4.1969e-01 (5.2246e-01)	Acc@1  84.77 ( 81.88)	Acc@5  98.44 ( 98.88)
Epoch: [33][ 30/352]	Time  0.215 ( 0.226)	Data  0.000 ( 0.007)	Loss 5.2515e-01 (5.0996e-01)	Acc@1  82.03 ( 82.16)	Acc@5  99.22 ( 98.99)
Epoch: [33][ 40/352]	Time  0.235 ( 0.227)	Data  0.000 ( 0.005)	Loss 4.8450e-01 (5.1512e-01)	Acc@1  81.25 ( 81.89)	Acc@5  98.83 ( 98.96)
Epoch: [33][ 50/352]	Time  0.224 ( 0.227)	Data  0.000 ( 0.004)	Loss 5.7051e-01 (5.1079e-01)	Acc@1  80.08 ( 82.00)	Acc@5 100.00 ( 99.00)
Epoch: [33][ 60/352]	Time  0.232 ( 0.227)	Data  0.000 ( 0.004)	Loss 7.5990e-01 (5.1277e-01)	Acc@1  73.83 ( 81.90)	Acc@5  97.66 ( 98.94)
Epoch: [33][ 70/352]	Time  0.239 ( 0.227)	Data  0.000 ( 0.003)	Loss 5.9339e-01 (5.1475e-01)	Acc@1  79.69 ( 81.81)	Acc@5  97.66 ( 98.93)
Epoch: [33][ 80/352]	Time  0.246 ( 0.228)	Data  0.000 ( 0.003)	Loss 4.6047e-01 (5.1481e-01)	Acc@1  85.55 ( 81.83)	Acc@5  98.44 ( 98.92)
Epoch: [33][ 90/352]	Time  0.226 ( 0.229)	Data  

Epoch: [34][260/352]	Time  0.221 ( 0.227)	Data  0.000 ( 0.001)	Loss 5.3400e-01 (5.2515e-01)	Acc@1  81.25 ( 81.56)	Acc@5 100.00 ( 99.00)
Epoch: [34][270/352]	Time  0.242 ( 0.227)	Data  0.000 ( 0.001)	Loss 4.8408e-01 (5.2651e-01)	Acc@1  83.59 ( 81.53)	Acc@5  99.22 ( 99.00)
Epoch: [34][280/352]	Time  0.221 ( 0.227)	Data  0.000 ( 0.001)	Loss 5.5198e-01 (5.2702e-01)	Acc@1  81.64 ( 81.51)	Acc@5 100.00 ( 99.00)
Epoch: [34][290/352]	Time  0.232 ( 0.227)	Data  0.000 ( 0.001)	Loss 5.4227e-01 (5.2762e-01)	Acc@1  81.25 ( 81.51)	Acc@5  98.83 ( 98.99)
Epoch: [34][300/352]	Time  0.227 ( 0.227)	Data  0.000 ( 0.001)	Loss 5.2449e-01 (5.2753e-01)	Acc@1  80.08 ( 81.49)	Acc@5  99.22 ( 98.99)
Epoch: [34][310/352]	Time  0.218 ( 0.227)	Data  0.000 ( 0.001)	Loss 6.0915e-01 (5.2878e-01)	Acc@1  77.73 ( 81.44)	Acc@5  98.44 ( 98.99)
Epoch: [34][320/352]	Time  0.212 ( 0.227)	Data  0.000 ( 0.001)	Loss 5.2330e-01 (5.2858e-01)	Acc@1  82.03 ( 81.45)	Acc@5  99.22 ( 99.00)
Epoch: [34][330/352]	Time  0.226 ( 0.227)	Data  

Epoch: [36][130/352]	Time  0.243 ( 0.243)	Data  0.000 ( 0.002)	Loss 4.3505e-01 (5.2376e-01)	Acc@1  85.94 ( 81.58)	Acc@5  99.61 ( 99.04)
Epoch: [36][140/352]	Time  0.242 ( 0.243)	Data  0.000 ( 0.002)	Loss 5.0442e-01 (5.2237e-01)	Acc@1  82.42 ( 81.59)	Acc@5  99.22 ( 99.04)
Epoch: [36][150/352]	Time  0.248 ( 0.243)	Data  0.000 ( 0.002)	Loss 4.7495e-01 (5.2030e-01)	Acc@1  83.59 ( 81.66)	Acc@5  98.05 ( 99.04)
Epoch: [36][160/352]	Time  0.238 ( 0.243)	Data  0.000 ( 0.002)	Loss 4.9176e-01 (5.1863e-01)	Acc@1  82.42 ( 81.74)	Acc@5  99.22 ( 99.05)
Epoch: [36][170/352]	Time  0.241 ( 0.243)	Data  0.000 ( 0.002)	Loss 6.8559e-01 (5.2058e-01)	Acc@1  75.78 ( 81.70)	Acc@5  97.66 ( 99.05)
Epoch: [36][180/352]	Time  0.254 ( 0.243)	Data  0.000 ( 0.002)	Loss 4.6738e-01 (5.1988e-01)	Acc@1  82.81 ( 81.77)	Acc@5  99.61 ( 99.05)
Epoch: [36][190/352]	Time  0.248 ( 0.243)	Data  0.000 ( 0.002)	Loss 4.7397e-01 (5.1901e-01)	Acc@1  85.16 ( 81.82)	Acc@5  98.83 ( 99.04)
Epoch: [36][200/352]	Time  0.255 ( 0.243)	Data  

Epoch: [38][  0/352]	Time  0.324 ( 0.324)	Data  0.232 ( 0.232)	Loss 4.5899e-01 (4.5899e-01)	Acc@1  86.33 ( 86.33)	Acc@5  97.66 ( 97.66)
Epoch: [38][ 10/352]	Time  0.248 ( 0.249)	Data  0.000 ( 0.021)	Loss 4.8537e-01 (4.8960e-01)	Acc@1  82.81 ( 83.31)	Acc@5  99.22 ( 99.15)
Epoch: [38][ 20/352]	Time  0.248 ( 0.247)	Data  0.000 ( 0.011)	Loss 5.1004e-01 (4.9458e-01)	Acc@1  80.86 ( 82.89)	Acc@5  99.61 ( 99.07)
Epoch: [38][ 30/352]	Time  0.251 ( 0.245)	Data  0.000 ( 0.008)	Loss 5.5742e-01 (4.9329e-01)	Acc@1  81.64 ( 82.81)	Acc@5  98.44 ( 99.18)
Epoch: [38][ 40/352]	Time  0.240 ( 0.244)	Data  0.000 ( 0.006)	Loss 5.0606e-01 (4.9679e-01)	Acc@1  83.20 ( 82.75)	Acc@5  99.22 ( 99.16)
Epoch: [38][ 50/352]	Time  0.245 ( 0.245)	Data  0.000 ( 0.005)	Loss 5.3220e-01 (4.9706e-01)	Acc@1  78.52 ( 82.73)	Acc@5 100.00 ( 99.11)
Epoch: [38][ 60/352]	Time  0.241 ( 0.245)	Data  0.000 ( 0.004)	Loss 5.4755e-01 (5.0090e-01)	Acc@1  83.20 ( 82.58)	Acc@5  99.22 ( 99.15)
Epoch: [38][ 70/352]	Time  0.252 ( 0.245)	Data  

Epoch: [39][240/352]	Time  0.246 ( 0.246)	Data  0.000 ( 0.001)	Loss 6.2431e-01 (5.1402e-01)	Acc@1  77.73 ( 81.92)	Acc@5  97.66 ( 99.09)
Epoch: [39][250/352]	Time  0.248 ( 0.247)	Data  0.000 ( 0.001)	Loss 6.4294e-01 (5.1469e-01)	Acc@1  77.73 ( 81.89)	Acc@5  98.44 ( 99.09)
Epoch: [39][260/352]	Time  0.257 ( 0.247)	Data  0.000 ( 0.001)	Loss 4.4572e-01 (5.1452e-01)	Acc@1  83.59 ( 81.91)	Acc@5  98.83 ( 99.08)
Epoch: [39][270/352]	Time  0.252 ( 0.247)	Data  0.000 ( 0.001)	Loss 5.1789e-01 (5.1447e-01)	Acc@1  83.98 ( 81.91)	Acc@5  98.05 ( 99.07)
Epoch: [39][280/352]	Time  0.260 ( 0.247)	Data  0.000 ( 0.001)	Loss 4.7154e-01 (5.1493e-01)	Acc@1  81.25 ( 81.90)	Acc@5  99.22 ( 99.06)
Epoch: [39][290/352]	Time  0.244 ( 0.247)	Data  0.000 ( 0.001)	Loss 5.0836e-01 (5.1544e-01)	Acc@1  79.30 ( 81.89)	Acc@5  99.22 ( 99.05)
Epoch: [39][300/352]	Time  0.252 ( 0.247)	Data  0.000 ( 0.001)	Loss 6.1104e-01 (5.1788e-01)	Acc@1  80.08 ( 81.81)	Acc@5  98.83 ( 99.03)
Epoch: [39][310/352]	Time  0.254 ( 0.247)	Data  

Epoch: [41][110/352]	Time  0.239 ( 0.251)	Data  0.000 ( 0.003)	Loss 5.2914e-01 (4.9941e-01)	Acc@1  81.64 ( 82.32)	Acc@5  99.61 ( 99.09)
Epoch: [41][120/352]	Time  0.244 ( 0.251)	Data  0.000 ( 0.002)	Loss 4.5521e-01 (4.9991e-01)	Acc@1  84.38 ( 82.33)	Acc@5  99.61 ( 99.10)
Epoch: [41][130/352]	Time  0.247 ( 0.251)	Data  0.000 ( 0.002)	Loss 4.7477e-01 (5.0078e-01)	Acc@1  83.98 ( 82.33)	Acc@5  99.22 ( 99.10)
Epoch: [41][140/352]	Time  0.247 ( 0.251)	Data  0.000 ( 0.002)	Loss 5.0115e-01 (5.0291e-01)	Acc@1  80.86 ( 82.21)	Acc@5  98.83 ( 99.08)
Epoch: [41][150/352]	Time  0.252 ( 0.250)	Data  0.000 ( 0.002)	Loss 3.8688e-01 (5.0217e-01)	Acc@1  85.55 ( 82.25)	Acc@5  99.61 ( 99.08)
Epoch: [41][160/352]	Time  0.248 ( 0.250)	Data  0.000 ( 0.002)	Loss 5.4441e-01 (5.0270e-01)	Acc@1  82.03 ( 82.25)	Acc@5  98.44 ( 99.06)
Epoch: [41][170/352]	Time  0.243 ( 0.250)	Data  0.000 ( 0.002)	Loss 5.1861e-01 (5.0144e-01)	Acc@1  81.64 ( 82.27)	Acc@5  99.22 ( 99.07)
Epoch: [41][180/352]	Time  0.244 ( 0.250)	Data  

Epoch: [42][350/352]	Time  0.248 ( 0.250)	Data  0.000 ( 0.001)	Loss 4.9704e-01 (5.1272e-01)	Acc@1  83.20 ( 81.97)	Acc@5  98.83 ( 99.06)
=> Acc@1 81.967 Acc@5 99.064
==> 88.16 seconds to train this epoch


----- epoch: 43, lr: 0.1 -----
Epoch: [43][  0/352]	Time  0.343 ( 0.343)	Data  0.245 ( 0.245)	Loss 4.9170e-01 (4.9170e-01)	Acc@1  81.25 ( 81.25)	Acc@5  98.83 ( 98.83)
Epoch: [43][ 10/352]	Time  0.264 ( 0.263)	Data  0.000 ( 0.022)	Loss 4.8467e-01 (4.6330e-01)	Acc@1  84.77 ( 83.70)	Acc@5  99.61 ( 99.36)
Epoch: [43][ 20/352]	Time  0.249 ( 0.257)	Data  0.000 ( 0.012)	Loss 5.6248e-01 (4.7524e-01)	Acc@1  80.08 ( 83.05)	Acc@5  99.22 ( 99.35)
Epoch: [43][ 30/352]	Time  0.263 ( 0.258)	Data  0.000 ( 0.008)	Loss 5.7332e-01 (4.8901e-01)	Acc@1  79.30 ( 82.71)	Acc@5  99.61 ( 99.28)
Epoch: [43][ 40/352]	Time  0.252 ( 0.258)	Data  0.000 ( 0.006)	Loss 4.3355e-01 (4.8428e-01)	Acc@1  85.16 ( 82.96)	Acc@5  99.22 ( 99.29)
Epoch: [43][ 50/352]	Time  0.255 ( 0.256)	Data  0.000 ( 0.005)	Loss 4.2455e-01 (4.87

Epoch: [44][220/352]	Time  0.246 ( 0.247)	Data  0.000 ( 0.002)	Loss 5.3643e-01 (5.0627e-01)	Acc@1  80.86 ( 82.07)	Acc@5  96.88 ( 99.08)
Epoch: [44][230/352]	Time  0.228 ( 0.246)	Data  0.000 ( 0.002)	Loss 4.9186e-01 (5.0708e-01)	Acc@1  83.20 ( 82.08)	Acc@5  99.61 ( 99.07)
Epoch: [44][240/352]	Time  0.245 ( 0.246)	Data  0.000 ( 0.002)	Loss 4.3387e-01 (5.0702e-01)	Acc@1  84.77 ( 82.08)	Acc@5  99.61 ( 99.06)
Epoch: [44][250/352]	Time  0.246 ( 0.246)	Data  0.000 ( 0.001)	Loss 4.9471e-01 (5.0568e-01)	Acc@1  80.86 ( 82.11)	Acc@5  99.61 ( 99.06)
Epoch: [44][260/352]	Time  0.245 ( 0.247)	Data  0.000 ( 0.001)	Loss 5.1400e-01 (5.0518e-01)	Acc@1  80.86 ( 82.14)	Acc@5  98.05 ( 99.05)
Epoch: [44][270/352]	Time  0.255 ( 0.246)	Data  0.000 ( 0.001)	Loss 5.6509e-01 (5.0547e-01)	Acc@1  80.86 ( 82.15)	Acc@5  99.61 ( 99.05)
Epoch: [44][280/352]	Time  0.238 ( 0.246)	Data  0.000 ( 0.001)	Loss 5.0838e-01 (5.0539e-01)	Acc@1  82.42 ( 82.11)	Acc@5 100.00 ( 99.05)
Epoch: [44][290/352]	Time  0.234 ( 0.246)	Data  

Epoch: [46][ 90/352]	Time  0.240 ( 0.237)	Data  0.000 ( 0.003)	Loss 4.8152e-01 (4.9029e-01)	Acc@1  80.47 ( 82.68)	Acc@5  99.22 ( 99.14)
Epoch: [46][100/352]	Time  0.232 ( 0.237)	Data  0.000 ( 0.002)	Loss 5.2195e-01 (4.8993e-01)	Acc@1  82.81 ( 82.66)	Acc@5  98.44 ( 99.15)
Epoch: [46][110/352]	Time  0.238 ( 0.237)	Data  0.000 ( 0.002)	Loss 4.5658e-01 (4.9289e-01)	Acc@1  82.81 ( 82.57)	Acc@5  99.61 ( 99.14)
Epoch: [46][120/352]	Time  0.244 ( 0.237)	Data  0.000 ( 0.002)	Loss 4.1353e-01 (4.9238e-01)	Acc@1  86.33 ( 82.66)	Acc@5  99.61 ( 99.13)
Epoch: [46][130/352]	Time  0.238 ( 0.237)	Data  0.000 ( 0.002)	Loss 5.3775e-01 (4.9241e-01)	Acc@1  80.47 ( 82.69)	Acc@5  99.22 ( 99.12)
Epoch: [46][140/352]	Time  0.232 ( 0.237)	Data  0.000 ( 0.002)	Loss 4.7834e-01 (4.9080e-01)	Acc@1  82.42 ( 82.76)	Acc@5  99.22 ( 99.10)
Epoch: [46][150/352]	Time  0.240 ( 0.237)	Data  0.000 ( 0.002)	Loss 5.0365e-01 (4.9128e-01)	Acc@1  82.03 ( 82.75)	Acc@5  99.22 ( 99.12)
Epoch: [46][160/352]	Time  0.237 ( 0.237)	Data  

Epoch: [47][330/352]	Time  0.237 ( 0.235)	Data  0.000 ( 0.001)	Loss 5.7425e-01 (5.0511e-01)	Acc@1  80.47 ( 82.24)	Acc@5  98.83 ( 99.05)
Epoch: [47][340/352]	Time  0.228 ( 0.235)	Data  0.000 ( 0.001)	Loss 5.4220e-01 (5.0505e-01)	Acc@1  80.47 ( 82.24)	Acc@5  99.61 ( 99.05)
Epoch: [47][350/352]	Time  0.231 ( 0.235)	Data  0.000 ( 0.001)	Loss 4.3426e-01 (5.0503e-01)	Acc@1  85.16 ( 82.23)	Acc@5  99.61 ( 99.05)
=> Acc@1 82.242 Acc@5 99.050
==> 82.82 seconds to train this epoch


----- epoch: 48, lr: 0.1 -----
Epoch: [48][  0/352]	Time  0.295 ( 0.295)	Data  0.205 ( 0.205)	Loss 5.3198e-01 (5.3198e-01)	Acc@1  80.86 ( 80.86)	Acc@5 100.00 (100.00)
Epoch: [48][ 10/352]	Time  0.235 ( 0.241)	Data  0.000 ( 0.019)	Loss 5.2076e-01 (4.8539e-01)	Acc@1  82.81 ( 82.39)	Acc@5  99.61 ( 99.57)
Epoch: [48][ 20/352]	Time  0.240 ( 0.238)	Data  0.000 ( 0.010)	Loss 4.5395e-01 (4.8939e-01)	Acc@1  84.38 ( 82.22)	Acc@5  99.61 ( 99.48)
Epoch: [48][ 30/352]	Time  0.238 ( 0.238)	Data  0.000 ( 0.007)	Loss 3.9897e-01 (4.77

KeyboardInterrupt: 