In [1]:
import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import resnet

In [2]:
# Names of the callables exported by the local ``resnet`` module whose name is
# lowercase, is not a dunder and starts with "resnet"; these are the values
# accepted by ``--arch``.
model_names = sorted(
    name for name, obj in vars(resnet).items()
    if name.islower()
    and not name.startswith("__")
    and name.startswith("resnet")
    and callable(obj)
)

print(model_names)

# Command-line interface. Options are registered in a fixed order so that the
# generated help text lists them in the same sequence.
parser = argparse.ArgumentParser(description='Propert ResNets for CIFAR10 in pytorch')

# --- model selection ---
parser.add_argument(
    '--arch', '-a',
    metavar='ARCH',
    default='resnet32',
    choices=model_names,
    help='model architecture: {} (default: resnet32)'.format(' | '.join(model_names)),
)

# --- data loading / schedule length ---
parser.add_argument(
    '-j', '--workers',
    type=int, default=4, metavar='N',
    help='number of data loading workers (default: 4)',
)
parser.add_argument(
    '--epochs',
    type=int, default=200, metavar='N',
    help='number of total epochs to run',
)
parser.add_argument(
    '--start-epoch',
    type=int, default=0, metavar='N',
    help='manual epoch number (useful on restarts)',
)
parser.add_argument(
    '-b', '--batch-size',
    type=int, default=128, metavar='N',
    help='mini-batch size (default: 128)',
)

# --- optimizer hyper-parameters ---
parser.add_argument(
    '--lr', '--learning-rate',
    type=float, default=0.1, metavar='LR',
    help='initial learning rate',
)
parser.add_argument(
    '--momentum',
    type=float, default=0.9, metavar='M',
    help='momentum',
)
parser.add_argument(
    '--weight-decay', '--wd',
    type=float, default=1e-4, metavar='W',
    help='weight decay (default: 1e-4)',
)

# --- logging / run mode ---
parser.add_argument(
    '--print-freq', '-p',
    type=int, default=50, metavar='N',
    help='print frequency (default: 50)',
)
parser.add_argument(
    '--resume',
    type=str, default='', metavar='PATH',
    help='path to latest checkpoint (default: none)',
)
parser.add_argument(
    '-e', '--evaluate',
    dest='evaluate', action='store_true',
    help='evaluate model on validation set',
)
# NOTE(review): no code visible in this notebook reads ``args.pretrained``.
parser.add_argument(
    '--pretrained',
    dest='pretrained', action='store_true',
    help='use pre-trained model',
)
parser.add_argument(
    '--half',
    dest='half', action='store_true',
    help='use half-precision(16-bit) ',
)

# --- checkpointing ---
parser.add_argument(
    '--save-dir',
    dest='save_dir', type=str, default='save_temp',
    help='The directory used to save the trained models',
)
parser.add_argument(
    '--save-every',
    dest='save_every', type=int, default=10,
    help='Saves checkpoints at every specified number of epochs',
)

# Best top-1 validation precision seen so far; updated by main() via ``global``.
best_prec1 = 0


def main():
    """Train or evaluate a CIFAR-10 ResNet using the options from ``parser``.

    Steps performed:
      1. Parse options (with an explicit empty argument list, i.e. defaults).
      2. Build the selected ``resnet`` architecture wrapped in ``DataParallel``
         and move it to the GPU.
      3. Optionally restore weights / epoch / best precision from ``--resume``.
      4. Build the CIFAR-10 train and validation loaders, the loss, the SGD
         optimizer and a ``MultiStepLR`` schedule with milestones 100 and 150.
      5. Either run one validation pass (``--evaluate``) or run the
         train/validate loop, writing checkpoints into ``args.save_dir``.

    Side effects: assigns the globals ``args`` and ``best_prec1``, creates
    ``args.save_dir``, requests the CIFAR-10 download into ``./data`` and
    writes ``checkpoint.th`` / ``model.th``.
    """
    global args, best_prec1
    # Parse an explicit empty argument list so every option takes its default;
    # switch to the commented line to read the real command line instead.
    args = parser.parse_args(args=[])
    # args = parser.parse_args()

    # Make sure the checkpoint directory exists.
    os.makedirs(args.save_dir, exist_ok=True)

    model = torch.nn.DataParallel(resnet.__dict__[args.arch]())
    model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE(review): torch.load unpickles the file -- only resume from
            # checkpoints you trust.
            checkpoint = torch.load(args.resume)
            # 'model.th' files written below carry no 'epoch' entry, so fall
            # back to the --start-epoch value instead of raising KeyError.
            args.start_epoch = checkpoint.get('epoch', args.start_epoch)
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, args.start_epoch))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # NOTE(review): these look like the usual ImageNet channel statistics
    # rather than CIFAR-10 ones -- confirm this is intentional.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # The validation loader uses a fixed batch size of 128, independent of
    # --batch-size.
    val_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=128, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if args.half:
        model.half()
        criterion.half()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Build the schedule from epoch 0 and fast-forward it to the start epoch.
    # Passing ``last_epoch=start_epoch - 1`` to a scheduler over a freshly
    # created optimizer raises KeyError('initial_lr') when start_epoch > 0,
    # and would not re-apply decays from milestones that were already passed.
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=[100, 150])
    if args.start_epoch > 0:
        import warnings
        with warnings.catch_warnings():
            # Stepping the scheduler before any optimizer.step() is
            # intentional here; silence the ordering warning it triggers.
            warnings.simplefilter('ignore', UserWarning)
            for _ in range(args.start_epoch):
                lr_scheduler.step()

    # for resnet1202 original paper uses lr=0.01 for first 400 minibatches for warm-up
    # then switch back. In this setup it will correspond for first epoch.
    # Warm-up only makes sense for a fresh run, not when resuming mid-schedule.
    use_warmup = (args.arch in ['resnet1202', 'resnet110']
                  and args.start_epoch == 0)
    if use_warmup:
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr*0.1

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch)

        if use_warmup and epoch == args.start_epoch:
            # Switch back to the nominal learning rate after the warm-up
            # epoch; schedulers that derive the next value from the current
            # param_group['lr'] would otherwise keep the reduced rate.
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        # Periodic full checkpoint (includes the epoch to resume from).
        if epoch > 0 and epoch % args.save_every == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, filename=os.path.join(args.save_dir, 'checkpoint.th'))

        # 'model.th' is rewritten after every epoch with the latest weights;
        # is_best is passed along, but save_checkpoint as defined in this
        # notebook does not act on it.
        save_checkpoint({
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, filename=os.path.join(args.save_dir, 'model.th'))


def train(train_loader, model, criterion, optimizer, epoch):
    """
        Perform one optimisation pass over ``train_loader``.

        For every mini-batch the inputs are moved to the GPU (and cast to half
        precision when ``args.half`` is set), a forward/backward pass and an
        optimizer step are executed, and running timing, loss and top-1
        precision statistics are updated. A progress line is printed every
        ``args.print_freq`` batches. Nothing is returned.
    """
    # Progress-line template; filled with the running meters below.
    report = ('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    meter_batch = AverageMeter()
    meter_data = AverageMeter()
    meter_loss = AverageMeter()
    meter_top1 = AverageMeter()

    # Put the model into training mode.
    model.train()

    num_batches = len(train_loader)
    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting for the loader to deliver this batch.
        meter_data.update(time.time() - tick)

        n_samples = images.size(0)
        labels = labels.cuda()
        images = images.cuda()
        if args.half:
            images = images.half()

        # Forward pass.
        logits = model(images)
        batch_loss = criterion(logits, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Statistics are accumulated in float32 regardless of --half.
        logits = logits.float()
        batch_loss = batch_loss.float()
        batch_prec1 = accuracy(logits.data, labels)[0]
        meter_loss.update(batch_loss.item(), n_samples)
        meter_top1.update(batch_prec1.item(), n_samples)

        # Wall-clock time for the whole iteration.
        meter_batch.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq == 0:
            print(report.format(
                epoch, step, num_batches, batch_time=meter_batch,
                data_time=meter_data, loss=meter_loss, top1=meter_top1))


def validate(val_loader, model, criterion):
    """
    Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Prints a progress line every ``args.print_freq`` batches and a final
    summary line, then returns the sample-weighted average top-1 precision
    over the whole loader.
    """
    # Progress-line template; filled with the running meters below.
    report = ('Test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')

    meter_batch = AverageMeter()
    meter_loss = AverageMeter()
    meter_top1 = AverageMeter()

    # Put the model into evaluation mode.
    model.eval()

    num_batches = len(val_loader)
    tick = time.time()
    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            n_samples = images.size(0)
            labels = labels.cuda()
            images = images.cuda()
            if args.half:
                images = images.half()

            # Forward pass only.
            logits = model(images)
            batch_loss = criterion(logits, labels)

            # Statistics are accumulated in float32 regardless of --half.
            logits = logits.float()
            batch_loss = batch_loss.float()
            batch_prec1 = accuracy(logits.data, labels)[0]
            meter_loss.update(batch_loss.item(), n_samples)
            meter_top1.update(batch_prec1.item(), n_samples)

            # Wall-clock time for the whole iteration.
            meter_batch.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                print(report.format(
                    step, num_batches, batch_time=meter_batch,
                    loss=meter_loss, top1=meter_top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=meter_top1))

    return meter_top1.avg

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """
    Serialise ``state`` to ``filename`` with ``torch.save``.

    ``is_best`` is accepted for the callers' convenience but is not used by
    this function: the file is written unconditionally.
    """
    torch.save(obj=state, f=filename)

class AverageMeter(object):
    """Tracks the latest value and a weighted running average of a metric.

    Attributes:
        val: the most recent value passed to ``update``.
        sum: running sum of ``val * n`` over all updates.
        count: running sum of the weights ``n``.
        avg: ``sum / count`` (0 while ``count`` is 0).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` (e.g. the batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against a zero total weight (e.g. update(..., n=0) on a fresh
        # meter), which would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k

    Args:
        output: score tensor; the top-k search runs along dimension 1, so
            dimension 0 indexes samples and dimension 1 indexes classes.
        target: tensor of true class indices, one per sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 0-dim tensor per entry of ``topk`` holding the
        percentage (0-100) of samples whose target is among the k
        highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch)
    # so that row r holds every sample's rank-r prediction.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` may be non-contiguous (it derives from a transposed
        # tensor), in which case ``view(-1)`` raises for k > 1; ``reshape``
        # copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

['resnet110', 'resnet1202', 'resnet20', 'resnet32', 'resnet44', 'resnet56']


In [3]:
main()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
current lr 1.00000e-01
Epoch: [0][0/391]	Time 0.726 (0.726)	Data 0.204 (0.204)	Loss 3.1643 (3.1643)	Prec@1 13.281 (13.281)
Epoch: [0][50/391]	Time 0.075 (0.088)	Data 0.000 (0.004)	Loss 2.3206 (3.0196)	Prec@1 14.062 (11.688)
Epoch: [0][100/391]	Time 0.075 (0.082)	Data 0.000 (0.002)	Loss 2.2302 (2.6560)	Prec@1 15.625 (12.562)
Epoch: [0][150/391]	Time 0.075 (0.079)	Data 0.000 (0.001)	Loss 1.8191 (2.4590)	Prec@1 32.031 (15.827)
Epoch: [0][200/391]	Time 0.075 (0.078)	Data 0.000 (0.001)	Loss 1.8197 (2.3174)	Prec@1 28.906 (19.080)
Epoch: [0][250/391]	Time 0.075 (0.078)	Data 0.000 (0.001)	Loss 1.7726 (2.2085)	Prec@1 33.594 (22.065)
Epoch: [0][300/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 1.7353 (2.1277)	Prec@1 34.375 (24.271)
Epoch: [0][350/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 1.5550 (2.0601)	Prec@1 50.000 (26.313)
Test: [0/79]	Time 0.190 (0.190)	Loss 1.5943 (1.5943)	Prec@1 39.844 (39.844)
Test: [50/79]	Time 0.018 (0.021)	Loss

Epoch: [8][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.5197 (0.5173)	Prec@1 81.250 (82.644)
Epoch: [8][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.3318 (0.5140)	Prec@1 88.281 (82.333)
Epoch: [8][150/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3921 (0.5125)	Prec@1 84.375 (82.440)
Epoch: [8][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.4896 (0.5183)	Prec@1 82.031 (82.093)
Epoch: [8][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.4934 (0.5229)	Prec@1 83.594 (81.978)
Epoch: [8][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.5793 (0.5239)	Prec@1 79.688 (82.003)
Epoch: [8][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.5656 (0.5228)	Prec@1 82.812 (82.036)
Test: [0/79]	Time 0.193 (0.193)	Loss 0.5233 (0.5233)	Prec@1 84.375 (84.375)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.7153 (0.5826)	Prec@1 79.688 (80.407)
 * Prec@1 80.400
current lr 1.00000e-01
Epoch: [9][0/391]	Time 0.257 (0.257)	Data 0.180 (0.180)	Loss 0.3734 (0.3734)	Prec@1 84.375 (84

Epoch: [16][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.3513 (0.3688)	Prec@1 89.062 (86.966)
Epoch: [16][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.3982 (0.3694)	Prec@1 87.500 (87.065)
Epoch: [16][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.4346 (0.3725)	Prec@1 87.500 (86.979)
Epoch: [16][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3519 (0.3709)	Prec@1 85.156 (86.996)
Epoch: [16][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3755 (0.3724)	Prec@1 85.938 (86.934)
Epoch: [16][350/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.3918 (0.3741)	Prec@1 85.938 (86.879)
Test: [0/79]	Time 0.190 (0.190)	Loss 0.5786 (0.5786)	Prec@1 80.469 (80.469)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.7025 (0.5719)	Prec@1 79.688 (81.710)
 * Prec@1 81.080
current lr 1.00000e-01
Epoch: [17][0/391]	Time 0.261 (0.261)	Data 0.179 (0.179)	Loss 0.3815 (0.3815)	Prec@1 85.156 (85.156)
Epoch: [17][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.3671 (0.3568)	Prec@1 87

Epoch: [24][150/391]	Time 0.076 (0.077)	Data 0.000 (0.001)	Loss 0.3097 (0.2976)	Prec@1 86.719 (89.533)
Epoch: [24][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2616 (0.2993)	Prec@1 89.062 (89.478)
Epoch: [24][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2390 (0.3030)	Prec@1 95.312 (89.464)
Epoch: [24][300/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.3238 (0.3048)	Prec@1 89.062 (89.335)
Epoch: [24][350/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.4492 (0.3078)	Prec@1 84.375 (89.249)
Test: [0/79]	Time 0.187 (0.187)	Loss 0.4620 (0.4620)	Prec@1 85.938 (85.938)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.6272 (0.5074)	Prec@1 84.375 (84.390)
 * Prec@1 84.460
current lr 1.00000e-01
Epoch: [25][0/391]	Time 0.263 (0.263)	Data 0.184 (0.184)	Loss 0.3222 (0.3222)	Prec@1 89.062 (89.062)
Epoch: [25][50/391]	Time 0.076 (0.079)	Data 0.000 (0.004)	Loss 0.1841 (0.2933)	Prec@1 93.750 (90.150)
Epoch: [25][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.2882 (0.2949)	Prec@1 91

Epoch: [32][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2964 (0.2623)	Prec@1 92.969 (90.808)
Epoch: [32][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3828 (0.2657)	Prec@1 87.500 (90.731)
Epoch: [32][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2859 (0.2664)	Prec@1 89.062 (90.654)
Epoch: [32][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.4430 (0.2691)	Prec@1 80.469 (90.534)
Test: [0/79]	Time 0.197 (0.197)	Loss 0.3285 (0.3285)	Prec@1 86.719 (86.719)
Test: [50/79]	Time 0.017 (0.021)	Loss 0.4564 (0.4250)	Prec@1 86.719 (86.244)
 * Prec@1 86.290
current lr 1.00000e-01
Epoch: [33][0/391]	Time 0.257 (0.257)	Data 0.180 (0.180)	Loss 0.2730 (0.2730)	Prec@1 89.062 (89.062)
Epoch: [33][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.2040 (0.2447)	Prec@1 89.062 (91.100)
Epoch: [33][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.2432 (0.2474)	Prec@1 89.844 (91.066)
Epoch: [33][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.2825 (0.2521)	Prec@1 93

Epoch: [40][250/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.2588 (0.2445)	Prec@1 92.188 (91.525)
Epoch: [40][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1550 (0.2484)	Prec@1 95.312 (91.360)
Epoch: [40][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3533 (0.2514)	Prec@1 90.625 (91.248)
Test: [0/79]	Time 0.188 (0.188)	Loss 0.3479 (0.3479)	Prec@1 89.062 (89.062)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.5151 (0.4312)	Prec@1 83.594 (86.535)
 * Prec@1 86.410
current lr 1.00000e-01
Epoch: [41][0/391]	Time 0.265 (0.265)	Data 0.188 (0.188)	Loss 0.2099 (0.2099)	Prec@1 91.406 (91.406)
Epoch: [41][50/391]	Time 0.076 (0.079)	Data 0.000 (0.004)	Loss 0.1399 (0.2220)	Prec@1 95.312 (92.142)
Epoch: [41][100/391]	Time 0.076 (0.077)	Data 0.000 (0.002)	Loss 0.1960 (0.2407)	Prec@1 91.406 (91.491)
Epoch: [41][150/391]	Time 0.076 (0.077)	Data 0.000 (0.001)	Loss 0.2311 (0.2371)	Prec@1 91.406 (91.556)
Epoch: [41][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.3346 (0.2396)	Prec@1 91

Epoch: [48][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1846 (0.2303)	Prec@1 90.625 (91.847)
Epoch: [48][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1180 (0.2302)	Prec@1 97.656 (91.809)
Test: [0/79]	Time 0.193 (0.193)	Loss 0.3522 (0.3522)	Prec@1 89.844 (89.844)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.4561 (0.4929)	Prec@1 88.281 (85.570)
 * Prec@1 85.520
current lr 1.00000e-01
Epoch: [49][0/391]	Time 0.258 (0.258)	Data 0.180 (0.180)	Loss 0.1722 (0.1722)	Prec@1 94.531 (94.531)
Epoch: [49][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.4018 (0.2204)	Prec@1 86.719 (92.004)
Epoch: [49][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.1887 (0.2188)	Prec@1 94.531 (92.048)
Epoch: [49][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.3509 (0.2304)	Prec@1 89.062 (91.810)
Epoch: [49][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2188 (0.2331)	Prec@1 93.750 (91.690)
Epoch: [49][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1857 (0.2326)	Prec@1 93

Epoch: [56][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1831 (0.2200)	Prec@1 92.969 (92.301)
Test: [0/79]	Time 0.184 (0.184)	Loss 0.4542 (0.4542)	Prec@1 85.156 (85.156)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.5999 (0.5046)	Prec@1 83.594 (84.804)
 * Prec@1 84.850
current lr 1.00000e-01
Epoch: [57][0/391]	Time 0.263 (0.263)	Data 0.185 (0.185)	Loss 0.0935 (0.0935)	Prec@1 97.656 (97.656)
Epoch: [57][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.2177 (0.1823)	Prec@1 92.969 (93.873)
Epoch: [57][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.2016 (0.1912)	Prec@1 92.188 (93.379)
Epoch: [57][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.2480 (0.2006)	Prec@1 91.406 (93.139)
Epoch: [57][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2660 (0.2036)	Prec@1 90.625 (92.988)
Epoch: [57][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2732 (0.2087)	Prec@1 89.062 (92.682)
Epoch: [57][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2591 (0.2100)	Prec@1 88

Test: [0/79]	Time 0.186 (0.186)	Loss 0.4689 (0.4689)	Prec@1 83.594 (83.594)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.6019 (0.4380)	Prec@1 83.594 (86.872)
 * Prec@1 86.830
current lr 1.00000e-01
Epoch: [65][0/391]	Time 0.256 (0.256)	Data 0.180 (0.180)	Loss 0.1581 (0.1581)	Prec@1 92.969 (92.969)
Epoch: [65][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.2040 (0.2097)	Prec@1 94.531 (92.693)
Epoch: [65][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.1856 (0.2022)	Prec@1 92.969 (92.675)
Epoch: [65][150/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1730 (0.2005)	Prec@1 93.750 (92.896)
Epoch: [65][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2472 (0.2012)	Prec@1 89.844 (92.833)
Epoch: [65][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2304 (0.2046)	Prec@1 91.406 (92.801)
Epoch: [65][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2105 (0.2054)	Prec@1 93.750 (92.803)
Epoch: [65][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2622 (0.2084)	Prec@1 91

 * Prec@1 86.960
current lr 1.00000e-01
Epoch: [73][0/391]	Time 0.264 (0.264)	Data 0.183 (0.183)	Loss 0.1978 (0.1978)	Prec@1 92.969 (92.969)
Epoch: [73][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.3287 (0.2168)	Prec@1 88.281 (92.525)
Epoch: [73][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.2471 (0.2032)	Prec@1 92.969 (92.876)
Epoch: [73][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.1509 (0.2013)	Prec@1 96.094 (92.974)
Epoch: [73][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2110 (0.2001)	Prec@1 94.531 (93.035)
Epoch: [73][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2281 (0.2035)	Prec@1 93.750 (92.913)
Epoch: [73][300/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.1342 (0.2020)	Prec@1 96.875 (92.956)
Epoch: [73][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1515 (0.2005)	Prec@1 93.750 (92.940)
Test: [0/79]	Time 0.189 (0.189)	Loss 0.4405 (0.4405)	Prec@1 88.281 (88.281)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.4432 (0.4695)	Prec@1 86

Epoch: [81][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.1632 (0.1708)	Prec@1 96.094 (93.796)
Epoch: [81][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.1509 (0.1731)	Prec@1 95.312 (93.719)
Epoch: [81][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.2014 (0.1761)	Prec@1 91.406 (93.678)
Epoch: [81][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1898 (0.1803)	Prec@1 93.750 (93.509)
Epoch: [81][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1380 (0.1848)	Prec@1 96.094 (93.364)
Epoch: [81][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2129 (0.1887)	Prec@1 91.406 (93.197)
Epoch: [81][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2107 (0.1908)	Prec@1 95.312 (93.149)
Test: [0/79]	Time 0.192 (0.192)	Loss 0.2759 (0.2759)	Prec@1 89.844 (89.844)
Test: [50/79]	Time 0.017 (0.021)	Loss 0.4392 (0.4237)	Prec@1 85.938 (87.393)
 * Prec@1 87.090
current lr 1.00000e-01
Epoch: [82][0/391]	Time 0.258 (0.258)	Data 0.181 (0.181)	Loss 0.1815 (0.1815)	Prec@1 95

Epoch: [89][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.2462 (0.1721)	Prec@1 89.844 (94.044)
Epoch: [89][150/391]	Time 0.076 (0.077)	Data 0.000 (0.001)	Loss 0.1019 (0.1792)	Prec@1 97.656 (93.802)
Epoch: [89][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2107 (0.1818)	Prec@1 92.969 (93.688)
Epoch: [89][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2619 (0.1833)	Prec@1 91.406 (93.569)
Epoch: [89][300/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.2053 (0.1881)	Prec@1 91.406 (93.436)
Epoch: [89][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0887 (0.1905)	Prec@1 97.656 (93.352)
Test: [0/79]	Time 0.185 (0.185)	Loss 0.3460 (0.3460)	Prec@1 91.406 (91.406)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.4640 (0.4529)	Prec@1 85.938 (86.780)
 * Prec@1 86.410
current lr 1.00000e-01
Epoch: [90][0/391]	Time 0.267 (0.267)	Data 0.187 (0.187)	Loss 0.1692 (0.1692)	Prec@1 94.531 (94.531)
Epoch: [90][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.1100 (0.1823)	Prec@1 96

Epoch: [97][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.1044 (0.1797)	Prec@1 96.094 (93.657)
Epoch: [97][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1993 (0.1785)	Prec@1 93.750 (93.754)
Epoch: [97][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1750 (0.1801)	Prec@1 94.531 (93.722)
Epoch: [97][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.1292 (0.1806)	Prec@1 95.312 (93.693)
Epoch: [97][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.2117 (0.1816)	Prec@1 92.188 (93.641)
Test: [0/79]	Time 0.185 (0.185)	Loss 0.4070 (0.4070)	Prec@1 85.938 (85.938)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.6014 (0.3884)	Prec@1 83.594 (87.898)
 * Prec@1 87.290
current lr 1.00000e-01
Epoch: [98][0/391]	Time 0.256 (0.256)	Data 0.179 (0.179)	Loss 0.0934 (0.0934)	Prec@1 96.875 (96.875)
Epoch: [98][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.1148 (0.1587)	Prec@1 95.312 (94.516)
Epoch: [98][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.1338 (0.1620)	Prec@1 97

Epoch: [105][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0327 (0.0491)	Prec@1 99.219 (98.546)
Epoch: [105][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0445 (0.0494)	Prec@1 97.656 (98.522)
Epoch: [105][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0347 (0.0498)	Prec@1 98.438 (98.508)
Epoch: [105][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0455 (0.0494)	Prec@1 98.438 (98.486)
Test: [0/79]	Time 0.196 (0.196)	Loss 0.1941 (0.1941)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2439 (0.2718)	Prec@1 92.969 (92.341)
 * Prec@1 92.210
current lr 1.00000e-02
Epoch: [106][0/391]	Time 0.257 (0.257)	Data 0.181 (0.181)	Loss 0.0432 (0.0432)	Prec@1 98.438 (98.438)
Epoch: [106][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0635 (0.0468)	Prec@1 97.656 (98.438)
Epoch: [106][100/391]	Time 0.076 (0.077)	Data 0.000 (0.002)	Loss 0.0781 (0.0450)	Prec@1 96.875 (98.584)
Epoch: [106][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0797 (0.0458)	P

Epoch: [113][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0341 (0.0325)	Prec@1 98.438 (99.028)
Epoch: [113][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0332 (0.0324)	Prec@1 99.219 (99.016)
Epoch: [113][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0134 (0.0330)	Prec@1 99.219 (98.985)
Epoch: [113][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0261 (0.0328)	Prec@1 98.438 (99.001)
Test: [0/79]	Time 0.184 (0.184)	Loss 0.2042 (0.2042)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2437 (0.2895)	Prec@1 92.969 (92.310)
 * Prec@1 92.280
current lr 1.00000e-02
Epoch: [114][0/391]	Time 0.256 (0.256)	Data 0.179 (0.179)	Loss 0.0161 (0.0161)	Prec@1 100.000 (100.000)
Epoch: [114][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0123 (0.0273)	Prec@1 100.000 (99.249)
Epoch: [114][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0332 (0.0281)	Prec@1 99.219 (99.188)
Epoch: [114][150/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0089 (0.0288

Epoch: [121][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0199 (0.0235)	Prec@1 100.000 (99.277)
Epoch: [121][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0446 (0.0241)	Prec@1 98.438 (99.275)
Epoch: [121][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0112 (0.0236)	Prec@1 100.000 (99.284)
Epoch: [121][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0144 (0.0234)	Prec@1 100.000 (99.297)
Test: [0/79]	Time 0.185 (0.185)	Loss 0.2163 (0.2163)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2619 (0.3009)	Prec@1 92.188 (92.662)
 * Prec@1 92.450
current lr 1.00000e-02
Epoch: [122][0/391]	Time 0.264 (0.264)	Data 0.187 (0.187)	Loss 0.0174 (0.0174)	Prec@1 100.000 (100.000)
Epoch: [122][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0366 (0.0184)	Prec@1 98.438 (99.510)
Epoch: [122][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0297 (0.0203)	Prec@1 98.438 (99.435)
Epoch: [122][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0206 (0.01

Epoch: [129][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0331 (0.0182)	Prec@1 99.219 (99.506)
Epoch: [129][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0093 (0.0177)	Prec@1 100.000 (99.527)
Epoch: [129][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0629 (0.0183)	Prec@1 99.219 (99.491)
Epoch: [129][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0026 (0.0183)	Prec@1 100.000 (99.495)
Test: [0/79]	Time 0.194 (0.194)	Loss 0.2034 (0.2034)	Prec@1 94.531 (94.531)
Test: [50/79]	Time 0.017 (0.021)	Loss 0.2731 (0.3219)	Prec@1 92.188 (92.586)
 * Prec@1 92.410
current lr 1.00000e-02
Epoch: [130][0/391]	Time 0.256 (0.256)	Data 0.179 (0.179)	Loss 0.0218 (0.0218)	Prec@1 99.219 (99.219)
Epoch: [130][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0187 (0.0167)	Prec@1 100.000 (99.602)
Epoch: [130][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0225 (0.0173)	Prec@1 99.219 (99.513)
Epoch: [130][150/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0228 (0.0172

Epoch: [137][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0045 (0.0153)	Prec@1 100.000 (99.565)
Epoch: [137][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0166 (0.0156)	Prec@1 100.000 (99.577)
Epoch: [137][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0175 (0.0152)	Prec@1 99.219 (99.587)
Epoch: [137][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0263 (0.0158)	Prec@1 99.219 (99.570)
Test: [0/79]	Time 0.185 (0.185)	Loss 0.2160 (0.2160)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2175 (0.3288)	Prec@1 92.969 (92.647)
 * Prec@1 92.520
current lr 1.00000e-02
Epoch: [138][0/391]	Time 0.256 (0.256)	Data 0.180 (0.180)	Loss 0.0091 (0.0091)	Prec@1 100.000 (100.000)
Epoch: [138][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0059 (0.0117)	Prec@1 100.000 (99.755)
Epoch: [138][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0180 (0.0136)	Prec@1 99.219 (99.606)
Epoch: [138][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0264 (0.01

Epoch: [145][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0246 (0.0135)	Prec@1 99.219 (99.623)
Epoch: [145][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0090 (0.0132)	Prec@1 100.000 (99.633)
Epoch: [145][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0023 (0.0132)	Prec@1 100.000 (99.647)
Epoch: [145][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0076 (0.0128)	Prec@1 100.000 (99.657)
Test: [0/79]	Time 0.187 (0.187)	Loss 0.2203 (0.2203)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.017 (0.021)	Loss 0.2664 (0.3302)	Prec@1 92.969 (92.785)
 * Prec@1 92.490
current lr 1.00000e-02
Epoch: [146][0/391]	Time 0.256 (0.256)	Data 0.180 (0.180)	Loss 0.0028 (0.0028)	Prec@1 100.000 (100.000)
Epoch: [146][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0257 (0.0127)	Prec@1 99.219 (99.663)
Epoch: [146][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0206 (0.0116)	Prec@1 98.438 (99.667)
Epoch: [146][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0160 (0.01

Epoch: [153][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0152 (0.0095)	Prec@1 100.000 (99.786)
Epoch: [153][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0298 (0.0092)	Prec@1 98.438 (99.801)
Epoch: [153][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0094 (0.0091)	Prec@1 99.219 (99.811)
Epoch: [153][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0167 (0.0093)	Prec@1 99.219 (99.806)
Test: [0/79]	Time 0.186 (0.186)	Loss 0.2396 (0.2396)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2948 (0.3284)	Prec@1 92.969 (92.754)
 * Prec@1 92.540
current lr 1.00000e-03
Epoch: [154][0/391]	Time 0.257 (0.257)	Data 0.179 (0.179)	Loss 0.0018 (0.0018)	Prec@1 100.000 (100.000)
Epoch: [154][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0060 (0.0074)	Prec@1 100.000 (99.877)
Epoch: [154][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0058 (0.0078)	Prec@1 100.000 (99.861)
Epoch: [154][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0085 (0.00

Epoch: [161][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0053 (0.0083)	Prec@1 100.000 (99.837)
Epoch: [161][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0042 (0.0081)	Prec@1 100.000 (99.841)
Epoch: [161][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0101 (0.0083)	Prec@1 100.000 (99.844)
Epoch: [161][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0091 (0.0083)	Prec@1 100.000 (99.835)
Test: [0/79]	Time 0.188 (0.188)	Loss 0.2428 (0.2428)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2790 (0.3275)	Prec@1 92.969 (92.862)
 * Prec@1 92.670
current lr 1.00000e-03
Epoch: [162][0/391]	Time 0.262 (0.262)	Data 0.181 (0.181)	Loss 0.0057 (0.0057)	Prec@1 100.000 (100.000)
Epoch: [162][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0093 (0.0089)	Prec@1 100.000 (99.816)
Epoch: [162][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0081 (0.0094)	Prec@1 100.000 (99.760)
Epoch: [162][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0266 (0

Epoch: [169][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0028 (0.0083)	Prec@1 100.000 (99.810)
Epoch: [169][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0048 (0.0082)	Prec@1 100.000 (99.816)
Epoch: [169][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0053 (0.0081)	Prec@1 100.000 (99.829)
Epoch: [169][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0067 (0.0078)	Prec@1 100.000 (99.838)
Test: [0/79]	Time 0.183 (0.183)	Loss 0.2336 (0.2336)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2743 (0.3275)	Prec@1 93.750 (92.846)
 * Prec@1 92.710
current lr 1.00000e-03
Epoch: [170][0/391]	Time 0.265 (0.265)	Data 0.184 (0.184)	Loss 0.0024 (0.0024)	Prec@1 100.000 (100.000)
Epoch: [170][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0035 (0.0081)	Prec@1 100.000 (99.770)
Epoch: [170][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0070 (0.0085)	Prec@1 100.000 (99.791)
Epoch: [170][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0128 (0

Epoch: [177][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0100 (0.0077)	Prec@1 99.219 (99.837)
Epoch: [177][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0064 (0.0076)	Prec@1 100.000 (99.841)
Epoch: [177][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0030 (0.0078)	Prec@1 100.000 (99.824)
Epoch: [177][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0033 (0.0077)	Prec@1 100.000 (99.831)
Test: [0/79]	Time 0.196 (0.196)	Loss 0.2258 (0.2258)	Prec@1 93.750 (93.750)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2831 (0.3277)	Prec@1 92.969 (92.770)
 * Prec@1 92.660
current lr 1.00000e-03
Epoch: [178][0/391]	Time 0.257 (0.257)	Data 0.180 (0.180)	Loss 0.0178 (0.0178)	Prec@1 99.219 (99.219)
Epoch: [178][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0052 (0.0079)	Prec@1 100.000 (99.831)
Epoch: [178][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0084 (0.0077)	Prec@1 100.000 (99.845)
Epoch: [178][150/391]	Time 0.076 (0.077)	Data 0.000 (0.001)	Loss 0.0017 (0.00

Epoch: [185][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0162 (0.0072)	Prec@1 100.000 (99.868)
Epoch: [185][250/391]	Time 0.076 (0.076)	Data 0.000 (0.001)	Loss 0.0159 (0.0073)	Prec@1 99.219 (99.854)
Epoch: [185][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0130 (0.0072)	Prec@1 99.219 (99.855)
Epoch: [185][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0041 (0.0072)	Prec@1 100.000 (99.855)
Test: [0/79]	Time 0.186 (0.186)	Loss 0.2074 (0.2074)	Prec@1 93.750 (93.750)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2889 (0.3321)	Prec@1 93.750 (92.953)
 * Prec@1 92.820
current lr 1.00000e-03
Epoch: [186][0/391]	Time 0.259 (0.259)	Data 0.182 (0.182)	Loss 0.0022 (0.0022)	Prec@1 100.000 (100.000)
Epoch: [186][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0233 (0.0081)	Prec@1 99.219 (99.786)
Epoch: [186][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0294 (0.0078)	Prec@1 97.656 (99.799)
Epoch: [186][150/391]	Time 0.075 (0.077)	Data 0.000 (0.001)	Loss 0.0037 (0.008

Epoch: [193][200/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0061 (0.0071)	Prec@1 100.000 (99.880)
Epoch: [193][250/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0150 (0.0071)	Prec@1 99.219 (99.885)
Epoch: [193][300/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0051 (0.0070)	Prec@1 100.000 (99.894)
Epoch: [193][350/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0032 (0.0069)	Prec@1 100.000 (99.891)
Test: [0/79]	Time 0.190 (0.190)	Loss 0.2379 (0.2379)	Prec@1 92.969 (92.969)
Test: [50/79]	Time 0.018 (0.021)	Loss 0.2773 (0.3316)	Prec@1 93.750 (93.015)
 * Prec@1 92.810
current lr 1.00000e-03
Epoch: [194][0/391]	Time 0.259 (0.259)	Data 0.182 (0.182)	Loss 0.0029 (0.0029)	Prec@1 100.000 (100.000)
Epoch: [194][50/391]	Time 0.075 (0.079)	Data 0.000 (0.004)	Loss 0.0136 (0.0059)	Prec@1 100.000 (99.908)
Epoch: [194][100/391]	Time 0.075 (0.077)	Data 0.000 (0.002)	Loss 0.0261 (0.0065)	Prec@1 98.438 (99.907)
Epoch: [194][150/391]	Time 0.075 (0.076)	Data 0.000 (0.001)	Loss 0.0016 (0.0