In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [2]:
SEED=1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [3]:
torch.cuda.device_count()

1

In [4]:
START_EPOCH = 0
ARCH = 'resnet18'
EPOCHS = 200
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
PRINT_FREQ = 50
TRAIN_BATCH=256
VAL_BATCH=256
WORKERS=8
TRAINDIR="/workspace/storage/train"
VALDIR="/workspace/storage/val"

In [5]:
if not torch.cuda.is_available():
    print('GPU not detected.. did you pass through your GPU?')

In [6]:
GPU = 0
torch.cuda.set_device(GPU)
cudnn.benchmark = True

In [7]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if GPU is not None:
            images = images.cuda(GPU, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(GPU, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

In [8]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if GPU is not None:
                images = images.cuda(GPU, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(GPU, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

In [9]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [10]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [11]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [12]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    #lr = LR * (0.1 ** (epoch // 30))
    lr = LR * (0.1 ** (epoch // 17))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [13]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [14]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]
cinic_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
cinic_std_RGB = [0.24205776, 0.23828046, 0.25874835]
cifar_mean_RGB = [0.4914, 0.4822, 0.4465]
cifar_std_RGB = [0.2023, 0.1994, 0.2010]

In [15]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [16]:
IMG_SIZE = 224

In [17]:
NUM_CLASSES = 1000

In [18]:
model = models.resnet18()

In [19]:
inf = model.fc.in_features

In [20]:
model.fc = nn.Linear(inf, NUM_CLASSES)

In [21]:
model.cuda(GPU)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [22]:
criterion = nn.CrossEntropyLoss().cuda(GPU)

In [23]:
optimizer = torch.optim.SGD(model.parameters(), LR,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY)

In [24]:
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [25]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cinic_mean_RGB, cinic_std_RGB),
])

In [26]:
# train_dataset = datasets.ImageFolder(
#     TRAINDIR, transform=transform_train)

train_dataset = datasets.ImageFolder(
    TRAINDIR,
    transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
)

In [27]:
# transform_val = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(cinic_mean_RGB, cinic_std_RGB),
# ])

val_dataset = datasets.ImageFolder(
    VALDIR,
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
)

In [28]:
#val_dataset = datasets.ImageFolder(
   # VALDIR, transform=transform_val)

In [29]:
train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=TRAIN_BATCH, shuffle=True,
        num_workers=WORKERS, pin_memory=True, sampler=None)

In [30]:
val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=VAL_BATCH, shuffle=False,
        num_workers=WORKERS, pin_memory=True, sampler=None) 

In [31]:
best_acc1 = 0

In [32]:
for epoch in range(START_EPOCH, EPOCHS):
    
    #Implement Learning Rate Adjustment
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step()
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][   0/5005]	Time 12.996 (12.996)	Data  3.352 ( 3.352)	Loss 6.9885e+00 (6.9885e+00)	Acc@1   0.00 (  0.00)	Acc@5   0.78 (  0.78)
Epoch: [0][  50/5005]	Time  0.695 ( 0.926)	Data  0.000 ( 0.066)	Loss 6.9252e+00 (7.0227e+00)	Acc@1   0.00 (  0.21)	Acc@5   1.17 (  0.88)
Epoch: [0][ 100/5005]	Time  0.693 ( 0.812)	Data  0.000 ( 0.038)	Loss 6.8101e+00 (6.9366e+00)	Acc@1   0.00 (  0.26)	Acc@5   1.17 (  1.18)
Epoch: [0][ 150/5005]	Time  0.696 ( 0.775)	Data  0.000 ( 0.029)	Loss 6.7597e+00 (6.8709e+00)	Acc@1   0.39 (  0.37)	Acc@5   1.56 (  1.49)
Epoch: [0][ 200/5005]	Time  0.701 ( 0.756)	Data  0.000 ( 0.024)	Loss 6.6497e+00 (6.8184e+00)	Acc@1   0.39 (  0.43)	Acc@5   2.73 (  1.71)
Epoch: [0][ 250/5005]	Time  0.699 ( 0.744)	Data  0.000 ( 0.021)	Loss 6.5288e+00 (6.7684e+00)	Acc@1   1.17 (  0.47)	Acc@5   3.52 (  2.00)
Epoch: [0][ 300/5005]	Time  0.699 ( 0.737)	Data  0.000 ( 0.019)	Loss 6.4743e+00 (6.7241e+00)	Acc@1   1.17 (  0.53)	Acc@5   2.73 (  2.26)
Epoch: [0][ 350/5005]	Time  0.698 ( 0.732



Epoch: [0][ 950/5005]	Time  0.698 ( 0.712)	Data  0.000 ( 0.013)	Loss 5.6471e+00 (6.2633e+00)	Acc@1   5.47 (  1.72)	Acc@5  13.28 (  6.19)
Epoch: [0][1000/5005]	Time  0.700 ( 0.711)	Data  0.000 ( 0.012)	Loss 5.6284e+00 (6.2333e+00)	Acc@1   2.34 (  1.83)	Acc@5  10.94 (  6.51)
Epoch: [0][1050/5005]	Time  0.697 ( 0.710)	Data  0.000 ( 0.012)	Loss 5.6120e+00 (6.2049e+00)	Acc@1   2.73 (  1.94)	Acc@5  10.94 (  6.83)
Epoch: [0][1100/5005]	Time  0.704 ( 0.710)	Data  0.000 ( 0.012)	Loss 5.5534e+00 (6.1783e+00)	Acc@1   5.08 (  2.04)	Acc@5  13.67 (  7.12)
Epoch: [0][1150/5005]	Time  0.703 ( 0.710)	Data  0.000 ( 0.012)	Loss 5.5852e+00 (6.1503e+00)	Acc@1   3.52 (  2.15)	Acc@5  16.02 (  7.44)
Epoch: [0][1200/5005]	Time  0.702 ( 0.709)	Data  0.000 ( 0.012)	Loss 5.6322e+00 (6.1250e+00)	Acc@1   4.69 (  2.26)	Acc@5  12.11 (  7.74)
Epoch: [0][1250/5005]	Time  0.698 ( 0.709)	Data  0.000 ( 0.012)	Loss 5.5329e+00 (6.1007e+00)	Acc@1   6.25 (  2.36)	Acc@5  16.41 (  8.04)
Epoch: [0][1300/5005]	Time  0.697 ( 0.708



Epoch: [1][3050/5005]	Time  0.703 ( 0.704)	Data  0.000 ( 0.011)	Loss 3.7510e+00 (3.6467e+00)	Acc@1  24.22 ( 25.79)	Acc@5  51.17 ( 49.53)
Epoch: [1][3100/5005]	Time  0.700 ( 0.704)	Data  0.000 ( 0.011)	Loss 3.3772e+00 (3.6437e+00)	Acc@1  29.30 ( 25.84)	Acc@5  50.78 ( 49.58)
Epoch: [1][3150/5005]	Time  0.703 ( 0.704)	Data  0.000 ( 0.011)	Loss 3.5193e+00 (3.6404e+00)	Acc@1  29.69 ( 25.90)	Acc@5  53.52 ( 49.65)
Epoch: [1][3200/5005]	Time  0.708 ( 0.704)	Data  0.000 ( 0.011)	Loss 3.3056e+00 (3.6368e+00)	Acc@1  35.16 ( 25.96)	Acc@5  58.20 ( 49.72)
Epoch: [1][3250/5005]	Time  0.715 ( 0.704)	Data  0.000 ( 0.010)	Loss 3.5149e+00 (3.6330e+00)	Acc@1  28.52 ( 26.01)	Acc@5  51.17 ( 49.78)
Epoch: [1][3300/5005]	Time  0.721 ( 0.704)	Data  0.000 ( 0.010)	Loss 3.3099e+00 (3.6297e+00)	Acc@1  30.08 ( 26.06)	Acc@5  55.47 ( 49.84)
Epoch: [1][3350/5005]	Time  0.725 ( 0.705)	Data  0.000 ( 0.010)	Loss 3.4742e+00 (3.6268e+00)	Acc@1  33.20 ( 26.12)	Acc@5  53.91 ( 49.89)
Epoch: [1][3400/5005]	Time  0.729 ( 0.705



Epoch: [2][3250/5005]	Time  0.711 ( 0.705)	Data  0.000 ( 0.010)	Loss 3.1233e+00 (3.1369e+00)	Acc@1  33.98 ( 33.87)	Acc@5  57.42 ( 58.98)
Epoch: [2][3300/5005]	Time  0.711 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.8333e+00 (3.1354e+00)	Acc@1  38.67 ( 33.88)	Acc@5  62.11 ( 59.01)
Epoch: [2][3350/5005]	Time  0.726 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.7421e+00 (3.1343e+00)	Acc@1  37.50 ( 33.91)	Acc@5  66.41 ( 59.03)
Epoch: [2][3400/5005]	Time  0.735 ( 0.706)	Data  0.000 ( 0.010)	Loss 3.1224e+00 (3.1334e+00)	Acc@1  32.03 ( 33.92)	Acc@5  60.55 ( 59.04)
Epoch: [2][3450/5005]	Time  0.710 ( 0.706)	Data  0.000 ( 0.010)	Loss 3.2149e+00 (3.1321e+00)	Acc@1  30.47 ( 33.94)	Acc@5  59.38 ( 59.06)
Epoch: [2][3500/5005]	Time  0.705 ( 0.706)	Data  0.000 ( 0.010)	Loss 3.2509e+00 (3.1310e+00)	Acc@1  33.20 ( 33.97)	Acc@5  55.47 ( 59.08)
Epoch: [2][3550/5005]	Time  0.701 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.9759e+00 (3.1297e+00)	Acc@1  37.50 ( 33.99)	Acc@5  60.16 ( 59.11)
Epoch: [2][3600/5005]	Time  0.701 ( 0.706



Epoch: [3][3450/5005]	Time  0.711 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.9260e+00 (2.9170e+00)	Acc@1  37.50 ( 37.63)	Acc@5  62.50 ( 62.88)
Epoch: [3][3500/5005]	Time  0.704 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.6401e+00 (2.9170e+00)	Acc@1  40.23 ( 37.63)	Acc@5  66.02 ( 62.88)
Epoch: [3][3550/5005]	Time  0.703 ( 0.707)	Data  0.000 ( 0.010)	Loss 3.0426e+00 (2.9169e+00)	Acc@1  39.06 ( 37.63)	Acc@5  59.77 ( 62.88)
Epoch: [3][3600/5005]	Time  0.698 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.8266e+00 (2.9160e+00)	Acc@1  38.28 ( 37.64)	Acc@5  68.75 ( 62.89)
Epoch: [3][3650/5005]	Time  0.699 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.9347e+00 (2.9157e+00)	Acc@1  39.84 ( 37.65)	Acc@5  65.23 ( 62.90)
Epoch: [3][3700/5005]	Time  0.699 ( 0.707)	Data  0.000 ( 0.010)	Loss 3.1907e+00 (2.9152e+00)	Acc@1  31.25 ( 37.66)	Acc@5  61.72 ( 62.91)
Epoch: [3][3750/5005]	Time  0.697 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.9257e+00 (2.9149e+00)	Acc@1  39.84 ( 37.67)	Acc@5  66.41 ( 62.91)
Epoch: [3][3800/5005]	Time  0.697 ( 0.706



Epoch: [4][1900/5005]	Time  0.701 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.6781e+00 (2.8033e+00)	Acc@1  38.67 ( 39.50)	Acc@5  66.02 ( 64.80)
Epoch: [4][1950/5005]	Time  0.699 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.8165e+00 (2.8036e+00)	Acc@1  42.58 ( 39.51)	Acc@5  65.62 ( 64.80)
Epoch: [4][2000/5005]	Time  0.691 ( 0.704)	Data  0.000 ( 0.011)	Loss 3.1495e+00 (2.8036e+00)	Acc@1  36.33 ( 39.53)	Acc@5  55.47 ( 64.80)
Epoch: [4][2050/5005]	Time  0.697 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.6920e+00 (2.8032e+00)	Acc@1  42.19 ( 39.55)	Acc@5  66.02 ( 64.82)
Epoch: [4][2100/5005]	Time  0.702 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.8431e+00 (2.8030e+00)	Acc@1  40.23 ( 39.55)	Acc@5  62.50 ( 64.82)
Epoch: [4][2150/5005]	Time  0.699 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.7608e+00 (2.8024e+00)	Acc@1  39.45 ( 39.56)	Acc@5  64.84 ( 64.82)
Epoch: [4][2200/5005]	Time  0.696 ( 0.703)	Data  0.000 ( 0.011)	Loss 2.6770e+00 (2.8028e+00)	Acc@1  42.97 ( 39.55)	Acc@5  67.19 ( 64.82)
Epoch: [4][2250/5005]	Time  0.700 ( 0.703



Epoch: [5][ 750/5005]	Time  0.712 ( 0.712)	Data  0.000 ( 0.014)	Loss 2.6397e+00 (2.7319e+00)	Acc@1  44.53 ( 40.94)	Acc@5  68.75 ( 66.04)
Epoch: [5][ 800/5005]	Time  0.722 ( 0.712)	Data  0.000 ( 0.014)	Loss 2.6402e+00 (2.7323e+00)	Acc@1  40.62 ( 40.93)	Acc@5  67.58 ( 66.07)
Epoch: [5][ 850/5005]	Time  0.734 ( 0.713)	Data  0.000 ( 0.013)	Loss 2.6254e+00 (2.7355e+00)	Acc@1  39.84 ( 40.88)	Acc@5  69.53 ( 66.03)
Epoch: [5][ 900/5005]	Time  0.717 ( 0.714)	Data  0.000 ( 0.013)	Loss 2.5730e+00 (2.7353e+00)	Acc@1  43.36 ( 40.87)	Acc@5  68.36 ( 66.04)
Epoch: [5][ 950/5005]	Time  0.707 ( 0.714)	Data  0.000 ( 0.013)	Loss 2.5723e+00 (2.7348e+00)	Acc@1  37.89 ( 40.86)	Acc@5  67.19 ( 66.04)
Epoch: [5][1000/5005]	Time  0.705 ( 0.713)	Data  0.000 ( 0.013)	Loss 2.6714e+00 (2.7354e+00)	Acc@1  39.45 ( 40.83)	Acc@5  64.45 ( 66.02)
Epoch: [5][1050/5005]	Time  0.705 ( 0.713)	Data  0.000 ( 0.013)	Loss 2.9156e+00 (2.7364e+00)	Acc@1  37.11 ( 40.83)	Acc@5  59.77 ( 65.99)
Epoch: [5][1100/5005]	Time  0.707 ( 0.713



Epoch: [6][2700/5005]	Time  0.699 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.6533e+00 (2.6737e+00)	Acc@1  47.27 ( 41.87)	Acc@5  66.80 ( 67.01)
Epoch: [6][2750/5005]	Time  0.699 ( 0.706)	Data  0.000 ( 0.011)	Loss 2.6514e+00 (2.6735e+00)	Acc@1  39.84 ( 41.88)	Acc@5  67.97 ( 67.01)
Epoch: [6][2800/5005]	Time  0.702 ( 0.706)	Data  0.000 ( 0.011)	Loss 2.5875e+00 (2.6741e+00)	Acc@1  43.75 ( 41.86)	Acc@5  71.48 ( 67.01)
Epoch: [6][2850/5005]	Time  0.710 ( 0.706)	Data  0.000 ( 0.011)	Loss 2.7682e+00 (2.6735e+00)	Acc@1  38.28 ( 41.88)	Acc@5  67.19 ( 67.02)
Epoch: [6][2900/5005]	Time  0.718 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.6503e+00 (2.6734e+00)	Acc@1  39.06 ( 41.89)	Acc@5  66.02 ( 67.02)
Epoch: [6][2950/5005]	Time  0.723 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.7752e+00 (2.6736e+00)	Acc@1  41.02 ( 41.90)	Acc@5  62.89 ( 67.02)
Epoch: [6][3000/5005]	Time  0.725 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.6218e+00 (2.6736e+00)	Acc@1  42.19 ( 41.90)	Acc@5  66.41 ( 67.02)
Epoch: [6][3050/5005]	Time  0.707 ( 0.707



Epoch: [7][ 600/5005]	Time  0.704 ( 0.716)	Data  0.000 ( 0.015)	Loss 2.7677e+00 (2.6111e+00)	Acc@1  41.02 ( 42.84)	Acc@5  66.80 ( 68.04)
Epoch: [7][ 650/5005]	Time  0.702 ( 0.715)	Data  0.000 ( 0.015)	Loss 2.7015e+00 (2.6151e+00)	Acc@1  42.58 ( 42.80)	Acc@5  66.41 ( 67.99)
Epoch: [7][ 700/5005]	Time  0.711 ( 0.714)	Data  0.000 ( 0.014)	Loss 2.5550e+00 (2.6191e+00)	Acc@1  46.09 ( 42.71)	Acc@5  73.05 ( 67.93)
Epoch: [7][ 750/5005]	Time  0.713 ( 0.714)	Data  0.000 ( 0.014)	Loss 2.4360e+00 (2.6196e+00)	Acc@1  44.53 ( 42.73)	Acc@5  69.92 ( 67.90)
Epoch: [7][ 800/5005]	Time  0.726 ( 0.714)	Data  0.000 ( 0.014)	Loss 2.5092e+00 (2.6182e+00)	Acc@1  44.53 ( 42.76)	Acc@5  69.53 ( 67.93)
Epoch: [7][ 850/5005]	Time  0.738 ( 0.715)	Data  0.000 ( 0.013)	Loss 2.7021e+00 (2.6193e+00)	Acc@1  42.19 ( 42.70)	Acc@5  65.62 ( 67.91)
Epoch: [7][ 900/5005]	Time  0.710 ( 0.716)	Data  0.000 ( 0.013)	Loss 2.4406e+00 (2.6232e+00)	Acc@1  45.70 ( 42.67)	Acc@5  72.27 ( 67.86)
Epoch: [7][ 950/5005]	Time  0.707 ( 0.715



Epoch: [8][2250/5005]	Time  0.699 ( 0.708)	Data  0.000 ( 0.011)	Loss 2.6262e+00 (2.6046e+00)	Acc@1  42.58 ( 43.14)	Acc@5  67.19 ( 68.24)
Epoch: [8][2300/5005]	Time  0.703 ( 0.708)	Data  0.000 ( 0.011)	Loss 2.5987e+00 (2.6049e+00)	Acc@1  42.97 ( 43.14)	Acc@5  68.36 ( 68.23)
Epoch: [8][2350/5005]	Time  0.700 ( 0.708)	Data  0.000 ( 0.011)	Loss 2.5734e+00 (2.6048e+00)	Acc@1  42.19 ( 43.15)	Acc@5  69.14 ( 68.23)
Epoch: [8][2400/5005]	Time  0.697 ( 0.708)	Data  0.000 ( 0.011)	Loss 2.7213e+00 (2.6051e+00)	Acc@1  39.84 ( 43.14)	Acc@5  64.84 ( 68.23)
Epoch: [8][2450/5005]	Time  0.697 ( 0.708)	Data  0.000 ( 0.011)	Loss 2.4871e+00 (2.6049e+00)	Acc@1  45.31 ( 43.13)	Acc@5  69.14 ( 68.23)
Epoch: [8][2500/5005]	Time  0.698 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.5718e+00 (2.6051e+00)	Acc@1  37.89 ( 43.14)	Acc@5  70.70 ( 68.23)
Epoch: [8][2550/5005]	Time  0.702 ( 0.707)	Data  0.000 ( 0.011)	Loss 2.5812e+00 (2.6054e+00)	Acc@1  41.80 ( 43.13)	Acc@5  70.31 ( 68.21)
Epoch: [8][2600/5005]	Time  0.696 ( 0.707



Epoch: [9][4050/5005]	Time  0.697 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.6781e+00 (2.5812e+00)	Acc@1  38.28 ( 43.55)	Acc@5  66.02 ( 68.59)
Epoch: [9][4100/5005]	Time  0.699 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.4301e+00 (2.5813e+00)	Acc@1  45.70 ( 43.54)	Acc@5  68.75 ( 68.59)
Epoch: [9][4150/5005]	Time  0.698 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.5964e+00 (2.5816e+00)	Acc@1  42.58 ( 43.54)	Acc@5  71.48 ( 68.59)
Epoch: [9][4200/5005]	Time  0.702 ( 0.707)	Data  0.000 ( 0.010)	Loss 2.5057e+00 (2.5818e+00)	Acc@1  43.36 ( 43.54)	Acc@5  66.80 ( 68.59)
Epoch: [9][4250/5005]	Time  0.697 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.6192e+00 (2.5822e+00)	Acc@1  42.97 ( 43.54)	Acc@5  66.02 ( 68.58)
Epoch: [9][4300/5005]	Time  0.699 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.3486e+00 (2.5821e+00)	Acc@1  47.27 ( 43.54)	Acc@5  76.56 ( 68.59)
Epoch: [9][4350/5005]	Time  0.699 ( 0.706)	Data  0.000 ( 0.010)	Loss 2.5714e+00 (2.5823e+00)	Acc@1  41.41 ( 43.53)	Acc@5  70.70 ( 68.58)
Epoch: [9][4400/5005]	Time  0.704 ( 0.706



Epoch: [10][2050/5005]	Time  0.722 ( 0.710)	Data  0.000 ( 0.011)	Loss 2.4595e+00 (2.5554e+00)	Acc@1  43.36 ( 43.96)	Acc@5  71.09 ( 69.01)
Epoch: [10][2100/5005]	Time  0.732 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.5578e+00 (2.5560e+00)	Acc@1  44.14 ( 43.96)	Acc@5  65.23 ( 69.00)
Epoch: [10][2150/5005]	Time  0.717 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.3954e+00 (2.5560e+00)	Acc@1  44.53 ( 43.95)	Acc@5  71.09 ( 69.00)
Epoch: [10][2200/5005]	Time  0.701 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.5978e+00 (2.5570e+00)	Acc@1  43.75 ( 43.93)	Acc@5  66.41 ( 68.98)
Epoch: [10][2250/5005]	Time  0.700 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.8667e+00 (2.5582e+00)	Acc@1  40.62 ( 43.92)	Acc@5  61.72 ( 68.95)
Epoch: [10][2300/5005]	Time  0.701 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.5123e+00 (2.5583e+00)	Acc@1  44.53 ( 43.92)	Acc@5  71.09 ( 68.94)
Epoch: [10][2350/5005]	Time  0.701 ( 0.711)	Data  0.000 ( 0.011)	Loss 2.2492e+00 (2.5574e+00)	Acc@1  49.22 ( 43.93)	Acc@5  71.88 ( 68.96)
Epoch: [10][2400/5005]	Time  0.699



Epoch: [11][ 800/5005]	Time  0.723 ( 0.715)	Data  0.000 ( 0.013)	Loss 2.5197e+00 (2.5275e+00)	Acc@1  46.09 ( 44.60)	Acc@5  71.09 ( 69.46)
Epoch: [11][ 850/5005]	Time  0.748 ( 0.716)	Data  0.000 ( 0.013)	Loss 2.7127e+00 (2.5280e+00)	Acc@1  41.80 ( 44.59)	Acc@5  64.45 ( 69.44)
Epoch: [11][ 900/5005]	Time  0.710 ( 0.716)	Data  0.000 ( 0.013)	Loss 2.7899e+00 (2.5265e+00)	Acc@1  42.97 ( 44.60)	Acc@5  66.02 ( 69.47)
Epoch: [11][ 950/5005]	Time  0.708 ( 0.716)	Data  0.000 ( 0.012)	Loss 2.3865e+00 (2.5279e+00)	Acc@1  50.00 ( 44.60)	Acc@5  73.83 ( 69.45)
Epoch: [11][1000/5005]	Time  0.710 ( 0.715)	Data  0.000 ( 0.012)	Loss 2.4359e+00 (2.5268e+00)	Acc@1  42.58 ( 44.60)	Acc@5  74.22 ( 69.46)
Epoch: [11][1050/5005]	Time  0.708 ( 0.715)	Data  0.000 ( 0.012)	Loss 2.6286e+00 (2.5281e+00)	Acc@1  44.53 ( 44.57)	Acc@5  68.75 ( 69.43)
Epoch: [11][1100/5005]	Time  0.711 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.4926e+00 (2.5274e+00)	Acc@1  46.48 ( 44.60)	Acc@5  70.70 ( 69.44)
Epoch: [11][1150/5005]	Time  0.713



Epoch: [12][3100/5005]	Time  0.698 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.3595e+00 (2.5280e+00)	Acc@1  47.66 ( 44.49)	Acc@5  73.05 ( 69.47)
Epoch: [12][3150/5005]	Time  0.695 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.7002e+00 (2.5287e+00)	Acc@1  41.02 ( 44.46)	Acc@5  67.58 ( 69.46)
Epoch: [12][3200/5005]	Time  0.696 ( 0.704)	Data  0.000 ( 0.011)	Loss 2.2620e+00 (2.5283e+00)	Acc@1  48.44 ( 44.48)	Acc@5  73.44 ( 69.47)
Epoch: [12][3250/5005]	Time  0.698 ( 0.704)	Data  0.000 ( 0.010)	Loss 2.5801e+00 (2.5286e+00)	Acc@1  44.14 ( 44.47)	Acc@5  69.92 ( 69.46)
Epoch: [12][3300/5005]	Time  0.699 ( 0.704)	Data  0.000 ( 0.010)	Loss 2.5062e+00 (2.5288e+00)	Acc@1  46.48 ( 44.48)	Acc@5  71.48 ( 69.46)
Epoch: [12][3350/5005]	Time  0.695 ( 0.704)	Data  0.000 ( 0.010)	Loss 2.4112e+00 (2.5288e+00)	Acc@1  47.66 ( 44.48)	Acc@5  70.70 ( 69.46)
Epoch: [12][3400/5005]	Time  0.697 ( 0.704)	Data  0.000 ( 0.010)	Loss 2.3548e+00 (2.5294e+00)	Acc@1  45.70 ( 44.48)	Acc@5  73.83 ( 69.44)
Epoch: [12][3450/5005]	Time  0.699



Epoch: [13][1850/5005]	Time  0.703 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.4551e+00 (2.5186e+00)	Acc@1  41.41 ( 44.63)	Acc@5  71.48 ( 69.55)
Epoch: [13][1900/5005]	Time  0.702 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.1177e+00 (2.5178e+00)	Acc@1  49.22 ( 44.65)	Acc@5  75.39 ( 69.57)
Epoch: [13][1950/5005]	Time  0.706 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.6112e+00 (2.5182e+00)	Acc@1  44.92 ( 44.63)	Acc@5  69.14 ( 69.56)
Epoch: [13][2000/5005]	Time  0.712 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.4004e+00 (2.5176e+00)	Acc@1  47.27 ( 44.65)	Acc@5  71.09 ( 69.56)
Epoch: [13][2050/5005]	Time  0.710 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.3869e+00 (2.5175e+00)	Acc@1  46.09 ( 44.64)	Acc@5  71.09 ( 69.57)
Epoch: [13][2100/5005]	Time  0.717 ( 0.705)	Data  0.000 ( 0.011)	Loss 2.2828e+00 (2.5168e+00)	Acc@1  49.61 ( 44.66)	Acc@5  73.44 ( 69.59)
Epoch: [13][2150/5005]	Time  0.730 ( 0.706)	Data  0.000 ( 0.011)	Loss 2.6674e+00 (2.5180e+00)	Acc@1  39.84 ( 44.65)	Acc@5  68.75 ( 69.57)
Epoch: [13][2200/5005]	Time  0.713



Epoch: [14][3800/5005]	Time  0.698 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.2383e+00 (2.5131e+00)	Acc@1  48.05 ( 44.78)	Acc@5  71.48 ( 69.78)
Epoch: [14][3850/5005]	Time  0.698 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.3545e+00 (2.5132e+00)	Acc@1  46.09 ( 44.77)	Acc@5  71.48 ( 69.78)
Epoch: [14][3900/5005]	Time  0.697 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.5122e+00 (2.5138e+00)	Acc@1  42.19 ( 44.77)	Acc@5  70.70 ( 69.77)
Epoch: [14][3950/5005]	Time  0.700 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.6600e+00 (2.5137e+00)	Acc@1  41.02 ( 44.77)	Acc@5  66.80 ( 69.77)
Epoch: [14][4000/5005]	Time  0.702 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.6993e+00 (2.5140e+00)	Acc@1  42.19 ( 44.76)	Acc@5  64.84 ( 69.76)
Epoch: [14][4050/5005]	Time  0.707 ( 0.710)	Data  0.000 ( 0.010)	Loss 2.6373e+00 (2.5146e+00)	Acc@1  46.09 ( 44.76)	Acc@5  67.58 ( 69.75)
Epoch: [14][4100/5005]	Time  0.702 ( 0.709)	Data  0.000 ( 0.010)	Loss 2.6251e+00 (2.5145e+00)	Acc@1  45.31 ( 44.76)	Acc@5  69.53 ( 69.75)
Epoch: [14][4150/5005]	Time  0.711



Epoch: [15][ 250/5005]	Time  0.702 ( 0.718)	Data  0.000 ( 0.020)	Loss 2.3220e+00 (2.4719e+00)	Acc@1  46.48 ( 45.53)	Acc@5  73.83 ( 70.23)
Epoch: [15][ 300/5005]	Time  0.697 ( 0.714)	Data  0.000 ( 0.018)	Loss 2.3110e+00 (2.4740e+00)	Acc@1  46.48 ( 45.50)	Acc@5  71.88 ( 70.16)
Epoch: [15][ 350/5005]	Time  0.691 ( 0.712)	Data  0.000 ( 0.017)	Loss 2.3317e+00 (2.4744e+00)	Acc@1  48.05 ( 45.46)	Acc@5  73.05 ( 70.18)
Epoch: [15][ 400/5005]	Time  0.693 ( 0.710)	Data  0.000 ( 0.016)	Loss 2.4892e+00 (2.4743e+00)	Acc@1  44.53 ( 45.51)	Acc@5  71.88 ( 70.20)
Epoch: [15][ 450/5005]	Time  0.699 ( 0.709)	Data  0.000 ( 0.015)	Loss 2.3818e+00 (2.4757e+00)	Acc@1  49.61 ( 45.49)	Acc@5  72.27 ( 70.15)
Epoch: [15][ 500/5005]	Time  0.699 ( 0.708)	Data  0.000 ( 0.015)	Loss 2.4607e+00 (2.4810e+00)	Acc@1  43.36 ( 45.39)	Acc@5  69.53 ( 70.08)
Epoch: [15][ 550/5005]	Time  0.696 ( 0.707)	Data  0.000 ( 0.014)	Loss 2.6408e+00 (2.4824e+00)	Acc@1  44.53 ( 45.34)	Acc@5  65.23 ( 70.06)
Epoch: [15][ 600/5005]	Time  0.700



Epoch: [16][1200/5005]	Time  0.709 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.2304e+00 (2.4844e+00)	Acc@1  46.09 ( 45.29)	Acc@5  78.12 ( 70.32)
Epoch: [16][1250/5005]	Time  0.718 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.2720e+00 (2.4847e+00)	Acc@1  48.44 ( 45.30)	Acc@5  72.66 ( 70.29)
Epoch: [16][1300/5005]	Time  0.726 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.2472e+00 (2.4847e+00)	Acc@1  48.44 ( 45.29)	Acc@5  75.00 ( 70.29)
Epoch: [16][1350/5005]	Time  0.739 ( 0.715)	Data  0.000 ( 0.012)	Loss 2.6223e+00 (2.4848e+00)	Acc@1  42.97 ( 45.28)	Acc@5  65.62 ( 70.28)
Epoch: [16][1400/5005]	Time  0.711 ( 0.715)	Data  0.000 ( 0.012)	Loss 2.5061e+00 (2.4860e+00)	Acc@1  46.09 ( 45.26)	Acc@5  70.31 ( 70.26)
Epoch: [16][1450/5005]	Time  0.700 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.3098e+00 (2.4861e+00)	Acc@1  48.44 ( 45.27)	Acc@5  69.92 ( 70.25)
Epoch: [16][1500/5005]	Time  0.704 ( 0.714)	Data  0.000 ( 0.012)	Loss 2.4933e+00 (2.4871e+00)	Acc@1  44.92 ( 45.25)	Acc@5  71.48 ( 70.23)
Epoch: [16][1550/5005]	Time  0.704



Epoch: [17][4550/5005]	Time  0.700 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.8146e+00 (1.9853e+00)	Acc@1  61.72 ( 55.22)	Acc@5  79.69 ( 77.86)
Epoch: [17][4600/5005]	Time  0.698 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.6255e+00 (1.9845e+00)	Acc@1  61.33 ( 55.24)	Acc@5  85.55 ( 77.87)
Epoch: [17][4650/5005]	Time  0.696 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.8590e+00 (1.9838e+00)	Acc@1  57.81 ( 55.25)	Acc@5  76.56 ( 77.88)
Epoch: [17][4700/5005]	Time  0.701 ( 0.705)	Data  0.000 ( 0.010)	Loss 2.0354e+00 (1.9829e+00)	Acc@1  53.91 ( 55.27)	Acc@5  75.78 ( 77.89)
Epoch: [17][4750/5005]	Time  0.702 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.8470e+00 (1.9823e+00)	Acc@1  58.98 ( 55.28)	Acc@5  78.91 ( 77.90)
Epoch: [17][4800/5005]	Time  0.701 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.7959e+00 (1.9813e+00)	Acc@1  59.38 ( 55.30)	Acc@5  79.69 ( 77.92)
Epoch: [17][4850/5005]	Time  0.700 ( 0.705)	Data  0.000 ( 0.010)	Loss 1.7307e+00 (1.9801e+00)	Acc@1  60.16 ( 55.33)	Acc@5  80.47 ( 77.93)
Epoch: [17][4900/5005]	Time  0.697



Epoch: [18][4850/5005]	Time  0.706 ( 0.708)	Data  0.000 ( 0.010)	Loss 1.8766e+00 (1.8576e+00)	Acc@1  54.69 ( 57.69)	Acc@5  78.52 ( 79.75)
Epoch: [18][4900/5005]	Time  0.702 ( 0.708)	Data  0.000 ( 0.010)	Loss 1.9082e+00 (1.8570e+00)	Acc@1  57.42 ( 57.70)	Acc@5  78.12 ( 79.76)
Epoch: [18][4950/5005]	Time  0.705 ( 0.708)	Data  0.000 ( 0.010)	Loss 1.8114e+00 (1.8569e+00)	Acc@1  58.59 ( 57.71)	Acc@5  78.12 ( 79.76)


Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/opt/conda/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", 

KeyboardInterrupt: 