In [1]:
import json
import os
import time
import random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torchsummary import summary
from sklearn.model_selection import train_test_split

from model_pytorch import EfficientNet
from utils import Bar,Logger, AverageMeter, accuracy, mkdir_p, savefig
from warmup_scheduler import GradualWarmupScheduler

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import collections

In [2]:
# GPU device: restrict CUDA to the selected card index, then report whether CUDA is usable.
gpu_id = 0
os.environ['CUDA_VISIBLE_DEVICES'] = '{}'.format(gpu_id)
use_cuda = torch.cuda.is_available()
print("GPU device {:d}:".format(gpu_id), use_cuda)

GPU device 0: True


# Arguments

In [3]:
# Root folders of the two image collections; the dataset cell joins the
# training / validation sub-folder names onto these.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
source_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN_256/'
target_dir = '/media/data2/dataset/GAN_ImageData/StarGAN_128/'

In [4]:
# Checkpoint whose 'state_dict' entry initialises both teacher and student;
# a falsy value skips that loading step.
pretrained = './log/style1/128/b1/checkpoint.pth.tar'
# Path of a training checkpoint to continue from; the empty string starts a fresh run.
resume = ''

In [5]:
# Model
model_name = 'efficientnet-b1' # b0-b7 scale

# Optimization
num_classes = 2
epochs = 4000
start_epoch = 0
train_batch = 100
test_batch = 200
lr = 0.01
schedule = [500, 1000, 2000, 3000]  # epoch milestones read by adjust_learning_rate
momentum = 0.9
gamma = 0.1 # LR is multiplied by gamma on schedule

# CheckPoint
checkpoint = './log/style1/128/b1/to_star/l2sp' # dir
# exist_ok avoids the separate isdir() check (and the race between check and create).
os.makedirs(checkpoint, exist_ok=True)
num_workers = 4

# Seed
# Seed every generator the notebook draws from: Python's `random`, NumPy
# (used by rand_bbox) and torch (CPU generator as well as all CUDA devices).
manual_seed = 7
random.seed(manual_seed)
np.random.seed(manual_seed)
torch.manual_seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)

# Image
size = (128, 128)  # passed to transforms.Resize

# sp
sp_alpha = 0.1  # weight of the reg_l2sp term in the loss
sp_beta = 0.1   # weight of the reg_cls term in the loss
fc_name = '_fc.'  # parameter-name prefix that reg_cls / reg_l2sp use to split the parameters

# iterative training
feedback = 0
iter_time = []  # epochs at which the teacher is refreshed from the student

# cutmix
# NOTE(review): only referenced by the commented-out CutMix code in train().
cm_prob = 0.5
cm_prob_init = 0.99
cm_prob_low = 0.01

best_acc = 0

In [6]:
# Snapshot of the optimisation settings; the epoch loop and
# adjust_learning_rate read and overwrite state['lr'].
state = {
    'num_classes': num_classes,
    'epochs': epochs,
    'start_epoch': start_epoch,
    'train_batch': train_batch,
    'test_batch': test_batch,
    'lr': lr,
    'schedule': schedule,
    'momentum': momentum,
    'gamma': gamma,
}

# Dataset

In [7]:
# Both training folders and the target validation folder are resolved under
# target_dir; the source validation folder is resolved under source_dir.
train_dir, source_train_dir, val_target_dir = (
    os.path.join(target_dir, folder)
    for folder in ('100_shot_style1', '100_shot_style1_only', 'validation')
)
val_source_dir = os.path.join(source_dir, 'validation')

# Training augmentation: small random affine jitter, resize, random horizontal
# flip, tensor conversion, then random erasing applied in place on the tensor.
train_aug = transforms.Compose([
    transforms.RandomAffine(
        degrees=2,
        translate=(0.02, 0.02),
        scale=(0.98, 1.02),
        shear=2,
        fillcolor=(124,117,104),
    ),
    transforms.Resize(size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.10), ratio=(0.3, 3.3), value=0, inplace=True),
])
# Evaluation images are only resized and converted to tensors.
val_aug = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])

# pin_memory : use pinned (page-locked) host memory for the CUDA transfers
loader_opts = dict(shuffle=True, num_workers=num_workers, pin_memory=True)
train_loader = DataLoader(
    datasets.ImageFolder(train_dir, transform=train_aug), batch_size=train_batch, **loader_opts)
source_train_loader = DataLoader(
    datasets.ImageFolder(source_train_dir, transform=val_aug), batch_size=train_batch, **loader_opts)
val_target_loader = DataLoader(
    datasets.ImageFolder(val_target_dir, val_aug), batch_size=test_batch, **loader_opts)
val_source_loader = DataLoader(
    datasets.ImageFolder(val_source_dir, val_aug), batch_size=test_batch, **loader_opts)

# Model

In [8]:
# Teacher and student are two separate instances of the same architecture.
teacher_model = EfficientNet.from_name(model_name, num_classes=num_classes)
student_model = EfficientNet.from_name(model_name, num_classes=num_classes)

# Pre-trained
if pretrained:
    print("=> using pre-trained model '{}'".format(pretrained))
    # Read the checkpoint from disk once and reuse it for both networks;
    # load_state_dict copies the values into each model's own parameters.
    # map_location='cpu' keeps the load independent of the device the checkpoint
    # was saved from (the models are only moved to the GPU in a later cell).
    pretrained_weights = torch.load(pretrained, map_location='cpu')['state_dict']
    teacher_model.load_state_dict(pretrained_weights)
    student_model.load_state_dict(pretrained_weights)

=> using pre-trained model './log/style1/128/b1/checkpoint.pth.tar'


In [9]:
# Move both networks to the GPU and let cuDNN pick its kernels via benchmark mode.
for net in (teacher_model, student_model):
    net.to('cuda')
cudnn.benchmark = True
# Parameter count of the student, reported in millions.
n_params = sum(p.numel() for p in student_model.parameters())
print('    Total params: %.2fM' % (n_params/1000000.0))

    Total params: 6.52M


In [10]:
# Freeze the teacher: no parameter takes gradients and the module is put in eval mode.
for frozen_param in teacher_model.parameters():
    frozen_param.requires_grad_(False)
teacher_model.eval()

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): GroupNorm(8, 32, eps=1e-05, affine=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): GroupNorm(8, 32, eps=1e-05, affine=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
        (static_padding): Identity()
      )
      (_bn2): GroupNorm(4, 16, ep

In [11]:
# Detached view of every teacher parameter, keyed by parameter name;
# reg_l2sp looks its reference weights up in this mapping.
teacher_model_weights = {
    weight_name: weight.detach()
    for weight_name, weight in teacher_model.named_parameters()
}

In [22]:
# Inspection only: print the name of every teacher parameter that does not start with '_fc'.
for param_name, _ in teacher_model.named_parameters():
    if param_name.startswith('_fc'):
        continue
    print(param_name)

_conv_stem.weight
_bn0.weight
_bn0.bias
_blocks.0._depthwise_conv.weight
_blocks.0._bn1.weight
_blocks.0._bn1.bias
_blocks.0._se_reduce.weight
_blocks.0._se_reduce.bias
_blocks.0._se_expand.weight
_blocks.0._se_expand.bias
_blocks.0._project_conv.weight
_blocks.0._bn2.weight
_blocks.0._bn2.bias
_blocks.1._depthwise_conv.weight
_blocks.1._bn1.weight
_blocks.1._bn1.bias
_blocks.1._se_reduce.weight
_blocks.1._se_reduce.bias
_blocks.1._se_expand.weight
_blocks.1._se_expand.bias
_blocks.1._project_conv.weight
_blocks.1._bn2.weight
_blocks.1._bn2.bias
_blocks.2._expand_conv.weight
_blocks.2._bn0.weight
_blocks.2._bn0.bias
_blocks.2._depthwise_conv.weight
_blocks.2._bn1.weight
_blocks.2._bn1.bias
_blocks.2._se_reduce.weight
_blocks.2._se_reduce.bias
_blocks.2._se_expand.weight
_blocks.2._se_expand.bias
_blocks.2._project_conv.weight
_blocks.2._bn2.weight
_blocks.2._bn2.bias
_blocks.3._expand_conv.weight
_blocks.3._bn0.weight
_blocks.3._bn0.bias
_blocks.3._depthwise_conv.weight
_blocks.3._bn1.

In [23]:
# Inspection only: display one of the stored teacher tensors.
teacher_model_weights['_bn1.weight']

tensor([1.9097e-04, 1.3666e-04, 1.6796e-04,  ..., 1.3282e-04, 7.9702e-02,
        8.5992e-05], device='cuda:0')

In [12]:
def rand_bbox(size, lam):
    """Sample a random axis-aligned box whose area is at most a (1 - lam) share of the plane.

    Args:
        size: indexable shape; only ``size[2]`` (bound for the ``bbx`` pair) and
            ``size[3]`` (bound for the ``bby`` pair) are read.
        lam: value in [0, 1]; each box side is scaled by ``sqrt(1 - lam)``.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` with ``0 <= bbx1 <= bbx2 <= size[2]`` and
        ``0 <= bby1 <= bby2 <= size[3]``. The box is centred on a uniformly drawn
        point and clipped to the plane, so it can be smaller than requested.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # Builtin int() instead of np.int: the alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [13]:
def reg_cls(model):
    """Half the squared L2 norm of every parameter whose name starts with ``fc_name``.

    Args:
        model: module whose ``named_parameters()`` are scanned.

    Returns:
        0-dim tensor on the device of the model's first parameter (CPU when the
        model has no parameters).
    """
    # Follow the model's device instead of forcing CUDA, so this also runs on CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    l2_cls = torch.zeros((), device=device)
    for name, param in model.named_parameters():
        if name.startswith(fc_name):
            # Sum of squares directly: same value as norm ** 2 without the square root.
            l2_cls = l2_cls + 0.5 * param.pow(2).sum()
    return l2_cls

def reg_l2sp(model):
    """Half the squared L2 distance between the model and the stored teacher weights.

    Only parameters whose name does NOT start with ``fc_name`` contribute; the
    reference values are looked up by name in the module-level
    ``teacher_model_weights`` mapping.

    Args:
        model: module whose ``named_parameters()`` are scanned.

    Returns:
        0-dim tensor on the device of the model's first parameter (CPU when the
        model has no parameters).

    Raises:
        KeyError: if a contributing parameter name is missing from
            ``teacher_model_weights``.
    """
    # Follow the model's device instead of forcing CUDA, so this also runs on CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    sp_loss = torch.zeros((), device=device)
    for name, param in model.named_parameters():
        if not name.startswith(fc_name):
            # Sum of squared differences directly: avoids taking a square root at
            # distance 0, which is the situation right after loading the same
            # checkpoint into both networks.
            sp_loss = sp_loss + 0.5 * (param - teacher_model_weights[name]).pow(2).sum()
    return sp_loss

# Loss

In [14]:
# Cross-entropy objective on the GPU; SGD with momentum updates the student only.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(student_model.parameters(), lr=lr, momentum=momentum)
# optimizer = optim.Adam(model.parameters(), weight_decay=0)
# Cosine annealing over `epochs`, wrapped by a 50-epoch gradual warm-up
# (multiplier=4) that hands over to the cosine schedule afterwards.
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
scheduler_warmup = GradualWarmupScheduler(
    optimizer,
    multiplier=4,
    total_epoch=50,
    after_scheduler=scheduler_cosine,
)

In [15]:
# Resume
if resume:
    print('==> Resuming from checkpoint..')
    # Keep logging into the folder the resumed checkpoint lives in.
    checkpoint = os.path.dirname(resume)
    # The loaded dict gets its own name: rebinding `resume` (a path string) to the
    # dict made this cell fail with a TypeError in os.path.dirname when re-run.
    resume_state = torch.load(resume)
    best_acc = resume_state['best_acc']
    start_epoch = resume_state['epoch']
    student_model.load_state_dict(resume_state['state_dict'])
    optimizer.load_state_dict(resume_state['optimizer'])
    # NOTE(review): the LR schedulers are not restored here, so they start again
    # from their initial state after a resume — confirm whether that is intended.
    logger = Logger(os.path.join(checkpoint, 'log.txt'), resume=True)
else:
    logger = Logger(os.path.join(checkpoint, 'log.txt'))
    logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Source Loss', 'Source ACC.'])

# Train

In [16]:
def train(train_loader, source_train_loader, teacher_model, student_model, criterion, optimizer, epoch, use_cuda):
    """Run one optimisation pass of the student over ``train_loader``.

    For every batch the objective is ``criterion(outputs, targets)`` plus
    ``sp_alpha * reg_l2sp(student_model)`` plus ``sp_beta * reg_cls(student_model)``.
    Batches holding fewer than ``train_batch`` samples are skipped entirely.
    A progress line is printed after every processed batch.

    ``source_train_loader``, ``teacher_model`` and ``epoch`` are accepted but not
    read: they were only used by a CutMix prototype (patches pasted in from
    source-loader batches, mixing ratio annealed from ``cm_prob_init`` towards
    ``cm_prob_low`` over the epochs) that is currently disabled.

    Args:
        train_loader: iterable of ``(inputs, targets)`` batches; must support ``len``.
        student_model: network being optimised; switched to train mode here.
        criterion: callable mapping ``(outputs, targets)`` to the main loss.
        optimizer: optimizer stepped once per processed batch.
        use_cuda: when truthy, each batch is moved to the GPU first.

    Returns:
        ``(average loss, average top-1 value)`` taken from the running meters.
    """
    student_model.train()
    # Gradients are switched on globally here (test() switches them off).
    torch.set_grad_enabled(True)

    batch_timer = AverageMeter()
    load_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    tick = time.time()

    total_steps = len(train_loader)
    progress = Bar('Processing', max=total_steps)
    for step, (inputs, targets) in enumerate(train_loader, start=1):
        n_samples = inputs.size(0)
        if n_samples < train_batch:
            # incomplete batch: ignore it
            continue

        # time spent waiting for the data loader
        load_timer.update(time.time() - tick)

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()

        # forward pass and the three loss terms
        outputs = student_model(inputs)
        main_term = criterion(outputs, targets)
        cls_term = reg_cls(student_model)
        sp_term = reg_l2sp(student_model)
        loss = main_term + sp_alpha*sp_term + sp_beta*cls_term

        # book-keeping for loss and accuracy
        prec1 = accuracy(outputs.data, targets.data)
        loss_meter.update(loss.item(), n_samples)
        acc_meter.update(prec1[0], n_samples)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # wall-clock time of the whole step
        batch_timer.update(time.time() - tick)
        tick = time.time()

        # progress bar text
        progress.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} '.format(
            batch=step,
            size=total_steps,
            data=load_timer.val,
            bt=batch_timer.val,
            total=progress.elapsed_td,
            eta=progress.eta_td,
            loss=loss_meter.avg,
            top1=acc_meter.avg,
        )
        progress.next()
        # plain-text copy of the same figures, one line per batch
        print('{batch}/{size} Data:{data:.3f} | Batch:{bt:.3f} | Total:{total:} | ETA:{eta:} | Loss:{loss:} | top1:{tp1:}'.format(
            batch=step,
            size=total_steps,
            data=load_timer.val,
            bt=batch_timer.val,
            total=progress.elapsed_td,
            eta=progress.eta_td,
            loss=loss_meter.avg,
            tp1=acc_meter.avg,
        ))
    progress.finish()
    return (loss_meter.avg, acc_meter.avg)

In [17]:
def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    torch.set_grad_enabled(False)

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
#         inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss_main = criterion(outputs, targets)
        loss_cls = 0
        loss_sp = 0
        loss_cls = reg_cls(model)
        loss_sp = reg_l2sp(model)
        loss = loss_main + sp_alpha*loss_sp + sp_beta*loss_cls

        # measure accuracy and record loss
        prec1 = accuracy(outputs.data, targets.data)
        losses.update(loss.data.tolist(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:} | top1: {top1:}'.format(
                    batch=batch_idx + 1,
                    size=len(val_loader),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=losses.avg,
                    top1=top1.avg,)
        bar.next()
    print('{batch}/{size} Data:{data:.3f} | Batch:{bt:.3f} | Total:{total:} | ETA:{eta:} | Loss:{loss:} | top1:{tp1:}'.format(
         batch=batch_idx+1, size=len(val_loader), data=data_time.val, bt=batch_time.val, total=bar.elapsed_td, eta=bar.eta_td, loss=losses.avg, tp1=top1.avg))
    bar.finish()
    return (losses.avg, top1.avg)

In [18]:
def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    """Write ``state`` to ``<checkpoint>/<filename>`` with ``torch.save``.

    When ``is_best`` is truthy the written file is additionally copied to
    ``<checkpoint>/model_best.pth.tar``. The directory must already exist.
    """
    target = os.path.join(checkpoint, filename)
    torch.save(state, target)
    if not is_best:
        return
    shutil.copyfile(target, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
    """Scale ``state['lr']`` by a step factor and write it to every param group.

    The factor is ``10 ** -k`` where ``k`` is the number of entries in the
    module-level ``schedule`` that are strictly smaller than ``epoch`` (an epoch
    equal to a milestone still uses the previous factor).

    NOTE(review): the factor multiplies whatever ``state['lr']`` currently holds,
    so the caller has to reset ``state['lr']`` before each call to avoid
    compounding — the epoch loop does so from the optimizer.

    Raises:
        IndexError: if more than five milestones lie below ``epoch``.
    """
    factors = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
    # Position `epoch` would take in the sorted milestones = count of smaller ones.
    passed = sum(1 for milestone in schedule if milestone < epoch)
    state['lr'] *= factors[passed]
    new_lr = state['lr']
    for group in optimizer.param_groups:
        group['lr'] = new_lr

In [None]:
for epoch in range(start_epoch, epochs):
    # teacher feedback: at the epochs listed in iter_time the teacher takes over
    # the student's current weights and the reference snapshot is rebuilt.
    if epoch in iter_time:
        print("iterative training: feedback {}".format(epoch))
        teacher_model.load_state_dict(student_model.state_dict())
        teacher_model_weights = {
            name: param.detach() for name, param in teacher_model.named_parameters()
        }

    # Start from the LR the scheduler left in the optimizer, then apply the step factor.
    state['lr'] = optimizer.param_groups[0]['lr']
    adjust_learning_rate(optimizer, epoch)
    print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, epochs, state['lr']))

    train_loss, train_acc = train(
        train_loader, source_train_loader, teacher_model, student_model,
        criterion, optimizer, epoch, use_cuda)

    # Validation, logging and checkpointing happen only on every 50th epoch (epoch 0 included).
    if epoch % 50 == 0:
        test_loss, test_acc = test(val_target_loader, student_model, criterion, epoch, use_cuda)
        source_loss, source_acc = test(val_source_loader, student_model, criterion, epoch, use_cuda)

        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc, source_loss, source_acc])

        # The best model is selected on the target-validation accuracy.
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        snapshot = {
            'epoch': epoch + 1,
            'state_dict' : student_model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(snapshot, is_best, checkpoint=checkpoint)
    scheduler_warmup.step()


Epoch: [1 | 4000] LR: 0.010000
1/4 Data:2.200 | Batch:3.984 | Total:0:00:03 | ETA:0:00:12 | Loss:3.1258292198181152 | top1:68.0
2/4 Data:0.001 | Batch:0.341 | Total:0:00:04 | ETA:0:00:05 | Loss:2.6153738498687744 | top1:73.0
3/4 Data:0.012 | Batch:0.272 | Total:0:00:04 | ETA:0:00:02 | Loss:2.3423405090967813 | top1:73.33333587646484
4/4 Data:0.009 | Batch:0.287 | Total:0:00:04 | ETA:0:00:00 | Loss:2.2284773886203766 | top1:70.0
153/153 Data:0.000 | Batch:0.443 | Total:0:00:21 | ETA:0:00:00 | Loss:1.373285769478834 | top1:50.409568786621094
39/39 Data:0.000 | Batch:0.132 | Total:0:00:08 | ETA:0:00:00 | Loss:1.1798848402805817 | top1:65.03845977783203

Epoch: [2 | 4000] LR: 0.010600
1/4 Data:0.511 | Batch:0.849 | Total:0:00:00 | ETA:0:00:03 | Loss:1.2829450368881226 | top1:60.0
2/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:02 | Loss:1.1835493445396423 | top1:65.0
3/4 Data:0.009 | Batch:0.267 | Total:0:00:01 | ETA:0:00:01 | Loss:1.1202774047851562 | top1:68.66667175292969
4/4 D

4/4 Data:0.009 | Batch:0.264 | Total:0:00:01 | ETA:0:00:00 | Loss:0.5182725414633751 | top1:83.75

Epoch: [20 | 4000] LR: 0.021400
1/4 Data:0.552 | Batch:0.826 | Total:0:00:00 | ETA:0:00:03 | Loss:0.49501368403434753 | top1:83.0
2/4 Data:0.009 | Batch:0.269 | Total:0:00:01 | ETA:0:00:02 | Loss:0.4956454336643219 | top1:85.0
3/4 Data:0.009 | Batch:0.252 | Total:0:00:01 | ETA:0:00:01 | Loss:0.490856796503067 | top1:85.0
4/4 Data:0.009 | Batch:0.258 | Total:0:00:01 | ETA:0:00:00 | Loss:0.4891681373119354 | top1:85.0

Epoch: [21 | 4000] LR: 0.022000
1/4 Data:0.535 | Batch:0.834 | Total:0:00:00 | ETA:0:00:03 | Loss:0.4559907019138336 | top1:86.0
2/4 Data:0.009 | Batch:0.267 | Total:0:00:01 | ETA:0:00:02 | Loss:0.44226670265197754 | top1:87.5
3/4 Data:0.009 | Batch:0.268 | Total:0:00:01 | ETA:0:00:01 | Loss:0.46217767397562665 | top1:86.66667175292969
4/4 Data:0.009 | Batch:0.266 | Total:0:00:01 | ETA:0:00:00 | Loss:0.48380914330482483 | top1:85.5

Epoch: [22 | 4000] LR: 0.022600
1/4 Data:0.

1/4 Data:0.537 | Batch:0.835 | Total:0:00:00 | ETA:0:00:03 | Loss:0.7011773586273193 | top1:87.0
2/4 Data:0.006 | Batch:0.275 | Total:0:00:01 | ETA:0:00:02 | Loss:0.685376912355423 | top1:88.0
3/4 Data:0.009 | Batch:0.262 | Total:0:00:01 | ETA:0:00:01 | Loss:0.7008503874142965 | top1:87.33333587646484
4/4 Data:0.009 | Batch:0.262 | Total:0:00:01 | ETA:0:00:00 | Loss:0.6860953271389008 | top1:86.25

Epoch: [40 | 4000] LR: 0.033400
1/4 Data:0.506 | Batch:0.792 | Total:0:00:00 | ETA:0:00:03 | Loss:0.6221504211425781 | top1:88.0
2/4 Data:0.009 | Batch:0.265 | Total:0:00:01 | ETA:0:00:02 | Loss:0.6098925173282623 | top1:88.5
3/4 Data:0.009 | Batch:0.272 | Total:0:00:01 | ETA:0:00:01 | Loss:0.5919881264368693 | top1:90.33333587646484
4/4 Data:0.009 | Batch:0.275 | Total:0:00:01 | ETA:0:00:00 | Loss:0.5786881446838379 | top1:90.5

Epoch: [41 | 4000] LR: 0.034000
1/4 Data:0.495 | Batch:0.779 | Total:0:00:00 | ETA:0:00:03 | Loss:0.6002225279808044 | top1:89.0
2/4 Data:0.009 | Batch:0.269 | Tota

4/4 Data:0.009 | Batch:0.273 | Total:0:00:01 | ETA:0:00:00 | Loss:38.07147789001465 | top1:85.75

Epoch: [58 | 4000] LR: 0.040000
1/4 Data:0.518 | Batch:0.803 | Total:0:00:00 | ETA:0:00:03 | Loss:32.928993225097656 | top1:88.0
2/4 Data:0.009 | Batch:0.258 | Total:0:00:01 | ETA:0:00:02 | Loss:32.06612682342529 | top1:86.5
3/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:01 | Loss:31.142632166544598 | top1:86.66667175292969
4/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:00 | Loss:30.273503303527832 | top1:85.0

Epoch: [59 | 4000] LR: 0.040000
1/4 Data:0.514 | Batch:0.790 | Total:0:00:00 | ETA:0:00:03 | Loss:26.433372497558594 | top1:77.0
2/4 Data:0.009 | Batch:0.257 | Total:0:00:01 | ETA:0:00:02 | Loss:26.169713973999023 | top1:82.0
3/4 Data:0.009 | Batch:0.261 | Total:0:00:01 | ETA:0:00:01 | Loss:26.169313430786133 | top1:80.33333587646484
4/4 Data:0.009 | Batch:0.292 | Total:0:00:01 | ETA:0:00:00 | Loss:27.780853748321533 | top1:80.25

Epoch: [60 | 4000] LR: 0.040000
1/

1/4 Data:0.504 | Batch:0.782 | Total:0:00:00 | ETA:0:00:03 | Loss:955.4873657226562 | top1:82.0
2/4 Data:0.009 | Batch:0.272 | Total:0:00:01 | ETA:0:00:02 | Loss:943.9866638183594 | top1:85.0
3/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:01 | Loss:930.9735107421875 | top1:85.33333587646484
4/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:00 | Loss:916.6408996582031 | top1:83.75

Epoch: [78 | 4000] LR: 0.039996
1/4 Data:0.517 | Batch:0.805 | Total:0:00:00 | ETA:0:00:03 | Loss:838.961181640625 | top1:88.0
2/4 Data:0.009 | Batch:0.270 | Total:0:00:01 | ETA:0:00:02 | Loss:820.5375061035156 | top1:87.5
3/4 Data:0.009 | Batch:0.280 | Total:0:00:01 | ETA:0:00:01 | Loss:801.5166625976562 | top1:88.33333587646484
4/4 Data:0.009 | Batch:0.274 | Total:0:00:01 | ETA:0:00:00 | Loss:782.0586853027344 | top1:87.0

Epoch: [79 | 4000] LR: 0.039996
1/4 Data:0.518 | Batch:0.791 | Total:0:00:00 | ETA:0:00:03 | Loss:683.1930541992188 | top1:88.0
2/4 Data:0.009 | Batch:0.265 | Total:0:00:01

3/4 Data:0.009 | Batch:0.281 | Total:0:00:01 | ETA:0:00:01 | Loss:7.19892676671346 | top1:86.33333587646484
4/4 Data:0.009 | Batch:0.283 | Total:0:00:01 | ETA:0:00:00 | Loss:7.019128680229187 | top1:84.0

Epoch: [97 | 4000] LR: 0.039988
1/4 Data:0.532 | Batch:0.823 | Total:0:00:00 | ETA:0:00:03 | Loss:6.059637069702148 | top1:78.0
2/4 Data:0.009 | Batch:0.284 | Total:0:00:01 | ETA:0:00:02 | Loss:5.801584482192993 | top1:82.0
3/4 Data:0.009 | Batch:0.285 | Total:0:00:01 | ETA:0:00:01 | Loss:5.613518397013347 | top1:82.33333587646484
4/4 Data:0.009 | Batch:0.279 | Total:0:00:01 | ETA:0:00:00 | Loss:5.42866837978363 | top1:82.25

Epoch: [98 | 4000] LR: 0.039987
1/4 Data:0.549 | Batch:0.864 | Total:0:00:00 | ETA:0:00:03 | Loss:4.527224540710449 | top1:85.0
2/4 Data:0.010 | Batch:0.277 | Total:0:00:01 | ETA:0:00:02 | Loss:4.376650333404541 | top1:84.5
3/4 Data:0.009 | Batch:0.274 | Total:0:00:01 | ETA:0:00:01 | Loss:4.244332869847615 | top1:83.33333587646484
4/4 Data:0.009 | Batch:0.280 | T

1/4 Data:0.530 | Batch:0.802 | Total:0:00:00 | ETA:0:00:03 | Loss:19.3432674407959 | top1:87.0
2/4 Data:0.009 | Batch:0.259 | Total:0:00:01 | ETA:0:00:02 | Loss:19.44587993621826 | top1:86.5
3/4 Data:0.009 | Batch:0.274 | Total:0:00:01 | ETA:0:00:01 | Loss:19.56677182515462 | top1:85.33333587646484
4/4 Data:0.009 | Batch:0.269 | Total:0:00:01 | ETA:0:00:00 | Loss:19.706826210021973 | top1:85.5

Epoch: [116 | 4000] LR: 0.039975
1/4 Data:0.504 | Batch:0.786 | Total:0:00:00 | ETA:0:00:03 | Loss:20.455869674682617 | top1:82.0
2/4 Data:0.009 | Batch:0.269 | Total:0:00:01 | ETA:0:00:02 | Loss:20.610754013061523 | top1:83.0
3/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:01 | Loss:20.70205561319987 | top1:84.33333587646484
4/4 Data:0.009 | Batch:0.270 | Total:0:00:01 | ETA:0:00:00 | Loss:20.792211055755615 | top1:84.5

Epoch: [117 | 4000] LR: 0.039974
1/4 Data:0.504 | Batch:0.798 | Total:0:00:00 | ETA:0:00:03 | Loss:21.24135971069336 | top1:76.0
2/4 Data:0.009 | Batch:0.274 | Total:0:

2/4 Data:0.021 | Batch:0.497 | Total:0:00:01 | ETA:0:00:02 | Loss:396.7054901123047 | top1:87.5
3/4 Data:0.019 | Batch:0.469 | Total:0:00:02 | ETA:0:00:01 | Loss:399.8188171386719 | top1:86.66667175292969
4/4 Data:0.021 | Batch:0.495 | Total:0:00:02 | ETA:0:00:00 | Loss:401.746826171875 | top1:86.0

Epoch: [135 | 4000] LR: 0.039958
1/4 Data:0.515 | Batch:0.973 | Total:0:00:00 | ETA:0:00:03 | Loss:405.928466796875 | top1:87.0
2/4 Data:0.022 | Batch:0.497 | Total:0:00:01 | ETA:0:00:02 | Loss:403.7155303955078 | top1:89.5
3/4 Data:0.021 | Batch:0.467 | Total:0:00:01 | ETA:0:00:01 | Loss:400.7393086751302 | top1:86.0
4/4 Data:0.021 | Batch:0.497 | Total:0:00:02 | ETA:0:00:00 | Loss:396.97764587402344 | top1:85.75

Epoch: [136 | 4000] LR: 0.039956
1/4 Data:0.537 | Batch:0.974 | Total:0:00:00 | ETA:0:00:03 | Loss:417.10516357421875 | top1:91.0
2/4 Data:0.019 | Batch:0.485 | Total:0:00:01 | ETA:0:00:02 | Loss:465.8824157714844 | top1:89.5
3/4 Data:0.019 | Batch:0.485 | Total:0:00:01 | ETA:0:0

1/4 Data:0.522 | Batch:0.799 | Total:0:00:00 | ETA:0:00:03 | Loss:165.2030487060547 | top1:85.0
2/4 Data:0.009 | Batch:0.252 | Total:0:00:01 | ETA:0:00:02 | Loss:157.8375473022461 | top1:83.5
3/4 Data:0.009 | Batch:0.262 | Total:0:00:01 | ETA:0:00:01 | Loss:150.77782185872397 | top1:85.33333587646484
4/4 Data:0.009 | Batch:0.254 | Total:0:00:01 | ETA:0:00:00 | Loss:144.08459091186523 | top1:82.25

Epoch: [154 | 4000] LR: 0.039936
1/4 Data:0.527 | Batch:0.795 | Total:0:00:00 | ETA:0:00:03 | Loss:111.98147583007812 | top1:84.0
2/4 Data:0.009 | Batch:0.256 | Total:0:00:01 | ETA:0:00:02 | Loss:106.52997970581055 | top1:84.5
3/4 Data:0.009 | Batch:0.256 | Total:0:00:01 | ETA:0:00:01 | Loss:101.42574564615886 | top1:79.33333587646484
4/4 Data:0.009 | Batch:0.253 | Total:0:00:01 | ETA:0:00:00 | Loss:96.51642417907715 | top1:79.5

Epoch: [155 | 4000] LR: 0.039935
1/4 Data:0.510 | Batch:0.814 | Total:0:00:00 | ETA:0:00:03 | Loss:73.514892578125 | top1:71.0
2/4 Data:0.009 | Batch:0.261 | Total:0

2/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:02 | Loss:8064.005615234375 | top1:83.5
3/4 Data:0.009 | Batch:0.280 | Total:0:00:01 | ETA:0:00:01 | Loss:7791.223307291667 | top1:81.33333587646484
4/4 Data:0.009 | Batch:0.276 | Total:0:00:01 | ETA:0:00:00 | Loss:7523.60302734375 | top1:80.5

Epoch: [173 | 4000] LR: 0.039910
1/4 Data:0.537 | Batch:0.833 | Total:0:00:00 | ETA:0:00:03 | Loss:6214.6572265625 | top1:77.0
2/4 Data:0.009 | Batch:0.277 | Total:0:00:01 | ETA:0:00:02 | Loss:5972.024169921875 | top1:80.0
3/4 Data:0.009 | Batch:0.277 | Total:0:00:01 | ETA:0:00:01 | Loss:5736.880533854167 | top1:82.66667175292969
4/4 Data:0.009 | Batch:0.276 | Total:0:00:01 | ETA:0:00:00 | Loss:5509.4971923828125 | top1:83.75

Epoch: [174 | 4000] LR: 0.039908
1/4 Data:0.473 | Batch:0.776 | Total:0:00:00 | ETA:0:00:03 | Loss:4412.23876953125 | top1:85.0
2/4 Data:0.009 | Batch:0.274 | Total:0:00:01 | ETA:0:00:02 | Loss:4216.91455078125 | top1:82.5
3/4 Data:0.009 | Batch:0.270 | Total:0:00:01 

3/4 Data:0.009 | Batch:0.255 | Total:0:00:01 | ETA:0:00:01 | Loss:596.1337890625 | top1:83.66667175292969
4/4 Data:0.009 | Batch:0.258 | Total:0:00:01 | ETA:0:00:00 | Loss:588.1134796142578 | top1:84.5

Epoch: [192 | 4000] LR: 0.039879
1/4 Data:0.528 | Batch:0.794 | Total:0:00:00 | ETA:0:00:03 | Loss:544.7694702148438 | top1:80.0
2/4 Data:0.009 | Batch:0.260 | Total:0:00:01 | ETA:0:00:02 | Loss:534.4000854492188 | top1:75.5
3/4 Data:0.009 | Batch:0.264 | Total:0:00:01 | ETA:0:00:01 | Loss:523.5197245279948 | top1:77.33333587646484
4/4 Data:0.009 | Batch:0.269 | Total:0:00:01 | ETA:0:00:00 | Loss:512.3141632080078 | top1:80.0

Epoch: [193 | 4000] LR: 0.039877
1/4 Data:0.522 | Batch:0.794 | Total:0:00:00 | ETA:0:00:03 | Loss:455.11572265625 | top1:81.0
2/4 Data:0.009 | Batch:0.259 | Total:0:00:01 | ETA:0:00:02 | Loss:443.007568359375 | top1:85.0
3/4 Data:0.009 | Batch:0.253 | Total:0:00:01 | ETA:0:00:01 | Loss:430.9475504557292 | top1:80.66667175292969
4/4 Data:0.009 | Batch:0.270 | Tota

2/4 Data:0.009 | Batch:0.252 | Total:0:00:01 | ETA:0:00:02 | Loss:1977.29833984375 | top1:84.0
3/4 Data:0.009 | Batch:0.251 | Total:0:00:01 | ETA:0:00:01 | Loss:1962.1908772786458 | top1:84.66667175292969
4/4 Data:0.009 | Batch:0.256 | Total:0:00:01 | ETA:0:00:00 | Loss:1943.1628723144531 | top1:82.0

Epoch: [211 | 4000] LR: 0.039844
1/4 Data:0.493 | Batch:0.763 | Total:0:00:00 | ETA:0:00:03 | Loss:1830.245849609375 | top1:85.0
2/4 Data:0.009 | Batch:0.254 | Total:0:00:01 | ETA:0:00:02 | Loss:1798.4346923828125 | top1:86.0
3/4 Data:0.009 | Batch:0.256 | Total:0:00:01 | ETA:0:00:01 | Loss:1764.4986979166667 | top1:83.33333587646484
4/4 Data:0.009 | Batch:0.268 | Total:0:00:01 | ETA:0:00:00 | Loss:1728.7530212402344 | top1:84.0

Epoch: [212 | 4000] LR: 0.039842
1/4 Data:0.490 | Batch:0.775 | Total:0:00:00 | ETA:0:00:03 | Loss:1542.8916015625 | top1:89.0
2/4 Data:0.009 | Batch:0.253 | Total:0:00:01 | ETA:0:00:02 | Loss:1502.4141235351562 | top1:85.0
3/4 Data:0.009 | Batch:0.259 | Total:0:

3/4 Data:0.009 | Batch:0.251 | Total:0:00:01 | ETA:0:00:01 | Loss:256.9908498128255 | top1:87.33333587646484
4/4 Data:0.009 | Batch:0.253 | Total:0:00:01 | ETA:0:00:00 | Loss:263.66682052612305 | top1:84.25

Epoch: [230 | 4000] LR: 0.039805
1/4 Data:0.504 | Batch:0.784 | Total:0:00:00 | ETA:0:00:03 | Loss:292.5658264160156 | top1:83.0
2/4 Data:0.009 | Batch:0.265 | Total:0:00:01 | ETA:0:00:02 | Loss:296.2397003173828 | top1:86.0
3/4 Data:0.009 | Batch:0.262 | Total:0:00:01 | ETA:0:00:01 | Loss:299.2996419270833 | top1:87.33333587646484
4/4 Data:0.009 | Batch:0.265 | Total:0:00:01 | ETA:0:00:00 | Loss:301.7266616821289 | top1:85.5

Epoch: [231 | 4000] LR: 0.039803
1/4 Data:0.500 | Batch:0.785 | Total:0:00:00 | ETA:0:00:03 | Loss:310.45684814453125 | top1:87.0
2/4 Data:0.009 | Batch:0.276 | Total:0:00:01 | ETA:0:00:02 | Loss:310.21482849121094 | top1:88.5
3/4 Data:0.009 | Batch:0.271 | Total:0:00:01 | ETA:0:00:01 | Loss:309.3359680175781 | top1:86.66667175292969
4/4 Data:0.009 | Batch:0.