In [1]:
import json
import os
import time
import random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torchsummary import summary
from sklearn.model_selection import train_test_split

from model_pytorch import EfficientNet
from utils import Bar,Logger, AverageMeter, accuracy, mkdir_p, savefig
from warmup_scheduler import GradualWarmupScheduler

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import collections

In [2]:
# GPU Device
# Expose a single GPU to this process; the variable is exported before the
# CUDA availability query below.
gpu_id = 3
os.environ.update(CUDA_VISIBLE_DEVICES=str(gpu_id))
use_cuda = torch.cuda.is_available()
print("GPU device %d:" % gpu_id, use_cuda)

GPU device 3: True


# Arguments

In [3]:
# Dataset roots. The dataset cell joins 'train' / 'validation' onto source_dir
# and '100_shot' / 'validation' onto target_dir to locate the image folders.
# NOTE(review): absolute, machine-specific paths — adjust per host.
source_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN2_256/'
target_dir = '/media/data2/dataset/GAN_ImageData/PGGAN_128/'

In [4]:
# Checkpoint whose 'state_dict' entry initialises both networks; a falsy value
# skips the pre-trained load.
pretrained = './log/style1/128/b1_2/checkpoint.pth.tar'
# Checkpoint to resume from; the empty string starts a fresh run with a new log.
resume = ''

In [5]:
# Model
model_name = 'efficientnet-b1' # b0-b7 scale

# Optimization
num_classes = 2
epochs = 4000
start_epoch = 0
batch_ratio = 14  # source-batch size expressed as a multiple of the target batch
train_batch = 10
source_batch = train_batch * batch_ratio
test_batch = 200
lr = 0.04
schedule = [500, 2000, 3000]  # epoch milestones read by adjust_learning_rate
momentum = 0.9
gamma = 0.1 # LR is multiplied by gamma on schedule

# CheckPoint
checkpoint = './log/style2/128/b1_2/to_pggan/self' # dir
os.makedirs(checkpoint, exist_ok=True)
num_workers = 8

# Seed
# Seed every RNG the notebook draws from: Python's `random`, NumPy (used by
# rand_bbox), the torch CPU generator (used by torch.randperm in train) and
# all CUDA generators. Seeding only `random` and CUDA leaves the CutMix boxes
# and the permutations unseeded.
manual_seed = 7
random.seed(manual_seed)
np.random.seed(manual_seed)
torch.manual_seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)

# Image
size = (128, 128)

# sp
sp_alpha = 0.1  # weight of the L2-SP term (reg_l2sp) in the loss
sp_beta = 0.1   # weight of the classifier L2 term (reg_cls) in the loss
fc_name = '_fc.'  # parameter-name prefix that identifies the classifier head
feature_list = []

# iterative training
feedback = 0
iter_time = []  # epochs at which the director is refreshed from the learner

# cutmix
cm_prob = 0.5
cm_prob_init = 0.99  # CutMix lambda at epoch 0
cm_prob_low = 0.01   # CutMix lambda approached at the final epoch

# self-training
logsoftmax = nn.LogSoftmax(dim=-1)
softmax = nn.Softmax(dim=-1)

best_acc = 0

In [6]:
# Snapshot of the optimisation hyper-parameters. The training loop overwrites
# state['lr'] each epoch and adjust_learning_rate rescales it.
state = {
    'num_classes': num_classes,
    'epochs': epochs,
    'start_epoch': start_epoch,
    'train_batch': train_batch,
    'test_batch': test_batch,
    'lr': lr,
    'schedule': schedule,
    'momentum': momentum,
    'gamma': gamma,
}

# Dataset

In [7]:
# Image-folder locations: the 100-shot target training split, the source
# training split, and one validation split per domain.
train_dir = os.path.join(target_dir, '100_shot')
val_target_dir = os.path.join(target_dir, 'validation')
source_train_dir = os.path.join(source_dir, 'train')
val_source_dir = os.path.join(source_dir, 'validation')

# Keyword sets shared by the augmentation pipelines below.
# NOTE(review): `fillcolor` is the legacy RandomAffine keyword; newer
# torchvision releases name it `fill` — confirm against the installed version.
_affine_kwargs = dict(degrees=2, translate=(0.02, 0.02), scale=(0.98, 1.02), shear=2, fillcolor=(124,117,104))
_erasing_kwargs = dict(p=0.3, scale=(0.02, 0.10), ratio=(0.3, 3.3), value=0, inplace=True)

# Target-domain training augmentation: light affine jitter, resize, flip,
# tensor conversion, then random erasing on the tensor.
train_aug = transforms.Compose([
    transforms.RandomAffine(**_affine_kwargs),
    transforms.Resize(size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.RandomErasing(**_erasing_kwargs),
])

# Deterministic pipeline: resize and convert only.
val_aug = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])

# NOTE(review): source_aug is defined but not passed to any loader in this
# cell (source_loader uses val_aug), and it has no ToTensor step — confirm
# the intent before wiring it in.
source_aug = transforms.Compose([
    transforms.RandomAffine(**_affine_kwargs),
    transforms.RandomHorizontalFlip(),
    transforms.RandomErasing(**_erasing_kwargs),
])


def _shuffled_loader(root, transform, batch_size):
    """Wrap an ImageFolder rooted at `root` in a shuffling DataLoader with pinned memory."""
    return DataLoader(datasets.ImageFolder(root, transform=transform),
                      batch_size=batch_size, shuffle=True,
                      num_workers=num_workers, pin_memory=True)


train_loader = _shuffled_loader(train_dir, train_aug, train_batch)
source_loader = _shuffled_loader(source_train_dir, val_aug, source_batch)
val_target_loader = _shuffled_loader(val_target_dir, val_aug, test_batch)
val_source_loader = _shuffled_loader(val_source_dir, val_aug, test_batch)

# Model

In [8]:
# Two networks with the same architecture: the "director" (frozen in a later
# cell) and the "learner" (the one the optimizer updates).
director_model = EfficientNet.from_name(model_name, num_classes=num_classes,
                                      override_params={'dropout_rate':0.3, 'drop_connect_rate':0.3})
learner_model = EfficientNet.from_name(model_name, num_classes=num_classes,
                                      override_params={'dropout_rate':0.3, 'drop_connect_rate':0.3})

# Pre-trained
if pretrained:
    print("=> using pre-trained model '{}'".format(pretrained))
    # Read the checkpoint from disk once and reuse it for both networks.
    # map_location='cpu' keeps the load independent of the GPU index the
    # checkpoint was saved from; the models are moved to their device later.
    pretrained_state = torch.load(pretrained, map_location='cpu')['state_dict']
    director_model.load_state_dict(pretrained_state)
    learner_model.load_state_dict(pretrained_state)

=> using pre-trained model './log/style1/128/b1_2/checkpoint.pth.tar'


In [9]:
# Follow the availability check from the GPU cell instead of assuming CUDA,
# matching the `if use_cuda:` guards inside train() and test().
device = torch.device('cuda' if use_cuda else 'cpu')
director_model.to(device)
learner_model.to(device)
cudnn.benchmark = True
print('    Total params: %.2fM' % (sum(p.numel() for p in learner_model.parameters())/1000000.0))

    Total params: 6.52M


In [10]:
# Freeze the director: none of its parameters receive gradients, and it is
# put into evaluation mode.
for frozen_param in director_model.parameters():
    frozen_param.requires_grad_(False)
director_model.eval()

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_gn0): GroupNorm(8, 32, eps=1e-05, affine=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_gn1): GroupNorm(8, 32, eps=1e-05, affine=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
        (static_padding): Identity()
      )
      (_gn2): GroupNorm(8, 16, ep

In [11]:
# Name -> detached parameter tensor of the director; reg_l2sp uses these as
# the reference point for the L2-SP penalty.
director_model_weights = {
    weight_name: weight.detach()
    for weight_name, weight in director_model.named_parameters()
}

In [12]:
def rand_bbox(size, lam):
    """Sample a random CutMix box inside a 4-D batch shape.

    Args:
        size: shape of the batch; only size[2] and size[3] are read.
        lam: mixing coefficient. The box covers roughly a (1 - lam) fraction
            of the size[2] x size[3] area before clipping at the borders.

    Returns:
        (bbx1, bby1, bbx2, bby2), where bbx* bound dimension 2 and bby* bound
        dimension 3, each clipped to [0, size[2]] / [0, size[3]].
    """
    # The W/H names follow the original code: W is dimension 2, H is dimension 3.
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # Built-in int() replaces the np.int alias, which NumPy 1.24 removed.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [13]:
def reg_cls(model):
    """L2 penalty on the classifier head.

    Sums 0.5 * ||p||^2 over every parameter of `model` whose name starts with
    the module-level `fc_name` prefix.

    Returns:
        A 0-dim tensor on the same device as the model's parameters (CPU if
        the model has no parameters).
    """
    # Accumulate on the model's own device instead of assuming CUDA.
    device = next((p.device for p in model.parameters()), torch.device('cpu'))
    l2_cls = torch.tensor(0., device=device)
    for name, param in model.named_parameters():
        if name.startswith(fc_name):
            l2_cls += 0.5 * torch.norm(param) ** 2
    return l2_cls

def reg_l2sp(model):
    """L2-SP penalty: squared distance of the backbone from the director weights.

    Sums 0.5 * ||p - p_ref||^2 over every parameter of `model` whose name does
    NOT start with the module-level `fc_name` prefix, where p_ref is looked up
    by name in the module-level `director_model_weights`.

    Returns:
        A 0-dim tensor on the same device as the model's parameters (CPU if
        the model has no parameters).
    """
    # Accumulate on the model's own device instead of assuming CUDA.
    device = next((p.device for p in model.parameters()), torch.device('cpu'))
    sp_loss = torch.tensor(0., device=device)
    for name, param in model.named_parameters():
        if not name.startswith(fc_name):
            sp_loss += 0.5*torch.norm(param - director_model_weights[name])**2
    return sp_loss

# Loss

In [14]:
# Classification loss shared by training and evaluation.
criterion = nn.CrossEntropyLoss().cuda()

# Only the learner is optimised.
optimizer = optim.SGD(params=learner_model.parameters(), lr=lr, momentum=momentum)

# Cosine annealing over the full run, entered through a gradual warm-up
# wrapper (multiplier=4, 10 warm-up epochs).
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=4, total_epoch=10, after_scheduler=scheduler_cosine)

In [15]:
# Resume
if resume:
    print('==> Resuming from checkpoint..')
    checkpoint = os.path.dirname(resume)
#     checkpoint = torch.load(resume)
    resume = torch.load(resume)
    best_acc = resume['best_acc']
    start_epoch = resume['epoch']
    learner_model.load_state_dict(resume['state_dict'])
    optimizer.load_state_dict(resume['optimizer'])
    logger = Logger(os.path.join(checkpoint, 'log.txt'), resume=True)
else:
    logger = Logger(os.path.join(checkpoint, 'log.txt'))
    logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Source Loss', 'Train Acc.', 'Valid Acc.', 'Source ACC.'])

# Train

In [16]:
def _next_source_batch(source_iter, source_loader, min_size):
    """Fetch the next source batch, restarting the loader when it is exhausted
    or yields a batch with fewer than `min_size` samples.

    Returns the (possibly new) iterator together with the batch so the caller
    can keep reusing one iterator across steps.
    """
    batch = next(source_iter, None)
    if batch is None or batch[0].size(0) < min_size:
        source_iter = iter(source_loader)
        batch = next(source_iter)
    return source_iter, batch[0], batch[1]


def train(train_loader, source_loader, teacher_model, student_model, criterion, optimizer, epoch, use_cuda):
    """Run one training epoch of the student on target batches plus CutMix-ed source batches.

    For every full-size target batch, one source batch is drawn, a random box
    from label-matching target images is pasted into every `batch_ratio`-th
    source image, and the student is trained on
    ``loss_main + 0.1*loss_sub + sp_alpha*loss_sp + sp_beta*loss_cls``.
    The teacher is only evaluated on the mixed source batch; its loss is
    printed as "Adaptive" and does not enter the optimised loss.

    Reads the module-level settings train_batch, batch_ratio, epochs,
    cm_prob_init, cm_prob_low, sp_alpha and sp_beta.

    Args:
        train_loader: loader over the target-domain training set.
        source_loader: loader over the source-domain training set.
        teacher_model: frozen director network (put in eval mode here).
        student_model: learner network being optimised.
        criterion: classification loss.
        optimizer: optimiser over the student's parameters.
        epoch: zero-based epoch index; drives the CutMix lambda schedule.
        use_cuda: when true, batches are moved to the GPU.

    Returns:
        (losses.avg, top1.avg) accumulated over the processed target batches.
    """
    student_model.train()
    teacher_model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()

    # cutmix: lambda depends only on the epoch, so compute it once per call.
    prob_delta = cm_prob_init - cm_prob_low
    prob_step = epoch / (epochs+1) * prob_delta
    lam = cm_prob_init - prob_step

    # One source iterator per epoch. Building a fresh iterator for every batch
    # re-creates the loader iterator (and its workers) each step only to
    # consume a single batch from it.
    source_iter = iter(source_loader)

    bar = Bar('Processing', max=len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        batch_size = inputs.size(0)
        if batch_size < train_batch:
            continue
        # measure data loading time
        data_time.update(time.time() - end)

        # The strided indexing below touches source rows up to
        # (batch_size - 1) * batch_ratio, so the source batch must be at
        # least that long.
        min_source = (batch_size - 1) * batch_ratio + 1
        source_iter, source_inputs, source_targets = _next_source_batch(
            source_iter, source_loader, min_source)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
            source_inputs, source_targets = source_inputs.cuda(), source_targets.cuda()

        # Pair target sample i with source sample i*batch_ratio and keep only
        # the pairs whose labels agree. The permutation is drawn on the CPU and
        # moved to wherever the batch lives, so no device is assumed.
        rand_index = torch.randperm(batch_size).to(inputs.device)
        st = source_targets[rand_index*batch_ratio]
        tt = targets[rand_index]
        rand_index = rand_index[st == tt]

        bbx1, bby1, bbx2, bby2 = rand_bbox(inputs.size(), lam)
        source_inputs[rand_index*batch_ratio, :, bbx1:bbx2, bby1:bby2] = inputs[rand_index, :, bbx1:bbx2, bby1:bby2]

        # label-training
        outputs = student_model(inputs)

        # director: monitoring only, so no autograd graph is needed.
        with torch.no_grad():
            director_outputs = teacher_model(source_inputs)
            adaptive_param = criterion(director_outputs, source_targets).item()

        # learner learn
        learn_outputs = student_model(source_inputs)

        loss_main = criterion(outputs, targets)
        loss_sub = criterion(learn_outputs, source_targets)
        loss_cls = reg_cls(student_model)
        loss_sp = reg_l2sp(student_model)

        # compute output
        loss = loss_main + 0.1*loss_sub + sp_alpha*loss_sp + sp_beta*loss_cls

        # measure accuracy and record loss
        prec1 = accuracy(outputs.data, targets.data)
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} '.format(
                    batch=batch_idx + 1,
                    size=len(train_loader),
                    data=data_time.val,
                    bt=batch_time.val,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=losses.avg,
                    top1=top1.avg,
                    )
        bar.next()
        print('{batch}/{size} | Loss:{loss:.4f} | MainLoss:{total:.4f} | Adaptive:{data:.4f} | SubLoss:{eta:.4f} | | SPLoss:{sp:.4f} | top1:{tp1:}'.format(
                 batch=batch_idx+1, size=len(train_loader), data=adaptive_param, total=loss_main, eta=loss_sub, loss=losses.avg, sp=loss_sp, tp1=top1.avg))
    bar.finish()
    return (losses.avg, top1.avg)

In [17]:
def test(val_loader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `val_loader`.

    The reported loss is ``criterion + sp_alpha*reg_l2sp + sp_beta*reg_cls``,
    i.e. the same regularised form used in training, minus the source-batch term.

    Args:
        val_loader: loader yielding (inputs, targets) batches.
        model: network to evaluate (switched to eval mode here).
        criterion: classification loss.
        epoch: unused; kept so existing callers keep working.
        use_cuda: when true, batches are moved to the GPU.

    Returns:
        (losses.avg, top1.avg) accumulated over the loader.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    # Stays at -1 when the loader is empty, so the summary line still prints.
    batch_idx = -1
    with torch.no_grad():
        # The weights do not change during evaluation, so both regularisers
        # are constant across batches; compute them once.
        loss_cls = reg_cls(model)
        loss_sp = reg_l2sp(model)

        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss_main = criterion(outputs, targets)
            loss = loss_main + sp_alpha*loss_sp + sp_beta*loss_cls

            # measure accuracy and record loss
            prec1 = accuracy(outputs.data, targets.data)
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1[0], inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:} | top1: {top1:}'.format(
                        batch=batch_idx + 1,
                        size=len(val_loader),
                        data=data_time.avg,
                        bt=batch_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                        loss=losses.avg,
                        top1=top1.avg,)
            bar.next()
    print('{batch}/{size} Data:{data:.3f} | Batch:{bt:.3f} | Total:{total:} | ETA:{eta:} | Loss:{loss:} | top1:{tp1:}'.format(
         batch=batch_idx+1, size=len(val_loader), data=data_time.val, bt=batch_time.val, total=bar.elapsed_td, eta=bar.eta_td, loss=losses.avg, tp1=top1.avg))
    bar.finish()
    return (losses.avg, top1.avg)

In [18]:
def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    """Serialise `state` to ``<checkpoint>/<filename>``.

    Args:
        state: object handed to torch.save.
        is_best: when true, the saved file is also copied to
            ``<checkpoint>/model_best.pth.tar``.
        checkpoint: target directory; created if it does not exist.
        filename: file name inside `checkpoint`.
    """
    # Create the directory on demand so a not-yet-existing target (such as the
    # default 'checkpoint') does not make torch.save fail. An empty string
    # means the current directory and needs no creation.
    if checkpoint:
        os.makedirs(checkpoint, exist_ok=True)
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
    """Scale state['lr'] by a step factor and write it into every param group.

    The factor is chosen by how many milestones in the module-level
    `schedule` lie strictly before `epoch`: 1 for none, then 1e-1, 1e-2, ...
    The module-level `state` dict is updated in place.
    """
    decay_factors = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
    # Number of milestones strictly before this epoch.
    milestones_passed = sum(1 for milestone in schedule if milestone < epoch)
    state['lr'] *= decay_factors[milestones_passed]
    for group in optimizer.param_groups:
        group['lr'] = state['lr']

In [None]:
for epoch in range(start_epoch, epochs):
    # teacher feedback: at the epochs listed in iter_time, the director is
    # refreshed from the current learner and the L2-SP reference is rebuilt.
    if epoch in iter_time:
        print("iterative training: feedback {}".format(epoch))
        director_model.load_state_dict(learner_model.state_dict())
        # Fixed typo: this used to bind an unused `dicrector_model_weights`.
        director_model_weights = {}
        for name, param in director_model.named_parameters():
            director_model_weights[name] = param.detach()

    # Start from the learning rate currently in the optimizer, then apply the
    # step factor selected by `schedule`.
    state['lr'] = optimizer.state_dict()['param_groups'][0]['lr']
    adjust_learning_rate(optimizer, epoch)
    print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, epochs, state['lr']))

    train_loss, train_acc = train(train_loader, source_loader, director_model, learner_model, criterion, optimizer, epoch, use_cuda)

    # Evaluate, log and checkpoint every 50th epoch only.
    if epoch % 50 == 0:
        with torch.no_grad():
            director_model.eval()
            test_loss, test_acc = test(val_target_loader, learner_model, criterion, epoch, use_cuda)
            source_loss, source_acc = test(val_source_loader, learner_model, criterion, epoch, use_cuda)


        logger.append([state['lr'], train_loss, test_loss, source_loss, train_acc, test_acc, source_acc])
        # "Best" is judged on target-domain validation accuracy.
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict' : learner_model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=checkpoint)
    scheduler_warmup.step()


Epoch: [1 | 4000] LR: 0.040000
1/20 | Loss:0.7531 | MainLoss:0.6569 | Adaptive:0.7025 | SubLoss:0.7049 | | SPLoss:0.0000 | top1:70.0
2/20 | Loss:0.8010 | MainLoss:0.7539 | Adaptive:0.6954 | SubLoss:0.6920 | | SPLoss:0.0000 | top1:45.0
3/20 | Loss:0.7970 | MainLoss:0.6951 | Adaptive:0.6907 | SubLoss:0.6892 | | SPLoss:0.0000 | top1:46.66666793823242
4/20 | Loss:0.7869 | MainLoss:0.6617 | Adaptive:0.7060 | SubLoss:0.7050 | | SPLoss:0.0001 | top1:52.5
5/20 | Loss:0.7895 | MainLoss:0.7053 | Adaptive:0.7061 | SubLoss:0.7052 | | SPLoss:0.0001 | top1:50.0
6/20 | Loss:0.7847 | MainLoss:0.6676 | Adaptive:0.7084 | SubLoss:0.7023 | | SPLoss:0.0002 | top1:53.333335876464844
7/20 | Loss:0.7860 | MainLoss:0.7016 | Adaptive:0.7037 | SubLoss:0.6973 | | SPLoss:0.0002 | top1:51.42857360839844
8/20 | Loss:0.7870 | MainLoss:0.7033 | Adaptive:0.7025 | SubLoss:0.6984 | | SPLoss:0.0003 | top1:51.25
9/20 | Loss:0.7881 | MainLoss:0.7070 | Adaptive:0.7096 | SubLoss:0.6965 | | SPLoss:0.0005 | top1:50.0
10/20 | L

13/20 | Loss:0.7668 | MainLoss:0.6943 | Adaptive:0.7072 | SubLoss:0.6905 | | SPLoss:0.0007 | top1:50.0
14/20 | Loss:0.7663 | MainLoss:0.6907 | Adaptive:0.7084 | SubLoss:0.6924 | | SPLoss:0.0007 | top1:52.14285659790039
15/20 | Loss:0.7663 | MainLoss:0.6966 | Adaptive:0.6990 | SubLoss:0.6934 | | SPLoss:0.0007 | top1:50.66666793823242
16/20 | Loss:0.7662 | MainLoss:0.6932 | Adaptive:0.7061 | SubLoss:0.6929 | | SPLoss:0.0006 | top1:50.625
17/20 | Loss:0.7660 | MainLoss:0.6936 | Adaptive:0.7084 | SubLoss:0.6929 | | SPLoss:0.0006 | top1:50.0
18/20 | Loss:0.7658 | MainLoss:0.6922 | Adaptive:0.7084 | SubLoss:0.6941 | | SPLoss:0.0006 | top1:51.11111068725586
19/20 | Loss:0.7666 | MainLoss:0.7106 | Adaptive:0.7013 | SubLoss:0.6947 | | SPLoss:0.0006 | top1:49.47368621826172
20/20 | Loss:0.7660 | MainLoss:0.6838 | Adaptive:0.7155 | SubLoss:0.6970 | | SPLoss:0.0006 | top1:50.5

Epoch: [5 | 4000] LR: 0.088000
1/20 | Loss:0.7755 | MainLoss:0.7048 | Adaptive:0.7156 | SubLoss:0.7002 | | SPLoss:0.0006 

7/20 | Loss:0.8461 | MainLoss:0.7053 | Adaptive:0.7108 | SubLoss:0.6866 | | SPLoss:0.0114 | top1:45.71428680419922
8/20 | Loss:0.8337 | MainLoss:0.6744 | Adaptive:0.7049 | SubLoss:0.6897 | | SPLoss:0.0134 | top1:47.5
9/20 | Loss:0.8246 | MainLoss:0.6745 | Adaptive:0.7013 | SubLoss:0.7048 | | SPLoss:0.0151 | top1:48.88888931274414
10/20 | Loss:0.8181 | MainLoss:0.6766 | Adaptive:0.6990 | SubLoss:0.7259 | | SPLoss:0.0164 | top1:50.0
11/20 | Loss:0.8262 | MainLoss:0.8204 | Adaptive:0.7014 | SubLoss:0.7321 | | SPLoss:0.0172 | top1:49.09090805053711
12/20 | Loss:0.8391 | MainLoss:0.8915 | Adaptive:0.6990 | SubLoss:0.7431 | | SPLoss:0.0175 | top1:47.500003814697266
13/20 | Loss:0.8380 | MainLoss:0.7395 | Adaptive:0.6942 | SubLoss:0.7461 | | SPLoss:0.0175 | top1:47.69230651855469
14/20 | Loss:0.8387 | MainLoss:0.7670 | Adaptive:0.6978 | SubLoss:0.7175 | | SPLoss:0.0177 | top1:47.14285659790039
15/20 | Loss:0.8327 | MainLoss:0.6756 | Adaptive:0.7025 | SubLoss:0.6930 | | SPLoss:0.0181 | top1:48


Epoch: [12 | 4000] LR: 0.160000
1/20 | Loss:0.7168 | MainLoss:0.6384 | Adaptive:0.6942 | SubLoss:0.7148 | | SPLoss:0.0265 | top1:70.0
2/20 | Loss:0.8103 | MainLoss:0.8155 | Adaptive:0.7025 | SubLoss:0.7258 | | SPLoss:0.0352 | top1:55.0
3/20 | Loss:0.8043 | MainLoss:0.6956 | Adaptive:0.7049 | SubLoss:0.7347 | | SPLoss:0.0429 | top1:56.66666793823242
4/20 | Loss:0.8255 | MainLoss:0.7950 | Adaptive:0.7227 | SubLoss:0.6482 | | SPLoss:0.0498 | top1:55.0
5/20 | Loss:0.8048 | MainLoss:0.6171 | Adaptive:0.7061 | SubLoss:0.7354 | | SPLoss:0.0575 | top1:58.0
6/20 | Loss:0.8042 | MainLoss:0.6967 | Adaptive:0.7061 | SubLoss:0.7246 | | SPLoss:0.0642 | top1:58.333335876464844
7/20 | Loss:0.8114 | MainLoss:0.7488 | Adaptive:0.7001 | SubLoss:0.7435 | | SPLoss:0.0724 | top1:57.14285659790039
8/20 | Loss:0.8004 | MainLoss:0.6231 | Adaptive:0.7002 | SubLoss:0.7078 | | SPLoss:0.0861 | top1:58.75
9/20 | Loss:0.8009 | MainLoss:0.7064 | Adaptive:0.6989 | SubLoss:0.6986 | | SPLoss:0.0940 | top1:57.7777786254

15/20 | Loss:6.5588 | MainLoss:0.7378 | Adaptive:0.7037 | SubLoss:0.7285 | | SPLoss:115.0167 | top1:42.66666793823242
16/20 | Loss:6.8781 | MainLoss:0.7095 | Adaptive:0.7072 | SubLoss:0.7067 | | SPLoss:108.7183 | top1:42.5
17/20 | Loss:7.1084 | MainLoss:0.7240 | Adaptive:0.7096 | SubLoss:0.6838 | | SPLoss:99.8676 | top1:42.352943420410156
18/20 | Loss:7.2488 | MainLoss:0.6323 | Adaptive:0.7108 | SubLoss:0.6811 | | SPLoss:89.2070 | top1:43.88888931274414
19/20 | Loss:7.3154 | MainLoss:0.6795 | Adaptive:0.6989 | SubLoss:0.7284 | | SPLoss:77.4729 | top1:44.73684310913086
20/20 | Loss:7.3229 | MainLoss:0.8431 | Adaptive:0.7061 | SubLoss:0.7178 | | SPLoss:65.3487 | top1:44.5

Epoch: [16 | 4000] LR: 0.160000
1/20 | Loss:6.1144 | MainLoss:0.6849 | Adaptive:0.7037 | SubLoss:0.7190 | | SPLoss:53.4444 | top1:60.0
2/20 | Loss:5.5203 | MainLoss:0.6204 | Adaptive:0.7108 | SubLoss:0.6811 | | SPLoss:42.2734 | top1:65.0
3/20 | Loss:5.0511 | MainLoss:0.8144 | Adaptive:0.7073 | SubLoss:0.6916 | | SPLoss

9/20 | Loss:0.7912 | MainLoss:0.7046 | Adaptive:0.7060 | SubLoss:0.6865 | | SPLoss:0.1926 | top1:47.77777862548828
10/20 | Loss:0.7914 | MainLoss:0.7001 | Adaptive:0.7025 | SubLoss:0.6890 | | SPLoss:0.1865 | top1:48.0
11/20 | Loss:0.7873 | MainLoss:0.6552 | Adaptive:0.7121 | SubLoss:0.6796 | | SPLoss:0.1773 | top1:50.0
12/20 | Loss:0.7862 | MainLoss:0.6803 | Adaptive:0.6883 | SubLoss:0.7166 | | SPLoss:0.1662 | top1:50.833335876464844
13/20 | Loss:0.7947 | MainLoss:0.8047 | Adaptive:0.6978 | SubLoss:0.7043 | | SPLoss:0.1529 | top1:48.46154022216797
14/20 | Loss:0.7938 | MainLoss:0.6960 | Adaptive:0.7215 | SubLoss:0.6661 | | SPLoss:0.1375 | top1:48.57143020629883
15/20 | Loss:0.7932 | MainLoss:0.6992 | Adaptive:0.7013 | SubLoss:0.6923 | | SPLoss:0.1220 | top1:48.66666793823242
16/20 | Loss:0.7921 | MainLoss:0.6924 | Adaptive:0.6919 | SubLoss:0.6943 | | SPLoss:0.1075 | top1:48.75
17/20 | Loss:0.7922 | MainLoss:0.7108 | Adaptive:0.7073 | SubLoss:0.7009 | | SPLoss:0.0942 | top1:47.647060394

2/20 | Loss:0.9281 | MainLoss:0.7479 | Adaptive:0.7155 | SubLoss:0.8249 | | SPLoss:0.1095 | top1:40.0
3/20 | Loss:0.9142 | MainLoss:0.7847 | Adaptive:0.7084 | SubLoss:0.7653 | | SPLoss:0.1144 | top1:40.000003814697266
4/20 | Loss:0.8843 | MainLoss:0.7000 | Adaptive:0.6942 | SubLoss:0.6983 | | SPLoss:0.1249 | top1:42.5
5/20 | Loss:0.8647 | MainLoss:0.6918 | Adaptive:0.6978 | SubLoss:0.6945 | | SPLoss:0.1385 | top1:46.0
6/20 | Loss:0.8491 | MainLoss:0.6743 | Adaptive:0.7014 | SubLoss:0.6926 | | SPLoss:0.1466 | top1:48.333335876464844
7/20 | Loss:0.8533 | MainLoss:0.7780 | Adaptive:0.7002 | SubLoss:0.7074 | | SPLoss:0.1561 | top1:47.14285659790039
8/20 | Loss:0.8367 | MainLoss:0.6206 | Adaptive:0.7002 | SubLoss:0.7072 | | SPLoss:0.1462 | top1:50.0
9/20 | Loss:0.8400 | MainLoss:0.7680 | Adaptive:0.7060 | SubLoss:0.6976 | | SPLoss:0.1397 | top1:48.88888931274414
10/20 | Loss:0.8365 | MainLoss:0.7104 | Adaptive:0.7096 | SubLoss:0.6848 | | SPLoss:0.1208 | top1:49.0
11/20 | Loss:0.8374 | MainL

16/20 | Loss:0.8031 | MainLoss:0.6851 | Adaptive:0.6990 | SubLoss:0.7401 | | SPLoss:0.0892 | top1:45.625
17/20 | Loss:0.7945 | MainLoss:0.5556 | Adaptive:0.7096 | SubLoss:0.7872 | | SPLoss:0.0798 | top1:47.64706039428711
18/20 | Loss:0.8096 | MainLoss:0.9616 | Adaptive:0.7084 | SubLoss:0.8003 | | SPLoss:0.0737 | top1:46.11111068725586
19/20 | Loss:0.8105 | MainLoss:0.7289 | Adaptive:0.7049 | SubLoss:0.7536 | | SPLoss:0.0714 | top1:46.315792083740234
20/20 | Loss:0.8111 | MainLoss:0.7297 | Adaptive:0.7037 | SubLoss:0.7226 | | SPLoss:0.0765 | top1:46.0

Epoch: [27 | 4000] LR: 0.159994
1/20 | Loss:0.7784 | MainLoss:0.6889 | Adaptive:0.6966 | SubLoss:0.6999 | | SPLoss:0.0915 | top1:60.0
2/20 | Loss:0.7567 | MainLoss:0.6451 | Adaptive:0.7108 | SubLoss:0.6792 | | SPLoss:0.1112 | top1:65.0
3/20 | Loss:0.7648 | MainLoss:0.6801 | Adaptive:0.6989 | SubLoss:0.7293 | | SPLoss:0.1388 | top1:63.333335876464844
4/20 | Loss:0.8191 | MainLoss:0.8746 | Adaptive:0.7060 | SubLoss:0.7366 | | SPLoss:0.1607 

10/20 | Loss:0.7809 | MainLoss:0.6845 | Adaptive:0.7144 | SubLoss:0.7013 | | SPLoss:0.0055 | top1:47.0
11/20 | Loss:0.7788 | MainLoss:0.6863 | Adaptive:0.6918 | SubLoss:0.6911 | | SPLoss:0.0056 | top1:47.27272415161133
12/20 | Loss:0.7788 | MainLoss:0.7069 | Adaptive:0.6989 | SubLoss:0.6940 | | SPLoss:0.0055 | top1:46.66666793823242
13/20 | Loss:0.7778 | MainLoss:0.6948 | Adaptive:0.6990 | SubLoss:0.6923 | | SPLoss:0.0055 | top1:46.92307662963867
14/20 | Loss:0.7769 | MainLoss:0.6944 | Adaptive:0.7143 | SubLoss:0.6934 | | SPLoss:0.0054 | top1:46.42856979370117
15/20 | Loss:0.7759 | MainLoss:0.6896 | Adaptive:0.6942 | SubLoss:0.6933 | | SPLoss:0.0053 | top1:47.333335876464844
16/20 | Loss:0.7747 | MainLoss:0.6873 | Adaptive:0.7143 | SubLoss:0.6838 | | SPLoss:0.0052 | top1:48.125
17/20 | Loss:0.7757 | MainLoss:0.7189 | Adaptive:0.6942 | SubLoss:0.7008 | | SPLoss:0.0052 | top1:47.64706039428711
18/20 | Loss:0.7777 | MainLoss:0.7410 | Adaptive:0.7144 | SubLoss:0.6763 | | SPLoss:0.0050 | to

4/20 | Loss:0.7723 | MainLoss:0.7202 | Adaptive:0.6906 | SubLoss:0.6906 | | SPLoss:0.0031 | top1:40.0
5/20 | Loss:0.7706 | MainLoss:0.6930 | Adaptive:0.7061 | SubLoss:0.6931 | | SPLoss:0.0029 | top1:44.0
6/20 | Loss:0.7707 | MainLoss:0.7005 | Adaptive:0.7096 | SubLoss:0.6893 | | SPLoss:0.0028 | top1:43.333335876464844
7/20 | Loss:0.7681 | MainLoss:0.6827 | Adaptive:0.7167 | SubLoss:0.6866 | | SPLoss:0.0028 | top1:47.14285659790039
8/20 | Loss:0.7645 | MainLoss:0.6682 | Adaptive:0.7049 | SubLoss:0.6889 | | SPLoss:0.0028 | top1:50.0
9/20 | Loss:0.7711 | MainLoss:0.7524 | Adaptive:0.7072 | SubLoss:0.6848 | | SPLoss:0.0029 | top1:47.77777862548828
10/20 | Loss:0.7687 | MainLoss:0.6748 | Adaptive:0.7002 | SubLoss:0.6966 | | SPLoss:0.0029 | top1:49.0
11/20 | Loss:0.7696 | MainLoss:0.7043 | Adaptive:0.6978 | SubLoss:0.7007 | | SPLoss:0.0030 | top1:49.09090805053711
12/20 | Loss:0.7698 | MainLoss:0.7007 | Adaptive:0.7073 | SubLoss:0.6855 | | SPLoss:0.0029 | top1:49.16666793823242
13/20 | Loss:

18/20 | Loss:0.8240 | MainLoss:0.6904 | Adaptive:0.7049 | SubLoss:0.6891 | | SPLoss:0.0701 | top1:50.0
19/20 | Loss:0.8221 | MainLoss:0.7014 | Adaptive:0.7132 | SubLoss:0.7095 | | SPLoss:0.0843 | top1:50.000003814697266
20/20 | Loss:0.8206 | MainLoss:0.7030 | Adaptive:0.7072 | SubLoss:0.7240 | | SPLoss:0.0911 | top1:50.0

Epoch: [38 | 4000] LR: 0.159983
1/20 | Loss:0.7232 | MainLoss:0.6322 | Adaptive:0.7167 | SubLoss:0.7554 | | SPLoss:0.0912 | top1:70.0
2/20 | Loss:0.7680 | MainLoss:0.7222 | Adaptive:0.7037 | SubLoss:0.7432 | | SPLoss:0.0927 | top1:60.0
3/20 | Loss:0.8017 | MainLoss:0.7767 | Adaptive:0.7108 | SubLoss:0.7671 | | SPLoss:0.0866 | top1:53.333335876464844
4/20 | Loss:0.8069 | MainLoss:0.7365 | Adaptive:0.7108 | SubLoss:0.7398 | | SPLoss:0.0703 | top1:50.0
5/20 | Loss:0.7956 | MainLoss:0.6726 | Adaptive:0.6978 | SubLoss:0.6965 | | SPLoss:0.0521 | top1:54.0
6/20 | Loss:0.7921 | MainLoss:0.6978 | Adaptive:0.7167 | SubLoss:0.6990 | | SPLoss:0.0407 | top1:53.333335876464844
7/20

12/20 | Loss:0.7825 | MainLoss:0.6961 | Adaptive:0.7060 | SubLoss:0.7022 | | SPLoss:0.0665 | top1:45.000003814697266
13/20 | Loss:0.7807 | MainLoss:0.6811 | Adaptive:0.7060 | SubLoss:0.7030 | | SPLoss:0.0555 | top1:46.153846740722656
14/20 | Loss:0.7789 | MainLoss:0.6796 | Adaptive:0.6919 | SubLoss:0.6918 | | SPLoss:0.0458 | top1:47.14285659790039
15/20 | Loss:0.7802 | MainLoss:0.7212 | Adaptive:0.7096 | SubLoss:0.7156 | | SPLoss:0.0376 | top1:46.66666793823242
16/20 | Loss:0.7810 | MainLoss:0.7166 | Adaptive:0.7002 | SubLoss:0.7017 | | SPLoss:0.0306 | top1:46.25
17/20 | Loss:0.7796 | MainLoss:0.6830 | Adaptive:0.6930 | SubLoss:0.6925 | | SPLoss:0.0249 | top1:47.05882263183594
18/20 | Loss:0.7790 | MainLoss:0.6960 | Adaptive:0.6989 | SubLoss:0.6940 | | SPLoss:0.0205 | top1:47.222225189208984
19/20 | Loss:0.7782 | MainLoss:0.6906 | Adaptive:0.6931 | SubLoss:0.6927 | | SPLoss:0.0173 | top1:47.894737243652344
20/20 | Loss:0.7778 | MainLoss:0.6974 | Adaptive:0.7073 | SubLoss:0.6950 | | SPL

6/20 | Loss:0.8211 | MainLoss:0.7035 | Adaptive:0.7084 | SubLoss:0.7214 | | SPLoss:0.3403 | top1:48.333335876464844
7/20 | Loss:0.8204 | MainLoss:0.7147 | Adaptive:0.6966 | SubLoss:0.6963 | | SPLoss:0.3039 | top1:47.14285659790039
8/20 | Loss:0.8164 | MainLoss:0.6913 | Adaptive:0.7096 | SubLoss:0.6951 | | SPLoss:0.2645 | top1:48.75
9/20 | Loss:0.8123 | MainLoss:0.6868 | Adaptive:0.6966 | SubLoss:0.6939 | | SPLoss:0.2241 | top1:50.0
10/20 | Loss:0.8098 | MainLoss:0.6977 | Adaptive:0.7001 | SubLoss:0.6936 | | SPLoss:0.1846 | top1:50.0
11/20 | Loss:0.8106 | MainLoss:0.7318 | Adaptive:0.7060 | SubLoss:0.6872 | | SPLoss:0.1477 | top1:49.09090805053711
12/20 | Loss:0.8088 | MainLoss:0.7052 | Adaptive:0.7002 | SubLoss:0.6980 | | SPLoss:0.1145 | top1:49.16666793823242
13/20 | Loss:0.8086 | MainLoss:0.7249 | Adaptive:0.6942 | SubLoss:0.7046 | | SPLoss:0.0858 | top1:48.46154022216797
14/20 | Loss:0.8060 | MainLoss:0.6939 | Adaptive:0.7025 | SubLoss:0.6896 | | SPLoss:0.0625 | top1:48.571430206298

20/20 | Loss:0.7721 | MainLoss:0.6657 | Adaptive:0.7120 | SubLoss:0.6819 | | SPLoss:0.0043 | top1:53.0

Epoch: [49 | 4000] LR: 0.159966
1/20 | Loss:0.7904 | MainLoss:0.7197 | Adaptive:0.7108 | SubLoss:0.6796 | | SPLoss:0.0040 | top1:40.0
2/20 | Loss:0.7696 | MainLoss:0.6784 | Adaptive:0.7120 | SubLoss:0.6785 | | SPLoss:0.0039 | top1:50.0
3/20 | Loss:0.7700 | MainLoss:0.6997 | Adaptive:0.7084 | SubLoss:0.6833 | | SPLoss:0.0038 | top1:50.000003814697266
4/20 | Loss:0.7762 | MainLoss:0.7217 | Adaptive:0.6930 | SubLoss:0.7049 | | SPLoss:0.0038 | top1:47.5
5/20 | Loss:0.7748 | MainLoss:0.6980 | Adaptive:0.7061 | SubLoss:0.6872 | | SPLoss:0.0040 | top1:48.0
6/20 | Loss:0.7782 | MainLoss:0.7246 | Adaptive:0.7013 | SubLoss:0.6909 | | SPLoss:0.0041 | top1:41.66666793823242
7/20 | Loss:0.7767 | MainLoss:0.6956 | Adaptive:0.6978 | SubLoss:0.6971 | | SPLoss:0.0045 | top1:42.85714340209961
8/20 | Loss:0.7807 | MainLoss:0.7328 | Adaptive:0.7013 | SubLoss:0.7145 | | SPLoss:0.0050 | top1:42.5
9/20 | L

12/20 | Loss:0.8141 | MainLoss:0.7899 | Adaptive:0.7061 | SubLoss:0.6890 | | SPLoss:0.0102 | top1:45.833335876464844
13/20 | Loss:0.8091 | MainLoss:0.6751 | Adaptive:0.7049 | SubLoss:0.6905 | | SPLoss:0.0164 | top1:46.92307662963867
14/20 | Loss:0.8087 | MainLoss:0.7294 | Adaptive:0.7084 | SubLoss:0.6820 | | SPLoss:0.0246 | top1:46.42856979370117
15/20 | Loss:0.8065 | MainLoss:0.7028 | Adaptive:0.7120 | SubLoss:0.6845 | | SPLoss:0.0334 | top1:46.0
16/20 | Loss:0.8050 | MainLoss:0.7070 | Adaptive:0.7144 | SubLoss:0.7003 | | SPLoss:0.0423 | top1:45.0
17/20 | Loss:0.8030 | MainLoss:0.6940 | Adaptive:0.7037 | SubLoss:0.6996 | | SPLoss:0.0498 | top1:45.29411697387695
18/20 | Loss:0.8014 | MainLoss:0.6967 | Adaptive:0.6978 | SubLoss:0.6974 | | SPLoss:0.0557 | top1:45.55555725097656
19/20 | Loss:0.7982 | MainLoss:0.6629 | Adaptive:0.7097 | SubLoss:0.7100 | | SPLoss:0.0595 | top1:46.842105865478516
20/20 | Loss:0.7997 | MainLoss:0.7478 | Adaptive:0.7036 | SubLoss:0.7091 | | SPLoss:0.0615 | top

6/20 | Loss:0.8027 | MainLoss:0.6610 | Adaptive:0.7120 | SubLoss:0.6757 | | SPLoss:0.0793 | top1:56.66666793823242
7/20 | Loss:0.7958 | MainLoss:0.6612 | Adaptive:0.6978 | SubLoss:0.7109 | | SPLoss:0.0780 | top1:57.14285659790039
8/20 | Loss:0.7916 | MainLoss:0.6720 | Adaptive:0.7120 | SubLoss:0.6796 | | SPLoss:0.0793 | top1:57.5
9/20 | Loss:0.8092 | MainLoss:0.8598 | Adaptive:0.7120 | SubLoss:0.6825 | | SPLoss:0.0817 | top1:54.4444465637207
10/20 | Loss:0.7941 | MainLoss:0.5677 | Adaptive:0.7025 | SubLoss:0.7093 | | SPLoss:0.0833 | top1:57.0
11/20 | Loss:0.8008 | MainLoss:0.7805 | Adaptive:0.7084 | SubLoss:0.6892 | | SPLoss:0.0858 | top1:55.45454406738281
12/20 | Loss:0.7974 | MainLoss:0.6735 | Adaptive:0.7049 | SubLoss:0.6942 | | SPLoss:0.0876 | top1:55.833335876464844
13/20 | Loss:0.7991 | MainLoss:0.7368 | Adaptive:0.7060 | SubLoss:0.6874 | | SPLoss:0.0887 | top1:54.61538314819336
14/20 | Loss:0.7976 | MainLoss:0.6965 | Adaptive:0.6954 | SubLoss:0.6970 | | SPLoss:0.0888 | top1:54.2

20/20 | Loss:0.7685 | MainLoss:0.6913 | Adaptive:0.7109 | SubLoss:0.6885 | | SPLoss:0.0066 | top1:50.0

Epoch: [60 | 4000] LR: 0.159943
1/20 | Loss:0.7576 | MainLoss:0.6868 | Adaptive:0.7132 | SubLoss:0.6867 | | SPLoss:0.0057 | top1:50.0
2/20 | Loss:0.7687 | MainLoss:0.7082 | Adaptive:0.6990 | SubLoss:0.6944 | | SPLoss:0.0049 | top1:40.0
3/20 | Loss:0.7650 | MainLoss:0.6861 | Adaptive:0.7072 | SubLoss:0.6965 | | SPLoss:0.0044 | top1:50.000003814697266
4/20 | Loss:0.7745 | MainLoss:0.7307 | Adaptive:0.7013 | SubLoss:0.6981 | | SPLoss:0.0040 | top1:45.0
5/20 | Loss:0.7764 | MainLoss:0.7109 | Adaptive:0.7085 | SubLoss:0.7074 | | SPLoss:0.0036 | top1:44.0
6/20 | Loss:0.7767 | MainLoss:0.7075 | Adaptive:0.6835 | SubLoss:0.6874 | | SPLoss:0.0031 | top1:43.333335876464844
7/20 | Loss:0.7749 | MainLoss:0.6935 | Adaptive:0.6977 | SubLoss:0.6930 | | SPLoss:0.0027 | top1:45.71428680419922
8/20 | Loss:0.7749 | MainLoss:0.7036 | Adaptive:0.7120 | SubLoss:0.6976 | | SPLoss:0.0025 | top1:43.75
9/20 |

14/20 | Loss:0.8506 | MainLoss:0.7406 | Adaptive:0.7014 | SubLoss:0.7215 | | SPLoss:0.2247 | top1:53.57143020629883
15/20 | Loss:0.8446 | MainLoss:0.6296 | Adaptive:0.7073 | SubLoss:0.7808 | | SPLoss:0.2656 | top1:54.66666793823242
16/20 | Loss:0.8542 | MainLoss:0.8666 | Adaptive:0.7002 | SubLoss:0.7811 | | SPLoss:0.3057 | top1:53.75
17/20 | Loss:0.8625 | MainLoss:0.8745 | Adaptive:0.7013 | SubLoss:0.7614 | | SPLoss:0.2831 | top1:52.352943420410156
18/20 | Loss:0.8590 | MainLoss:0.6971 | Adaptive:0.7061 | SubLoss:0.6999 | | SPLoss:0.2192 | top1:52.222225189208984
19/20 | Loss:0.8590 | MainLoss:0.7649 | Adaptive:0.7085 | SubLoss:0.6849 | | SPLoss:0.1688 | top1:51.052635192871094
20/20 | Loss:0.8631 | MainLoss:0.8530 | Adaptive:0.7202 | SubLoss:0.6540 | | SPLoss:0.1357 | top1:49.5

Epoch: [64 | 4000] LR: 0.159933
1/20 | Loss:0.7610 | MainLoss:0.6720 | Adaptive:0.6989 | SubLoss:0.7021 | | SPLoss:0.1117 | top1:60.0
2/20 | Loss:0.7833 | MainLoss:0.7205 | Adaptive:0.7132 | SubLoss:0.6776 | |

8/20 | Loss:0.7747 | MainLoss:0.7020 | Adaptive:0.7061 | SubLoss:0.7097 | | SPLoss:0.0505 | top1:55.0
9/20 | Loss:0.7776 | MainLoss:0.7179 | Adaptive:0.7025 | SubLoss:0.6995 | | SPLoss:0.0515 | top1:53.333335876464844
10/20 | Loss:0.7765 | MainLoss:0.6848 | Adaptive:0.7084 | SubLoss:0.6957 | | SPLoss:0.0544 | top1:53.0
11/20 | Loss:0.7761 | MainLoss:0.6911 | Adaptive:0.7084 | SubLoss:0.6907 | | SPLoss:0.0551 | top1:52.727272033691406
12/20 | Loss:0.7759 | MainLoss:0.6924 | Adaptive:0.7108 | SubLoss:0.6857 | | SPLoss:0.0580 | top1:52.500003814697266
13/20 | Loss:0.7746 | MainLoss:0.6789 | Adaptive:0.7120 | SubLoss:0.6841 | | SPLoss:0.0580 | top1:53.846153259277344
14/20 | Loss:0.7704 | MainLoss:0.6363 | Adaptive:0.7084 | SubLoss:0.6805 | | SPLoss:0.0609 | top1:55.71428680419922
15/20 | Loss:0.7735 | MainLoss:0.7349 | Adaptive:0.7144 | SubLoss:0.6746 | | SPLoss:0.0710 | top1:54.66666793823242
16/20 | Loss:0.7729 | MainLoss:0.6763 | Adaptive:0.6943 | SubLoss:0.7296 | | SPLoss:0.0756 | top

2/20 | Loss:0.7654 | MainLoss:0.6725 | Adaptive:0.7096 | SubLoss:0.7541 | | SPLoss:0.0273 | top1:55.0
3/20 | Loss:0.8295 | MainLoss:0.8648 | Adaptive:0.7084 | SubLoss:0.7848 | | SPLoss:0.0418 | top1:46.66666793823242
4/20 | Loss:0.8144 | MainLoss:0.6772 | Adaptive:0.7013 | SubLoss:0.7597 | | SPLoss:0.0556 | top1:50.0
5/20 | Loss:0.8394 | MainLoss:0.8485 | Adaptive:0.6989 | SubLoss:0.7459 | | SPLoss:0.0699 | top1:46.0
6/20 | Loss:0.8418 | MainLoss:0.7664 | Adaptive:0.7001 | SubLoss:0.7240 | | SPLoss:0.0838 | top1:45.000003814697266
7/20 | Loss:0.8278 | MainLoss:0.6606 | Adaptive:0.7048 | SubLoss:0.7092 | | SPLoss:0.0985 | top1:48.57143020629883
8/20 | Loss:0.8209 | MainLoss:0.6899 | Adaptive:0.7072 | SubLoss:0.6950 | | SPLoss:0.1123 | top1:50.0
9/20 | Loss:0.8165 | MainLoss:0.6988 | Adaptive:0.7084 | SubLoss:0.6895 | | SPLoss:0.1238 | top1:48.88888931274414
10/20 | Loss:0.8095 | MainLoss:0.6616 | Adaptive:0.7013 | SubLoss:0.6920 | | SPLoss:0.1318 | top1:52.0
11/20 | Loss:0.8075 | MainLo

16/20 | Loss:0.8120 | MainLoss:0.7995 | Adaptive:0.7073 | SubLoss:0.6987 | | SPLoss:0.0133 | top1:46.25
17/20 | Loss:0.8129 | MainLoss:0.7445 | Adaptive:0.7060 | SubLoss:0.7049 | | SPLoss:0.0117 | top1:46.47058868408203
18/20 | Loss:0.8221 | MainLoss:0.8998 | Adaptive:0.7073 | SubLoss:0.6950 | | SPLoss:0.0125 | top1:45.0
19/20 | Loss:0.8202 | MainLoss:0.7061 | Adaptive:0.6930 | SubLoss:0.7157 | | SPLoss:0.0253 | top1:45.263160705566406
20/20 | Loss:0.8189 | MainLoss:0.7187 | Adaptive:0.7096 | SubLoss:0.6861 | | SPLoss:0.0548 | top1:44.5

Epoch: [75 | 4000] LR: 0.159902
1/20 | Loss:0.8343 | MainLoss:0.7513 | Adaptive:0.7085 | SubLoss:0.7097 | | SPLoss:0.0952 | top1:20.0
2/20 | Loss:0.7508 | MainLoss:0.5773 | Adaptive:0.7096 | SubLoss:0.7277 | | SPLoss:0.1363 | top1:55.0
3/20 | Loss:0.7772 | MainLoss:0.7277 | Adaptive:0.7108 | SubLoss:0.7687 | | SPLoss:0.1786 | top1:53.333335876464844
4/20 | Loss:0.7966 | MainLoss:0.7473 | Adaptive:0.6977 | SubLoss:0.7503 | | SPLoss:0.2147 | top1:52.5
5/

10/20 | Loss:0.7744 | MainLoss:0.6750 | Adaptive:0.7120 | SubLoss:0.7728 | | SPLoss:0.0259 | top1:53.0
11/20 | Loss:0.7831 | MainLoss:0.7829 | Adaptive:0.7096 | SubLoss:0.7733 | | SPLoss:0.0213 | top1:51.818180084228516
12/20 | Loss:0.7811 | MainLoss:0.6747 | Adaptive:0.7060 | SubLoss:0.7504 | | SPLoss:0.0175 | top1:52.500003814697266
13/20 | Loss:0.7788 | MainLoss:0.6728 | Adaptive:0.6978 | SubLoss:0.7163 | | SPLoss:0.0148 | top1:53.07692337036133
14/20 | Loss:0.7767 | MainLoss:0.6728 | Adaptive:0.7037 | SubLoss:0.7193 | | SPLoss:0.0133 | top1:53.57143020629883
15/20 | Loss:0.7769 | MainLoss:0.7042 | Adaptive:0.6990 | SubLoss:0.7048 | | SPLoss:0.0120 | top1:53.333335876464844
16/20 | Loss:0.7776 | MainLoss:0.7154 | Adaptive:0.7061 | SubLoss:0.7051 | | SPLoss:0.0117 | top1:52.5
17/20 | Loss:0.7770 | MainLoss:0.6935 | Adaptive:0.7072 | SubLoss:0.6948 | | SPLoss:0.0140 | top1:51.764705657958984
18/20 | Loss:0.7758 | MainLoss:0.6841 | Adaptive:0.7049 | SubLoss:0.6887 | | SPLoss:0.0180 | t

4/20 | Loss:0.7612 | MainLoss:0.6609 | Adaptive:0.6918 | SubLoss:0.6932 | | SPLoss:0.0223 | top1:55.0
5/20 | Loss:0.7771 | MainLoss:0.7629 | Adaptive:0.7001 | SubLoss:0.7123 | | SPLoss:0.0259 | top1:50.0
6/20 | Loss:0.7828 | MainLoss:0.7324 | Adaptive:0.7060 | SubLoss:0.7215 | | SPLoss:0.0275 | top1:48.333335876464844
7/20 | Loss:0.7759 | MainLoss:0.6559 | Adaptive:0.7203 | SubLoss:0.7319 | | SPLoss:0.0277 | top1:51.42857360839844
8/20 | Loss:0.7812 | MainLoss:0.7416 | Adaptive:0.7108 | SubLoss:0.7140 | | SPLoss:0.0278 | top1:48.75
9/20 | Loss:0.7794 | MainLoss:0.6908 | Adaptive:0.6895 | SubLoss:0.6912 | | SPLoss:0.0270 | top1:50.0
10/20 | Loss:0.7785 | MainLoss:0.6965 | Adaptive:0.6966 | SubLoss:0.6931 | | SPLoss:0.0257 | top1:47.0
11/20 | Loss:0.7774 | MainLoss:0.6935 | Adaptive:0.7061 | SubLoss:0.6890 | | SPLoss:0.0240 | top1:47.27272415161133
12/20 | Loss:0.7792 | MainLoss:0.7254 | Adaptive:0.7049 | SubLoss:0.6883 | | SPLoss:0.0219 | top1:45.833335876464844
13/20 | Loss:0.7781 | Ma

18/20 | Loss:0.8478 | MainLoss:0.8107 | Adaptive:0.7037 | SubLoss:0.7223 | | SPLoss:0.9069 | top1:51.11111068725586
19/20 | Loss:0.8478 | MainLoss:0.6806 | Adaptive:0.7014 | SubLoss:0.6999 | | SPLoss:0.9503 | top1:51.578948974609375
20/20 | Loss:0.8485 | MainLoss:0.6941 | Adaptive:0.7025 | SubLoss:0.6934 | | SPLoss:0.9597 | top1:52.0

Epoch: [86 | 4000] LR: 0.159865
1/20 | Loss:0.8802 | MainLoss:0.7156 | Adaptive:0.7097 | SubLoss:0.6833 | | SPLoss:0.9392 | top1:40.0
2/20 | Loss:0.8578 | MainLoss:0.6731 | Adaptive:0.6990 | SubLoss:0.7005 | | SPLoss:0.8908 | top1:50.0
3/20 | Loss:0.8733 | MainLoss:0.7504 | Adaptive:0.7143 | SubLoss:0.6704 | | SPLoss:0.8213 | top1:46.66666793823242
4/20 | Loss:0.8803 | MainLoss:0.7572 | Adaptive:0.7179 | SubLoss:0.6606 | | SPLoss:0.7351 | top1:45.0
5/20 | Loss:0.8729 | MainLoss:0.7056 | Adaptive:0.7002 | SubLoss:0.6988 | | SPLoss:0.6388 | top1:46.0
6/20 | Loss:0.8686 | MainLoss:0.7213 | Adaptive:0.7002 | SubLoss:0.6940 | | SPLoss:0.5390 | top1:45.00000381

12/20 | Loss:0.8301 | MainLoss:0.7853 | Adaptive:0.7001 | SubLoss:0.7426 | | SPLoss:0.2023 | top1:53.333335876464844
13/20 | Loss:0.8300 | MainLoss:0.7231 | Adaptive:0.7084 | SubLoss:0.7506 | | SPLoss:0.2224 | top1:53.07692337036133
14/20 | Loss:0.8323 | MainLoss:0.7598 | Adaptive:0.7061 | SubLoss:0.7214 | | SPLoss:0.2395 | top1:51.42857360839844
15/20 | Loss:0.8299 | MainLoss:0.6984 | Adaptive:0.7014 | SubLoss:0.6949 | | SPLoss:0.2528 | top1:50.0
16/20 | Loss:0.8294 | MainLoss:0.7221 | Adaptive:0.7060 | SubLoss:0.6880 | | SPLoss:0.2666 | top1:49.375
17/20 | Loss:0.8263 | MainLoss:0.6735 | Adaptive:0.7037 | SubLoss:0.6918 | | SPLoss:0.2794 | top1:50.0
18/20 | Loss:0.8331 | MainLoss:0.8441 | Adaptive:0.7097 | SubLoss:0.6819 | | SPLoss:0.2890 | top1:48.333335876464844
19/20 | Loss:0.8379 | MainLoss:0.8156 | Adaptive:0.6989 | SubLoss:0.7027 | | SPLoss:0.2941 | top1:46.842105865478516
20/20 | Loss:0.8360 | MainLoss:0.6943 | Adaptive:0.7073 | SubLoss:0.6908 | | SPLoss:0.2992 | top1:46.5

Ep

6/20 | Loss:0.7832 | MainLoss:0.6160 | Adaptive:0.6930 | SubLoss:0.7102 | | SPLoss:0.0466 | top1:48.333335876464844
7/20 | Loss:0.7944 | MainLoss:0.7799 | Adaptive:0.6918 | SubLoss:0.7192 | | SPLoss:0.0474 | top1:45.71428680419922
8/20 | Loss:0.7928 | MainLoss:0.7031 | Adaptive:0.7061 | SubLoss:0.6872 | | SPLoss:0.0488 | top1:46.25
9/20 | Loss:0.7929 | MainLoss:0.7170 | Adaptive:0.7143 | SubLoss:0.6777 | | SPLoss:0.0505 | top1:45.55555725097656
10/20 | Loss:0.7901 | MainLoss:0.6872 | Adaptive:0.7002 | SubLoss:0.6931 | | SPLoss:0.0527 | top1:47.0
11/20 | Loss:0.7875 | MainLoss:0.6841 | Adaptive:0.6942 | SubLoss:0.6928 | | SPLoss:0.0536 | top1:49.09090805053711
12/20 | Loss:0.7869 | MainLoss:0.6991 | Adaptive:0.7096 | SubLoss:0.7164 | | SPLoss:0.0558 | top1:49.16666793823242
13/20 | Loss:0.7849 | MainLoss:0.6730 | Adaptive:0.7262 | SubLoss:0.7663 | | SPLoss:0.0562 | top1:50.0
14/20 | Loss:0.7891 | MainLoss:0.7579 | Adaptive:0.7084 | SubLoss:0.7463 | | SPLoss:0.0557 | top1:49.285713195800

20/20 | Loss:0.8074 | MainLoss:0.6752 | Adaptive:0.6918 | SubLoss:0.6922 | | SPLoss:0.2793 | top1:50.5

Epoch: [97 | 4000] LR: 0.159822
1/20 | Loss:0.7755 | MainLoss:0.6784 | Adaptive:0.6966 | SubLoss:0.7030 | | SPLoss:0.2329 | top1:60.0
2/20 | Loss:0.7557 | MainLoss:0.6404 | Adaptive:0.7060 | SubLoss:0.7231 | | SPLoss:0.1887 | top1:65.0
3/20 | Loss:0.7988 | MainLoss:0.7930 | Adaptive:0.6978 | SubLoss:0.7156 | | SPLoss:0.1482 | top1:53.333335876464844
4/20 | Loss:0.7890 | MainLoss:0.6680 | Adaptive:0.7191 | SubLoss:0.7574 | | SPLoss:0.1118 | top1:55.0
5/20 | Loss:0.7828 | MainLoss:0.6721 | Adaptive:0.7108 | SubLoss:0.7325 | | SPLoss:0.0809 | top1:56.0
6/20 | Loss:0.7831 | MainLoss:0.7032 | Adaptive:0.7084 | SubLoss:0.7206 | | SPLoss:0.0562 | top1:55.000003814697266
7/20 | Loss:0.7837 | MainLoss:0.7115 | Adaptive:0.6918 | SubLoss:0.6919 | | SPLoss:0.0373 | top1:52.85714340209961
8/20 | Loss:0.7801 | MainLoss:0.6807 | Adaptive:0.7025 | SubLoss:0.6959 | | SPLoss:0.0249 | top1:52.5
9/20 | 

14/20 | Loss:0.7859 | MainLoss:0.6772 | Adaptive:0.7025 | SubLoss:0.7045 | | SPLoss:0.0105 | top1:50.71428680419922
15/20 | Loss:0.7842 | MainLoss:0.6871 | Adaptive:0.6989 | SubLoss:0.6954 | | SPLoss:0.0112 | top1:51.333335876464844
16/20 | Loss:0.7824 | MainLoss:0.6826 | Adaptive:0.7084 | SubLoss:0.6939 | | SPLoss:0.0118 | top1:53.125
17/20 | Loss:0.7826 | MainLoss:0.7118 | Adaptive:0.7049 | SubLoss:0.6910 | | SPLoss:0.0125 | top1:51.764705657958984
18/20 | Loss:0.7814 | MainLoss:0.6874 | Adaptive:0.7049 | SubLoss:0.6898 | | SPLoss:0.0126 | top1:51.66666793823242
19/20 | Loss:0.7803 | MainLoss:0.6864 | Adaptive:0.6966 | SubLoss:0.6952 | | SPLoss:0.0125 | top1:52.10526657104492
20/20 | Loss:0.7778 | MainLoss:0.6559 | Adaptive:0.6930 | SubLoss:0.6976 | | SPLoss:0.0123 | top1:53.0

Epoch: [101 | 4000] LR: 0.159805
1/20 | Loss:0.8568 | MainLoss:0.7827 | Adaptive:0.7096 | SubLoss:0.6832 | | SPLoss:0.0119 | top1:20.0
2/20 | Loss:0.8316 | MainLoss:0.7313 | Adaptive:0.6942 | SubLoss:0.6987 | 