In [1]:
import json
import os
import time
import random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

from model_pytorch import EfficientNet
from utils import Bar,Logger, AverageMeter, accuracy, mkdir_p, savefig
from warmup_scheduler import GradualWarmupScheduler
import cv2
from scipy.ndimage.filters import gaussian_filter

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import collections

In [2]:
# GPU Device
gpu_id = 0
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
use_cuda = torch.cuda.is_available()
print("GPU device %d:" %(gpu_id), use_cuda)

GPU device 0: True


# Arguments

In [3]:
source_dir = '/media/data2/dataset/GAN_ImageData/PGGAN_128/'
target_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN2_256/'

In [4]:
pretrained = './log/pggan/128/b0/siamese_aug/checkpoint.pth.tar'
resume = ''

In [5]:
# Model
model_name = 'efficientnet-b0' # b0-b7 scale

# Optimization
num_classes = 128
epochs = 1000
start_epoch = 0
train_batch = 125
test_batch = 200
lr = 0.04
schedule = [50, 250, 500, 750]
momentum = 0.1
gamma = 0.1 # LR is multiplied by gamma on schedule

# CheckPoint
checkpoint = './log/pggan/128/b0/to_style2/1000shot/siamese/self' # dir
if not os.path.isdir(checkpoint):
    os.makedirs(checkpoint)
num_workers = 8

# Seed
manual_seed = 7
random.seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)

# Image
size = (128, 128)

# sp
sp_alpha = 0.1
sp_beta = 0.1
fc_name = '_fc.'

# iterative training
feedback = 0
iter_time = []

# augmentation
blur_prob = 0.5
blog_sig = 0.5
jpg_prob = 0.5

# cutmix
cm_prob = 0.5
cm_beta = 1.0

best_acc = 0

In [6]:
state = {}
state['num_classes'] = num_classes
state['epochs'] = epochs
state['start_epoch'] = start_epoch
state['train_batch'] = train_batch
state['test_batch'] = test_batch
state['lr'] = lr
state['schedule'] = schedule
state['momentum'] = momentum
state['gamma'] = gamma

In [7]:
class SiameseNetworkDataset(Dataset):
    
    def __init__(self,imageFolderDataset,transform=None,should_invert=True):
        self.imageFolderDataset = imageFolderDataset    
        self.transform = transform
        self.should_invert = should_invert
        
    def __getitem__(self,index):
        img0_tuple = random.choice(self.imageFolderDataset.imgs)
        #we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0,1) 
        if should_get_same_class:
            while True:
                #keep looping till the same class image is found
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1]==img1_tuple[1]:
                    break
        else:
            while True:
                #keep looping till a different class image is found
                
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1] !=img1_tuple[1]:
                    break

        img0 = Image.open(img0_tuple[0])
        img1 = Image.open(img1_tuple[0])
#         img0 = img0.convert("L")
#         img1 = img1.convert("L")
        
        if self.should_invert:
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        
        return img0, img1 , torch.from_numpy(np.array([int(img1_tuple[1]!=img0_tuple[1])],dtype=np.float32))
    
    def __len__(self):
        return len(self.imageFolderDataset.imgs)

In [8]:
class ContrastiveLoss(torch.nn.Module):
    """
    Contrastive loss function.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim = True)
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))


        return loss_contrastive

In [9]:
def data_augment(img):
    img = np.array(img)

    if random.random() < blur_prob:
        sig = np.random.uniform(0.0, 3.0)
        gaussian_blur(img, sig)

    if random.random() < jpg_prob:
        qual = np.random.uniform(30.0, 100.0)
        img = cv2_jpg(img, qual)

    return Image.fromarray(img)


def gaussian_blur(img, sigma):
    gaussian_filter(img[:,:,0], output=img[:,:,0], sigma=sigma)
    gaussian_filter(img[:,:,1], output=img[:,:,1], sigma=sigma)
    gaussian_filter(img[:,:,2], output=img[:,:,2], sigma=sigma)


def cv2_jpg(img, compress_val):
    img_cv2 = img[:,:,::-1]
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val]
    result, encimg = cv2.imencode('.jpg', img_cv2, encode_param)
    decimg = cv2.imdecode(encimg, 1)
    return decimg[:,:,::-1]

# Dataset

In [10]:
train_dir = os.path.join(source_dir, 'style2/1000_shot_only')
# source_train_dir = os.path.join(target_dir, '100_shot_style1_only')
val_target_dir = os.path.join(target_dir, 'validation')
val_source_dir = os.path.join(source_dir, 'validation')

train_aug = transforms.Compose([
    transforms.Lambda(lambda img: data_augment(img)),
    transforms.Resize(size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
val_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# pin_memory : cuda pin memeory use
train_ = SiameseNetworkDataset(datasets.ImageFolder(train_dir), transform=train_aug, should_invert=False)
train_loader = DataLoader(train_, shuffle=True, num_workers=num_workers, batch_size=train_batch, pin_memory=True)
val_target_ = SiameseNetworkDataset(datasets.ImageFolder(val_target_dir), transform=val_aug, should_invert=False)
val_target_loader = DataLoader(val_target_, shuffle=True, num_workers=num_workers, batch_size=test_batch, pin_memory=True)
val_source_ = SiameseNetworkDataset(datasets.ImageFolder(val_source_dir), transform=val_aug, should_invert=False)
val_source_loader = DataLoader(val_source_, batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

# Model

In [11]:
teacher_model = EfficientNet.from_name(model_name, num_classes=num_classes,
                                      override_params={'dropout_rate':0.0})
student_model = EfficientNet.from_name(model_name, num_classes=num_classes,
                                      override_params={'dropout_rate':0.0, 'drop_connect_rate':0.2})

# Pre-trained
if pretrained:
    print("=> using pre-trained model '{}'".format(pretrained))
    teacher_model.load_state_dict(torch.load(pretrained)['state_dict'])
    student_model.load_state_dict(torch.load(pretrained)['state_dict'])

=> using pre-trained model './log/pggan/128/b0/siamese_aug/checkpoint.pth.tar'


In [12]:
teacher_model.to('cuda')
student_model.to('cuda')
cudnn.benchmark = True
print('    Total params: %.2fM' % (sum(p.numel() for p in student_model.parameters())/1000000.0))

    Total params: 4.17M


In [13]:
for param in teacher_model.parameters():
    param.requires_grad = False
teacher_model.eval()

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_gn0): GroupNorm(8, 32, eps=1e-05, affine=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_gn1): GroupNorm(8, 32, eps=1e-05, affine=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
        (static_padding): Identity()
      )
      (_gn2): GroupNorm(8, 16, ep

In [14]:
teacher_model_weights = {}
for name, param in teacher_model.named_parameters():
    teacher_model_weights[name] = param.detach()

In [15]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [16]:
def reg_cls(model):
    l2_cls = torch.tensor(0.).cuda()
    for name, param in model.named_parameters():
        if name.startswith(fc_name):
            l2_cls += torch.norm(param, p=1)
    return l2_cls

def reg_l2sp(model):
    sp_loss = torch.tensor(0.).cuda()
    for name, param in model.named_parameters():
        sp_loss += torch.norm(param - teacher_model_weights[name], p=1)
    return sp_loss

# Loss

In [17]:
criterion = ContrastiveLoss(margin=1.0).cuda()
optimizer = optim.SGD(student_model.parameters(), lr=lr, momentum=momentum)
# optimizer = optim.Adam(student_model.parameters())
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=4, total_epoch=50, after_scheduler=scheduler_cosine)

In [18]:
# Resume
if resume:
    print('==> Resuming from checkpoint..')
    checkpoint = os.path.dirname(resume)
#     checkpoint = torch.load(resume)
    resume = torch.load(resume)
    best_acc = resume['best_acc']
    start_epoch = resume['epoch']
    student_model.load_state_dict(resume['state_dict'])
    optimizer.load_state_dict(resume['optimizer'])
    logger = Logger(os.path.join(checkpoint, 'log.txt'), resume=True)
else:
    logger = Logger(os.path.join(checkpoint, 'log.txt'))
    logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Source Loss', 'Train Acc.', 'Valid Acc.', 'Source ACC.', 'Train AUROC', 'Valid AUROC', 'Source AUROC'])

# Train

In [19]:
def train(train_loader, source_train_loader, teacher_model, student_model, criterion, optimizer, epoch, use_cuda):
    student_model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()
    arc = AverageMeter()
    cls_losses = AverageMeter()
    sp_losses = AverageMeter()
    main_losses = AverageMeter()
    alpha = AverageMeter()
    
    for batch_idx, (inputs0, inputs1, targets) in enumerate(train_loader):
        batch_size = inputs0.size(0)
        if batch_size < train_batch:
            continue
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs0, inputs1, targets = inputs0.cuda(), inputs1.cuda(), targets.cuda()
        
        outputs0 = student_model(inputs0)
        outputs1 = student_model(inputs1)
        
        r = np.random.rand(1)
        if cm_beta > 0 and r < cm_prob:
            
            target_index = targets[targets==0]
            target_index = target_index.long().cuda()
            lam = np.random.beta(cm_beta, cm_beta)
            bbx1, bby1, bbx2, bby2 = rand_bbox(inputs0.size(), lam)
            inputs0[target_index, :, bbx1:bbx2, bby1:bby2] = inputs1[target_index, :, bbx1:bbx2, bby1:bby2]
        
        outputs = F.pairwise_distance(outputs0, outputs1, keepdim=True)
        
        with torch.no_grad():
            teacher_outputs0 = teacher_model(inputs0)
            teacher_outputs1 = teacher_model(inputs1)
            teacher_loss = criterion(teacher_outputs0, teacher_outputs1, targets)
            sp_alpha = 0
            sigmoid = nn.Sigmoid()
            sp_alpha += sigmoid(-teacher_loss)
        
        loss_main = criterion(outputs0, outputs1, targets)
        loss_cls = 0
        loss_sp = 0
        loss_cls = reg_cls(student_model)
        loss_sp = reg_l2sp(student_model)
        loss =  loss_main + sp_alpha*loss_sp + sp_alpha*loss_cls

        # measure accuracy and record loss
        prec1 = accuracy(outputs.data, targets.data)
        auroc = roc_auc_score(targets.data.cpu().detach().numpy(), outputs.cpu().detach().numpy())
        losses.update(loss.data.tolist(), inputs0.size(0))
        main_losses.update(loss_main.tolist(), inputs0.size(0))
        arc.update(auroc, inputs0.size(0))
        cls_losses.update(loss_cls, inputs0.size(0))
        sp_losses.update(loss_sp, inputs0.size(0))
        top1.update(prec1[0], inputs0.size(0))
        alpha.update(sp_alpha, inputs0.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

#         if batch_idx % 10 == 0:
    print('Train | {batch}/{size} | Loss:{loss:.4f} | MainLoss:{main:.4f} | Alpha:{alp:.4f} | SPLoss:{sp:.4f} | CLSLoss:{cls:.4f} | top1:{tp1:.4f} | AUROC:{ac:.4f}'.format(
                     batch=batch_idx+1, size=len(train_loader), loss=losses.avg, main=main_losses.avg, alp=alpha.avg, sp=sp_losses.avg, cls=cls_losses.avg, tp1=top1.avg, ac=arc.avg))
    return (losses.avg, top1.avg, arc.avg)

In [20]:
def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    arc = AverageMeter()
    cls_losses = AverageMeter()
    sp_losses = AverageMeter()
    main_losses = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs0, inputs1, targets) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs0, inputs1, targets = inputs0.cuda(), inputs1.cuda(), targets.cuda()

            # compute output
            outputs0 = model(inputs0)
            outputs1 = model(inputs1)
            outputs = F.pairwise_distance(outputs0, outputs1, keepdim=True)

            loss_main = criterion(outputs0, outputs1, targets)
            loss_cls = 0
            loss_sp = 0
            loss_cls = reg_cls(model)
            loss_sp = reg_l2sp(model)
            loss = loss_main 

            # measure accuracy and record loss
            prec1 = accuracy(outputs.data, targets.data)
            auroc = roc_auc_score(targets.data.cpu().detach().numpy(), outputs.cpu().detach().numpy())

            losses.update(loss.data.tolist(), inputs0.size(0))
            top1.update(prec1[0], inputs0.size(0))
            main_losses.update(loss_main.tolist(), inputs0.size(0))
            arc.update(auroc, inputs0.size(0))
            cls_losses.update(loss_cls, inputs0.size(0))
            sp_losses.update(loss_sp, inputs0.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    print('Test | {batch}/{size} | Loss:{loss:.4f} | MainLoss:{main:.4f} | SPLoss:{sp:.4f} | CLSLoss:{cls:.4f} | top1:{tp1:.4f} | AUROC:{ac:.4f}'.format(
                     batch=batch_idx+1, size=len(train_loader), loss=losses.avg, main=main_losses.avg, sp=sp_losses.avg, cls=cls_losses.avg, tp1=top1.avg, ac=arc.avg))
    return (losses.avg, top1.avg, arc.avg)

In [21]:
def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
    global state
    lr_set = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
    lr_list = schedule.copy()
    lr_list.append(epoch)
    lr_list.sort()
    idx = lr_list.index(epoch)
    state['lr'] *= lr_set[idx]
    for param_group in optimizer.param_groups:
        param_group['lr'] = state['lr']

In [22]:
for epoch in range(start_epoch, epochs):    
    state['lr'] = optimizer.state_dict()['param_groups'][0]['lr']
    adjust_learning_rate(optimizer, epoch)
    print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, epochs, state['lr']))
    
    train_loss, train_acc, train_auroc = train(train_loader, train_loader, teacher_model, student_model, criterion, optimizer, epoch, use_cuda)
    
    test_loss, test_acc,test_auroc = test(val_target_loader, student_model, criterion, epoch, use_cuda)
    source_loss, source_acc, source_auroc = test(val_source_loader, student_model, criterion, epoch, use_cuda)


    logger.append([state['lr'], train_loss, test_loss, source_loss, train_acc, test_acc, source_acc, train_auroc, test_auroc, source_auroc])
    is_best = test_acc > best_acc
    best_acc = max(test_acc, best_acc)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict' : student_model.state_dict(),
        'acc': test_acc,
        'best_acc': best_acc,
        'optimizer': optimizer.state_dict(),
    }, is_best, checkpoint=checkpoint)
    scheduler_warmup.step()
    
    teacher_model.load_state_dict(student_model.state_dict())


Epoch: [1 | 1000] LR: 0.040000
Train | 32/32 | Loss:12733.3155 | MainLoss:0.4638 | Alpha:0.3902 | SPLoss:30367.8281 | CLSLoss:2233.7715 | top1:49.8064 | AUROC:0.5066
Test | 39/32 | Loss:0.3678 | MainLoss:0.3678 | SPLoss:7922.8198 | CLSLoss:2355.3589 | top1:50.1538 | AUROC:0.5037
Test | 161/32 | Loss:0.0504 | MainLoss:0.0504 | SPLoss:7922.8062 | CLSLoss:2355.3599 | top1:49.6947 | AUROC:0.9993

Epoch: [2 | 1000] LR: 0.042400
Train | 32/32 | Loss:15275.0341 | MainLoss:0.4385 | Alpha:0.4058 | SPLoss:36228.2031 | CLSLoss:1593.0243 | top1:48.8258 | AUROC:0.5185
Test | 39/32 | Loss:0.3503 | MainLoss:0.3503 | SPLoss:2914.8291 | CLSLoss:1647.8798 | top1:50.0385 | AUROC:0.5058
Test | 161/32 | Loss:0.0065 | MainLoss:0.0065 | SPLoss:2914.8291 | CLSLoss:1647.8799 | top1:50.0187 | AUROC:0.9995

Epoch: [3 | 1000] LR: 0.044800
Train | 32/32 | Loss:16028.6107 | MainLoss:0.4109 | Alpha:0.4124 | SPLoss:36264.5117 | CLSLoss:2527.4231 | top1:49.6516 | AUROC:0.5090
Test | 39/32 | Loss:0.3280 | MainLoss:0.3

Test | 161/32 | Loss:0.2864 | MainLoss:0.2864 | SPLoss:26970.8047 | CLSLoss:1058.3475 | top1:49.7975 | AUROC:0.6294

Epoch: [22 | 1000] LR: 0.090400
Train | 32/32 | Loss:35117.4627 | MainLoss:0.3689 | Alpha:0.4175 | SPLoss:79024.0625 | CLSLoss:5427.5371 | top1:50.8387 | AUROC:0.4911
Test | 39/32 | Loss:0.3033 | MainLoss:0.3033 | SPLoss:34069.2812 | CLSLoss:3842.8672 | top1:49.1538 | AUROC:0.5067
Test | 161/32 | Loss:0.2747 | MainLoss:0.2747 | SPLoss:34069.3594 | CLSLoss:3842.8713 | top1:49.4984 | AUROC:0.5872

Epoch: [23 | 1000] LR: 0.092800
Train | 32/32 | Loss:39362.9526 | MainLoss:0.3636 | Alpha:0.4237 | SPLoss:88047.1953 | CLSLoss:4956.7646 | top1:49.9613 | AUROC:0.5033
Test | 39/32 | Loss:0.3074 | MainLoss:0.3074 | SPLoss:19708.6074 | CLSLoss:5619.4019 | top1:50.2179 | AUROC:0.4924
Test | 161/32 | Loss:0.2995 | MainLoss:0.2995 | SPLoss:19708.6094 | CLSLoss:5619.4106 | top1:50.1807 | AUROC:0.5400

Epoch: [24 | 1000] LR: 0.095200
Train | 32/32 | Loss:36036.5742 | MainLoss:0.3706 | A

Test | 39/32 | Loss:0.2982 | MainLoss:0.2982 | SPLoss:15260.0762 | CLSLoss:6098.5815 | top1:50.1026 | AUROC:0.5067
Test | 161/32 | Loss:0.2998 | MainLoss:0.2998 | SPLoss:15260.0439 | CLSLoss:6098.5811 | top1:50.3022 | AUROC:0.5087

Epoch: [43 | 1000] LR: 0.140800
Train | 32/32 | Loss:52061.3109 | MainLoss:0.3818 | Alpha:0.4249 | SPLoss:114419.6406 | CLSLoss:7768.5537 | top1:49.3677 | AUROC:0.4869
Test | 39/32 | Loss:0.3082 | MainLoss:0.3082 | SPLoss:29035.6055 | CLSLoss:3238.8799 | top1:48.5128 | AUROC:0.4941
Test | 161/32 | Loss:0.3020 | MainLoss:0.3020 | SPLoss:29035.5918 | CLSLoss:3238.8801 | top1:50.2150 | AUROC:0.5025

Epoch: [44 | 1000] LR: 0.143200
Train | 32/32 | Loss:58925.6059 | MainLoss:0.3665 | Alpha:0.4261 | SPLoss:129904.5938 | CLSLoss:8557.0674 | top1:51.1484 | AUROC:0.5192
Test | 39/32 | Loss:0.2923 | MainLoss:0.2923 | SPLoss:29997.7773 | CLSLoss:3847.5325 | top1:50.5641 | AUROC:0.5057
Test | 161/32 | Loss:0.2970 | MainLoss:0.2970 | SPLoss:29997.7676 | CLSLoss:3847.5288

Train | 32/32 | Loss:7095.7760 | MainLoss:0.3413 | Alpha:0.4268 | SPLoss:14765.5469 | CLSLoss:1890.6428 | top1:50.9161 | AUROC:0.5069
Test | 39/32 | Loss:0.3015 | MainLoss:0.3015 | SPLoss:1207.5518 | CLSLoss:1868.1403 | top1:50.4231 | AUROC:0.4998
Test | 161/32 | Loss:0.3069 | MainLoss:0.3069 | SPLoss:1207.5519 | CLSLoss:1868.1403 | top1:49.4704 | AUROC:0.5054

Epoch: [64 | 1000] LR: 0.015994
Train | 32/32 | Loss:7164.4795 | MainLoss:0.3405 | Alpha:0.4250 | SPLoss:15155.9141 | CLSLoss:1705.2991 | top1:50.9161 | AUROC:0.5163
Test | 39/32 | Loss:0.2983 | MainLoss:0.2983 | SPLoss:5178.6719 | CLSLoss:1516.3296 | top1:49.8846 | AUROC:0.4985
Test | 161/32 | Loss:0.3018 | MainLoss:0.3018 | SPLoss:5178.6709 | CLSLoss:1516.3268 | top1:49.6760 | AUROC:0.5025

Epoch: [65 | 1000] LR: 0.015993
Train | 32/32 | Loss:6179.3547 | MainLoss:0.3629 | Alpha:0.4238 | SPLoss:13397.6982 | CLSLoss:1205.0430 | top1:48.4903 | AUROC:0.4921
Test | 39/32 | Loss:0.3066 | MainLoss:0.3066 | SPLoss:1083.3274 | CLSLoss:

Train | 32/32 | Loss:5911.6173 | MainLoss:0.3442 | Alpha:0.4252 | SPLoss:12991.1055 | CLSLoss:894.5773 | top1:51.0968 | AUROC:0.4976
Test | 39/32 | Loss:0.3117 | MainLoss:0.3117 | SPLoss:2315.1196 | CLSLoss:370.0217 | top1:50.1538 | AUROC:0.4914
Test | 161/32 | Loss:0.3133 | MainLoss:0.3133 | SPLoss:2315.1199 | CLSLoss:370.0215 | top1:49.7664 | AUROC:0.5054

Epoch: [85 | 1000] LR: 0.015957
Train | 32/32 | Loss:5959.9228 | MainLoss:0.3378 | Alpha:0.4256 | SPLoss:13002.2842 | CLSLoss:980.8745 | top1:51.2774 | AUROC:0.5054
Test | 39/32 | Loss:0.3171 | MainLoss:0.3171 | SPLoss:2388.8997 | CLSLoss:594.4349 | top1:49.6795 | AUROC:0.4988
Test | 161/32 | Loss:0.3196 | MainLoss:0.3196 | SPLoss:2388.8975 | CLSLoss:594.4344 | top1:49.8754 | AUROC:0.4998

Epoch: [86 | 1000] LR: 0.015954
Train | 32/32 | Loss:6363.3798 | MainLoss:0.3410 | Alpha:0.4224 | SPLoss:14254.3604 | CLSLoss:809.4820 | top1:50.7355 | AUROC:0.4935
Test | 39/32 | Loss:0.3087 | MainLoss:0.3087 | SPLoss:1939.4309 | CLSLoss:289.008

Train | 32/32 | Loss:6793.2607 | MainLoss:0.3298 | Alpha:0.4245 | SPLoss:15108.4785 | CLSLoss:878.7673 | top1:49.6258 | AUROC:0.4996
Test | 39/32 | Loss:0.3111 | MainLoss:0.3111 | SPLoss:5931.0391 | CLSLoss:433.8425 | top1:49.9744 | AUROC:0.4971
Test | 161/32 | Loss:0.3122 | MainLoss:0.3122 | SPLoss:5931.0396 | CLSLoss:433.8435 | top1:49.9844 | AUROC:0.5076

Epoch: [106 | 1000] LR: 0.015885
Train | 32/32 | Loss:6055.3197 | MainLoss:0.3766 | Alpha:0.4210 | SPLoss:13407.1191 | CLSLoss:961.8887 | top1:49.2645 | AUROC:0.4848
Test | 39/32 | Loss:0.3032 | MainLoss:0.3032 | SPLoss:2784.0310 | CLSLoss:674.0017 | top1:50.5769 | AUROC:0.5041
Test | 161/32 | Loss:0.3074 | MainLoss:0.3074 | SPLoss:2784.0364 | CLSLoss:674.0009 | top1:50.2461 | AUROC:0.4991

Epoch: [107 | 1000] LR: 0.015881
Train | 32/32 | Loss:6747.8408 | MainLoss:0.3424 | Alpha:0.4241 | SPLoss:15148.6465 | CLSLoss:806.3983 | top1:49.7548 | AUROC:0.5167
Test | 39/32 | Loss:0.3102 | MainLoss:0.3102 | SPLoss:2243.8286 | CLSLoss:492.8

KeyboardInterrupt: 