In [1]:
import json
import os
import time
import random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil
import collections

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torchsummary import summary

from model_pytorch import resnext50_32x4d
from utils import Bar,Logger, AverageMeter, accuracy, mkdir_p, savefig
from warmup_scheduler import GradualWarmupScheduler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from scipy.ndimage.filters import gaussian_filter
import cv2

from PIL import ImageFile, ImageOps
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
# GPU Device
gpu_id = 2
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
use_cuda = torch.cuda.is_available()
print("GPU device %d:" %(gpu_id), use_cuda)

GPU device 2: True


# Arguments

In [3]:
data_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN_256'

In [4]:
pretrained = ''
resume = ''

In [5]:
# Model
model_name = 'resnext32x4d' # b0-b7 scale

# Optimization
num_classes = 2
epochs = 300
start_epoch = 0
train_batch = 160
test_batch = 160
lr = 0.04
schedule = [75, 175, 250]
momentum = 0.9
gamma = 0.1 # LR is multiplied by gamma on schedule

# CheckPoint
checkpoint = './log/style1/128/32x4d/aug' # dir
if not os.path.isdir(checkpoint):
    os.makedirs(checkpoint)
num_workers = 8

# Seed
manual_seed = 7
random.seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)

# Image
size = (128, 128)

# cutmix
cm_prob = 0.5
cm_beta = 1.0

# augmentation
blur_prob = 0.2
blog_sig = 0.5
jpg_prob = 0.2

best_acc = 0

In [6]:
state = {}
state['num_classes'] = num_classes
state['epochs'] = epochs
state['start_epoch'] = start_epoch
state['train_batch'] = train_batch
state['test_batch'] = test_batch
state['lr'] = lr
state['schedule'] = schedule
state['momentum'] = momentum
state['gamma'] = gamma

In [7]:
def data_augment(img):
    img = np.array(img)

    if random.random() < blur_prob:
        sig = np.random.uniform(0.0, 3.0)
        gaussian_blur(img, sig)

    if random.random() < jpg_prob:
        qual = np.random.uniform(30.0, 100.0)
        img = cv2_jpg(img, qual)

    return Image.fromarray(img)


def gaussian_blur(img, sigma):
    gaussian_filter(img[:,:,0], output=img[:,:,0], sigma=sigma)
    gaussian_filter(img[:,:,1], output=img[:,:,1], sigma=sigma)
    gaussian_filter(img[:,:,2], output=img[:,:,2], sigma=sigma)


def cv2_jpg(img, compress_val):
    img_cv2 = img[:,:,::-1]
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val]
    result, encimg = cv2.imencode('.jpg', img_cv2, encode_param)
    decimg = cv2.imdecode(encimg, 1)
    return decimg[:,:,::-1]

In [8]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

# Dataset

In [9]:
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')    

train_aug = transforms.Compose([
    transforms.Lambda(lambda img: data_augment(img)),
    transforms.Resize(size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
#     transforms.RandomErasing(p=0.3, scale=(0.02, 0.10), ratio=(0.3, 3.3), value=0, inplace=True),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
val_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# pin_memory : cuda pin memeory use
train_loader = DataLoader(datasets.ImageFolder(train_dir, transform=train_aug),
                          batch_size=train_batch, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(datasets.ImageFolder(val_dir, val_aug),
                       batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

# Model

In [10]:
model = resnext50_32x4d(pretrained=False, num_classes=2)

# Pre-trained
if pretrained:
    print("=> using pre-trained model '{}'".format(pretrained))
    model.load_state_dict(torch.load(pretrained)['state_dict'])

In [11]:
model.to('cuda')
cudnn.benchmark = True
print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    Total params: 22.98M


# Loss

In [12]:
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=1e-4)
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=8, total_epoch=10, after_scheduler=scheduler_cosine)

In [13]:
# Resume
if resume:
    print('==> Resuming from checkpoint..')
    checkpoint = os.path.dirname(resume)
#     checkpoint = torch.load(resume)
    resume = torch.load(resume)
    best_acc = resume['best_acc']
    start_epoch = resume['epoch']
    model.load_state_dict(resume['state_dict'])
    optimizer.load_state_dict(resume['optimizer'])
    logger = Logger(os.path.join(checkpoint, 'log.txt'), resume=True)
else:
    logger = Logger(os.path.join(checkpoint, 'log.txt'))
    logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Train AUROC.', 'Valid AUROC.'])

# Train

In [14]:
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    model.train()
    
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    arc = AverageMeter()
    end = time.time()
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        batch_size = inputs.size(0)
        if batch_size < train_batch:
            continue
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
            
        r = np.random.rand(1)
        if cm_beta > 0 and r < cm_prob:
            
            rand_index = torch.randperm(inputs.size()[0]).cuda()
            tt= targets[rand_index]
            boolean = targets==tt
            rand_index = rand_index[boolean]
            lam = np.random.beta(cm_beta, cm_beta)
            bbx1, bby1, bbx2, bby2 = rand_bbox(inputs.size(), lam)
            inputs[boolean, :, bbx1:bbx2, bby1:bby2] = inputs[rand_index, :, bbx1:bbx2, bby1:bby2]

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1 = accuracy(outputs.data, targets.data)
        auroc = roc_auc_score(targets.cpu().detach().numpy(), outputs.cpu().detach().numpy()[:,1])
        losses.update(loss.data.tolist(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        arc.update(auroc, inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        
        if batch_idx % 100 == 0:
            print('{batch}/{size} | Loss:{loss:.4f} | top1:{tp1:.4f} | AUROC:{ac:.4f}'.format(
                     batch=batch_idx+1, size=len(train_loader), loss=losses.avg, tp1=top1.avg, ac=arc.avg))
    print('{batch}/{size} | Loss:{loss:.4f} | top1:{tp1:.4f} | AUROC:{ac:.4f}'.format(
                     batch=batch_idx+1, size=len(train_loader), loss=losses.avg, tp1=top1.avg, ac=arc.avg))
    return (losses.avg, top1.avg, arc.avg)

In [15]:
def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    arc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1 = accuracy(outputs.data, targets.data)
            auroc = roc_auc_score(targets.cpu().detach().numpy(), outputs.cpu().detach().numpy()[:,1])
            losses.update(loss.data.tolist(), inputs.size(0))
            top1.update(prec1[0], inputs.size(0))
            arc.update(auroc, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    print('{batch}/{size} | Loss:{loss:.4f} | top1:{tp1:.4f} | AUROC:{ac:.4f}'.format(
         batch=batch_idx+1, size=len(val_loader), loss=losses.avg, tp1=top1.avg, ac=arc.avg))
    return (losses.avg, top1.avg, arc.avg)

In [16]:
def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
    global state
    lr_set = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
    lr_list = schedule.copy()
    lr_list.append(epoch)
    lr_list.sort()
    idx = lr_list.index(epoch)
    state['lr'] *= lr_set[idx]
    for param_group in optimizer.param_groups:
        param_group['lr'] = state['lr']

In [17]:
for epoch in range(start_epoch, epochs):
    state['lr'] = optimizer.state_dict()['param_groups'][0]['lr']
    adjust_learning_rate(optimizer, epoch)
    print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, epochs, state['lr']))
    
    train_loss, train_acc, train_auroc = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
    test_loss, test_acc, test_auroc = test(val_loader, model, criterion, epoch, use_cuda)
    
    logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc, train_auroc, test_auroc])
    scheduler_warmup.step()

    is_best = test_acc > best_acc
    best_acc = max(test_acc, best_acc)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict' : model.state_dict(),
        'acc': test_acc,
        'best_acc': best_acc,
        'optimizer': optimizer.state_dict(),
    }, is_best, checkpoint=checkpoint)


Epoch: [1 | 300] LR: 0.040000
1/431 | Loss:0.8684 | top1:55.0000 | AUROC:0.5464
101/431 | Loss:34.3031 | top1:50.3899 | AUROC:0.5122
201/431 | Loss:17.5896 | top1:50.2705 | AUROC:0.5126
301/431 | Loss:11.9772 | top1:50.0415 | AUROC:0.5130
401/431 | Loss:9.1638 | top1:50.1449 | AUROC:0.5132
431/431 | Loss:8.5926 | top1:50.1628 | AUROC:0.5127
49/49 | Loss:0.6932 | top1:50.4744 | AUROC:0.5076

Epoch: [2 | 300] LR: 0.068000
1/431 | Loss:0.6905 | top1:56.8750 | AUROC:0.5602
101/431 | Loss:0.6966 | top1:50.1238 | AUROC:0.5097
201/431 | Loss:0.6965 | top1:50.5597 | AUROC:0.5153
301/431 | Loss:0.6969 | top1:50.2824 | AUROC:0.5142
401/431 | Loss:0.6965 | top1:50.2587 | AUROC:0.5151
431/431 | Loss:0.6965 | top1:50.2079 | AUROC:0.5144
49/49 | Loss:0.6937 | top1:50.7564 | AUROC:0.5168

Epoch: [3 | 300] LR: 0.096000
1/431 | Loss:0.6992 | top1:43.1250 | AUROC:0.5380
101/431 | Loss:0.6957 | top1:50.5198 | AUROC:0.5185
201/431 | Loss:0.6962 | top1:50.3420 | AUROC:0.5195
301/431 | Loss:0.6957 | top1:5


Epoch: [22 | 300] LR: 0.319124
1/431 | Loss:0.6912 | top1:53.1250 | AUROC:0.5308
101/431 | Loss:0.6908 | top1:52.1411 | AUROC:0.5567
201/431 | Loss:0.6927 | top1:51.9963 | AUROC:0.5591
301/431 | Loss:0.6926 | top1:51.7629 | AUROC:0.5562
401/431 | Loss:0.6920 | top1:52.1010 | AUROC:0.5536
431/431 | Loss:0.6918 | top1:52.1584 | AUROC:0.5539
49/49 | Loss:0.6861 | top1:53.6923 | AUROC:0.5686

Epoch: [23 | 300] LR: 0.318940
1/431 | Loss:0.6870 | top1:51.2500 | AUROC:0.5434
101/431 | Loss:0.6902 | top1:52.8713 | AUROC:0.5617
201/431 | Loss:0.6902 | top1:52.8731 | AUROC:0.5652
301/431 | Loss:0.6901 | top1:53.0876 | AUROC:0.5655
401/431 | Loss:0.6899 | top1:53.2466 | AUROC:0.5690
431/431 | Loss:0.6899 | top1:53.2340 | AUROC:0.5696
49/49 | Loss:0.6849 | top1:55.2821 | AUROC:0.5709

Epoch: [24 | 300] LR: 0.318738
1/431 | Loss:0.6832 | top1:56.2500 | AUROC:0.5652
101/431 | Loss:0.6916 | top1:53.2921 | AUROC:0.5713
201/431 | Loss:0.6941 | top1:52.2295 | AUROC:0.5682
301/431 | Loss:0.6919 | top1:5


Epoch: [43 | 300] LR: 0.311643
1/431 | Loss:0.0499 | top1:98.1250 | AUROC:0.9992
101/431 | Loss:0.1068 | top1:96.0272 | AUROC:0.9937
201/431 | Loss:0.1080 | top1:96.0417 | AUROC:0.9936
301/431 | Loss:0.1106 | top1:95.9198 | AUROC:0.9933
401/431 | Loss:0.1097 | top1:95.8650 | AUROC:0.9935
431/431 | Loss:0.1084 | top1:95.9259 | AUROC:0.9936
49/49 | Loss:0.0915 | top1:96.8846 | AUROC:0.9970

Epoch: [44 | 300] LR: 0.311100
1/431 | Loss:0.1850 | top1:95.6250 | AUROC:0.9847
101/431 | Loss:0.1047 | top1:95.9406 | AUROC:0.9942
201/431 | Loss:0.1024 | top1:96.0852 | AUROC:0.9943
301/431 | Loss:0.1037 | top1:96.0278 | AUROC:0.9942
401/431 | Loss:0.1031 | top1:96.0505 | AUROC:0.9943
431/431 | Loss:0.1029 | top1:96.0552 | AUROC:0.9943
49/49 | Loss:0.0540 | top1:98.1154 | AUROC:0.9981

Epoch: [45 | 300] LR: 0.310541
1/431 | Loss:0.1163 | top1:95.0000 | AUROC:0.9967
101/431 | Loss:0.0950 | top1:96.2809 | AUROC:0.9953
201/431 | Loss:0.0969 | top1:96.2282 | AUROC:0.9950
301/431 | Loss:0.0950 | top1:9


Epoch: [64 | 300] LR: 0.296858
1/431 | Loss:0.0545 | top1:96.8750 | AUROC:1.0000
101/431 | Loss:0.0686 | top1:97.4505 | AUROC:0.9975
201/431 | Loss:0.0690 | top1:97.4565 | AUROC:0.9974
301/431 | Loss:0.0685 | top1:97.4585 | AUROC:0.9974
401/431 | Loss:0.0706 | top1:97.3691 | AUROC:0.9972
431/431 | Loss:0.0706 | top1:97.3677 | AUROC:0.9972
49/49 | Loss:0.0368 | top1:98.8077 | AUROC:0.9993

Epoch: [65 | 300] LR: 0.295983
1/431 | Loss:0.0721 | top1:96.8750 | AUROC:0.9980
101/431 | Loss:0.0687 | top1:97.3577 | AUROC:0.9976
201/431 | Loss:0.0680 | top1:97.4689 | AUROC:0.9976
301/431 | Loss:0.0688 | top1:97.4543 | AUROC:0.9976
401/431 | Loss:0.0694 | top1:97.4189 | AUROC:0.9976
431/431 | Loss:0.0696 | top1:97.4172 | AUROC:0.9975
49/49 | Loss:0.0352 | top1:98.7051 | AUROC:0.9991

Epoch: [66 | 300] LR: 0.295092
1/431 | Loss:0.0409 | top1:98.7500 | AUROC:0.9998
101/431 | Loss:0.0628 | top1:97.6795 | AUROC:0.9979
201/431 | Loss:0.0660 | top1:97.5653 | AUROC:0.9976
301/431 | Loss:0.0665 | top1:9

KeyboardInterrupt: 