In [1]:
import json
import os
import time
import random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import shutil

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torchsummary import summary
import torch.nn.functional as F

from model_pytorch import EfficientNet
from utils import Bar,Logger, AverageMeter, accuracy, mkdir_p, savefig
from warmup_scheduler import GradualWarmupScheduler

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from sklearn.metrics import roc_auc_score, accuracy_score

In [2]:
# GPU Device
gpu_id = 3
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
use_cuda = torch.cuda.is_available()
print("GPU device %d:" %(gpu_id), use_cuda)

GPU device 3: True


# Arguments

In [3]:
pretrained = './log/style2/128/b0/siamese/checkpoint.pth.tar'
resume = ''

In [4]:
# Model
model_name = 'efficientnet-b0' # b0-b7 scale

# Optimization
num_classes = 128
epochs = 400
start_epoch = 0
train_batch = 512
test_batch = 300
lr = 0.04
schedule = [150, 225]
momentum = 0.9
gamma = 0.1 # LR is multiplied by gamma on schedule

# CheckPoint
checkpoint = './log/' # dir
if not os.path.isdir(checkpoint):
    os.mkdir(checkpoint)
num_workers = 12

# Seed
manual_seed = 7
random.seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)

#
size = (128, 128)

# constrastive
thresh = 0.4

best_acc = 0

In [5]:
state = {}
state['num_classes'] = num_classes
state['epochs'] = epochs
state['start_epoch'] = start_epoch
state['train_batch'] = train_batch
state['test_batch'] = test_batch
state['lr'] = lr
state['schedule'] = schedule
state['momentum'] = momentum
state['gamma'] = gamma

In [6]:
class SiameseNetworkDataset(Dataset):
    
    def __init__(self,imageFolderDataset,transform=None,should_invert=True):
        self.imageFolderDataset = imageFolderDataset    
        self.transform = transform
        self.should_invert = should_invert
        
    def __getitem__(self,index):
        img0_tuple = random.choice(self.imageFolderDataset.imgs)
        #we need to make sure approx 50% of images are in the same class
        should_get_same_class = random.randint(0,1) 
        if should_get_same_class:
            while True:
                #keep looping till the same class image is found
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1]==img1_tuple[1]:
                    break
        else:
            while True:
                #keep looping till a different class image is found
                
                img1_tuple = random.choice(self.imageFolderDataset.imgs) 
                if img0_tuple[1] !=img1_tuple[1]:
                    break

        img0 = Image.open(img0_tuple[0])
        img1 = Image.open(img1_tuple[0])
#         img0 = img0.convert("L")
#         img1 = img1.convert("L")
        
        if self.should_invert:
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        
        return img0, img1 , torch.from_numpy(np.array([int(img1_tuple[1]!=img0_tuple[1])],dtype=np.float32))
    
    def __len__(self):
        return len(self.imageFolderDataset.imgs)

In [7]:
class ContrastiveLoss(torch.nn.Module):
    """
    Contrastive loss function.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim = True)
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))


        return loss_contrastive

# Model

In [8]:
model = EfficientNet.from_name(model_name, num_classes=num_classes,
                              override_params={'dropout_rate':0.0, 'drop_connect_rate':0.2})

# Pre-trained
if pretrained:
    print("=> using pre-trained model '{}'".format(pretrained))
    model.load_state_dict(torch.load(pretrained)['state_dict'])

=> using pre-trained model './log/style2/128/b0/siamese/checkpoint.pth.tar'


In [9]:
model.to('cuda')
cudnn.benchmark = True
print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    Total params: 4.17M


In [10]:
# summary(model, input_size=(3,64,64), device='cuda')

# Loss

In [11]:
criterion = ContrastiveLoss(margin=1.0).cuda()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=8, total_epoch=10, after_scheduler=scheduler_cosine)

In [12]:
# # Resume
# if resume:
#     print('==> Resuming from checkpoint..')
#     checkpoint = os.path.dirname(resume)
# #     checkpoint = torch.load(resume)
#     resume = torch.load(resume)
#     best_acc = resume['best_acc']
#     start_epoch = resume['epoch']
#     model.load_state_dict(resume['state_dict'])
#     optimizer.load_state_dict(resume['optimizer'])
#     logger = Logger(os.path.join(checkpoint, 'log.txt'), resume=True)
# else:
#     logger = Logger(os.path.join(checkpoint, 'log.txt'))
#     logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

In [13]:
def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    with torch.no_grad():
        for batch_idx, (inputs0, inputs1, targets) in enumerate(val_loader):
            data_time.update(time.time() - end)

            if use_cuda:
                inputs0, inputs1, targets = inputs0.cuda(), inputs1.cuda(), targets.cuda()

            # compute output
            outputs0 = model(inputs0)
            outputs1 = model(inputs1)
            loss = criterion(outputs0, outputs1, targets)
            outputs = F.pairwise_distance(outputs0, outputs1, keepdim=True)
            auroc = roc_auc_score(targets.data.cpu().numpy(), outputs.cpu().numpy())


            # measure accuracy and record loss
            pred = outputs.data
            pred[pred < thresh] = 0.
            pred[pred >= thresh] = 1.
            prec1 = [accuracy_score(targets.data.cpu().numpy(), pred.cpu().numpy())]
            losses.update(loss.data.tolist(), inputs0.size(0))
            top1.update(prec1[0], inputs0.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:} | top1: {top1:}'.format(
                        batch=batch_idx + 1,
                        size=len(val_loader),
                        data=data_time.avg,
                        bt=batch_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                        loss=losses.avg,
                        top1=top1.avg,)
            bar.next()
    print('{batch}/{size} Data:{data:.3f} | Batch:{bt:.3f} | Total:{total:} | AUROC:{eta:} | Loss:{loss:} | top1:{tp1:}'.format(
         batch=batch_idx+1, size=len(val_loader), data=data_time.val, bt=batch_time.val, total=bar.elapsed_td, eta=auroc, loss=losses.avg, tp1=top1.avg))
    bar.finish()
    return (losses.avg, top1.avg)

In [14]:
def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
    global state
    if epoch in schedule:
        state['lr'] *= gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']

# Dataset

In [15]:
data_dir = '/media/data2/dataset/GAN_ImageData/PGGAN_128/'

In [16]:
test_dir = os.path.join(data_dir, 'test_50')

test_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

test_ = SiameseNetworkDataset(datasets.ImageFolder(test_dir), transform=test_aug, should_invert=False)
test_loader = DataLoader(test_, batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

In [17]:
test_loss, test_acc = test(test_loader, model, criterion, 1, use_cuda)

126/126 Data:0.000 | Batch:0.842 | Total:0:00:59 | AUROC:0.4921658986175116 | Loss:0.4927538548999464 | top1:0.49685886173667676


In [18]:
data_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN_256/'

test_dir = os.path.join(data_dir, 'test')

test_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

test_ = SiameseNetworkDataset(datasets.ImageFolder(test_dir, test_aug), transform=test_aug, should_invert=False)
test_loader = DataLoader(test_, batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

In [19]:
test_loss, test_acc = test(test_loader, model, criterion, 1, use_cuda)

200/200 Data:0.000 | Batch:0.571 | Total:0:01:57 | AUROC:1.0 | Loss:0.006346221163130395 | top1:0.9917166666666667


In [20]:
data_dir = '/media/data2/dataset/GAN_ImageData/StyleGAN2_256/'

test_dir = os.path.join(data_dir, 'test')

test_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

test_ = SiameseNetworkDataset(datasets.ImageFolder(test_dir, test_aug), transform=test_aug, should_invert=False)
test_loader = DataLoader(test_, batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

In [21]:
test_loss, test_acc = test(test_loader, model, criterion, 1, use_cuda)

200/200 Data:0.000 | Batch:0.373 | Total:0:01:47 | AUROC:0.999866375662554 | Loss:0.007220894595138816 | top1:0.9907666666666667


In [22]:
data_dir = '/media/data2/dataset/GAN_ImageData/StarGAN_128/'

test_dir = os.path.join(data_dir, 'test')

test_aug = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

test_ = SiameseNetworkDataset(datasets.ImageFolder(test_dir, test_aug), transform=test_aug, should_invert=False)
test_loader = DataLoader(test_, batch_size=test_batch, shuffle=True, num_workers=num_workers, pin_memory=True)

In [23]:
test_loss, test_acc = test(test_loader, model, criterion, 1, use_cuda)

334/334 Data:0.000 | Batch:1.218 | Total:0:03:04 | AUROC:0.48695303091128056 | Loss:0.4875947008132935 | top1:0.49926
