### DLP Lab6
The goal of this lab is to implement a conditional GAN that generates synthetic images according to the following conditions:
1. Shape: cube, sphere, cylinder
2. Color: gray, red, blue, green, brown, purple, cyan, yellow

#### Implementation details
1. Implement training, testing functions, and dataloader
2. Choose your conditional GAN architecture
3. Design your generator and discriminator
4. Choose your loss function
5. Output the results based on test.json and new_test.json (will be released before demo)

Date: 2020/05/

In [1]:
import numpy as np
import json
import time
import gc
import math
from tqdm import tqdm
from PIL import Image
import torch
import torch.nn as nn
from torch import optim
from torch.utils import data
import torchvision.transforms as transforms
from torchvision.utils import save_image, make_grid
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds (int or float).

    Returns:
        A string such as ``'2m 5s'``. Both fields are rendered with ``%d``,
        so any fractional part of the seconds is truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Report elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp, as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero
            because the elapsed time is divided by it.

    Returns:
        A string of the form ``'<elapsed> (- <remaining>)'`` where both
        durations are formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total run time from the fraction finished so far.
    estimated_total = elapsed / percent
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [3]:
def show_result(G_losses, D_losses, save = False, path = './img/training_loss.png'):
    """Plot the generator and discriminator loss curves on one figure.

    Args:
        G_losses: Sequence of generator loss values, one per epoch.
        D_losses: Sequence of discriminator loss values; plotted against the
            same x-axis as ``G_losses``, so it must have the same length.
        save: When true, the figure is also written to ``path``.
        path: Destination file used when ``save`` is true.
    """
    plt.figure(figsize=(10, 6))
    x = range(len(G_losses))
    plt.ylabel("Loss")
    plt.xlabel("Epochs")
    plt.title("Training Loss Curve", fontsize=18)
    plt.plot(x, G_losses, label='G_loss')
    plt.plot(x, D_losses, label='D_loss')
    plt.legend()

    # Save BEFORE show(): once show() has returned the current figure may
    # already be released, in which case savefig() would write an empty image.
    if save:
        plt.savefig(path)
    plt.show()

### Dataloader

In [4]:
def get_data(mode):
    """Load the sample list for one split from ``./data/<mode>.json``.

    Args:
        mode: Either ``'train'`` or ``'test'``.

    Returns:
        For ``'train'``, a list of ``(key, value)`` pairs taken from the
        top-level JSON object. For ``'test'``, the parsed JSON unchanged.

    Raises:
        AssertionError: If ``mode`` is neither ``'train'`` nor ``'test'``.
    """
    assert mode in ('train', 'test')
    # Context manager so the file handle is closed deterministically.
    with open('./data/'+mode+'.json', 'r') as f:
        samples = json.load(f)
    if mode == 'train':
        # Materialise the items so samples can be addressed by integer index.
        samples = list(samples.items())
    return samples

def get_objectDic():
    """Load and return the parsed contents of ``./data/objects.json``."""
    # Context manager so the file handle is closed deterministically.
    with open('./data/objects.json', 'r') as f:
        return json.load(f)

In [5]:
class GANLoader(data.Dataset):
    """Dataset yielding condition vectors (and, for training, images).

    In ``'train'`` mode each item is ``(image, condition)``; in any other
    mode each item is the condition vector alone. The condition is a float
    vector of length ``NUM_CLASSES`` with a 1 at the index of every object
    listed for the sample (a multi-hot encoding).
    """

    # Length of the condition vector: one slot per object index.
    NUM_CLASSES = 24

    def __init__(self, mode, image_size=64):
        """Load the sample list and object lookup table for ``mode``.

        Args:
            mode: Split name forwarded to ``get_data``.
            image_size: Side length used for the resize and centre crop.
        """
        self.mode = mode
        self.data = get_data(mode)
        self.obj_dict = get_objectDic()
        self.transformation = transforms.Compose([
                                  transforms.Resize(image_size),
                                  transforms.CenterCrop(image_size),
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                              ])

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.data)

    def _encode_condition(self, object_names):
        """Return the multi-hot float vector for a list of object names.

        Raises:
            KeyError: If a name is missing from the object lookup table.
            IndexError: If a looked-up index is >= ``NUM_CLASSES``.
        """
        indices = [self.obj_dict[name] for name in object_names]
        condition = torch.zeros(self.NUM_CLASSES)
        if indices:
            # One vectorised write instead of a Python loop over 24 elements.
            condition[torch.tensor(indices, dtype=torch.long)] = 1.0
        return condition

    def __getitem__(self, index):
        """Return ``(image, condition)`` in train mode, else ``condition``."""
        if self.mode == 'train':
            img_name, object_names = self.data[index]

            # convert('RGB') discards an alpha channel when present and leaves
            # RGB files intact; slicing off the last channel would instead
            # drop a colour channel from images that have no alpha.
            with Image.open('./data/iclevr/'+img_name) as raw:
                img = self.transformation(raw.convert('RGB'))

            return img, self._encode_condition(object_names)

        return self._encode_condition(self.data[index])

In [6]:
# Sanity check: build each split and print its first sample.
# The train sample is an (image, condition) pair; the test sample is a
# condition vector only.
trainset = GANLoader(mode='train', image_size=64)
print(trainset[0])
testset = GANLoader(mode='test', image_size=64)
print(testset[0])

(tensor([[[-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1686, -0.1686, -0.1686,  ..., -0.1765, -0.1843, -0.1843],
         [-0.1686, -0.1686, -0.1686,  ..., -0.1843, -0.1843, -0.1765],
         ...,
         [-0.0431, -0.0431, -0.0353,  ...,  0.1608,  0.1608,  0.1608],
         [-0.0431, -0.0353, -0.0353,  ...,  0.1608,  0.1608,  0.1686],
         [-0.0431, -0.0353, -0.0353,  ...,  0.1608,  0.1608,  0.1608]],

        [[-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1843],
         [-0.1765, -0.1686, -0.1686,  ..., -0.1843, -0.1843, -0.1843],
         ...,
         [-0.0510, -0.0510, -0.0431,  ...,  0.1451,  0.1451,  0.1373],
         [-0.0510, -0.0431, -0.0431,  ...,  0.1451,  0.1451,  0.1451],
         [-0.0510, -0.0510, -0.0431,  ...,  0.1451,  0.1451,  0.1451]],

        [[-0.1765, -0.1765, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1765, -0.1686, -0.1765,  ..., -

### Models

In [7]:
# custom weights initialization called
def weights_init(m):
    """Re-initialise ``m`` in place based on its class name.

    Intended to be passed to ``Module.apply``. Modules whose class name
    contains ``'Conv'`` get weights drawn from N(0, 0.02); modules whose
    class name contains ``'BatchNorm'`` get weights drawn from N(1, 0.02)
    and a zero bias. Every other module is left untouched.
    """
    layer_type = m.__class__.__name__
    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

## Generator

In [8]:
class Generator(nn.Module):
    """Transposed-convolution generator conditioned on a label vector.

    The condition is concatenated to the latent vector along the channel
    axis, and the stack upsamples the resulting 1x1 input to a 3x64x64
    image with values in [-1, 1] (final ``Tanh``).

    Args:
        latent_size: Number of channels of the noise input.
        ngf: Base number of generator feature maps.
        num_classes: Length of the condition vector. Defaults to 24, the
            width that was previously hard-coded.
    """

    def __init__(self, latent_size=100, ngf=64, num_classes=24):
        super(Generator, self).__init__()
        self.ngf = ngf
        self.num_classes = num_classes

        # Layer order is unchanged so existing state-dict keys stay valid.
        self.main = nn.Sequential(
            # input is Z concatenated with the condition: (latent+classes) x 1 x 1
            nn.ConvTranspose2d((latent_size + num_classes), ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, 3, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. 3 x 64 x 64
        )

    def forward(self, input, condition):
        """Generate images from noise ``input`` and label vector ``condition``.

        Args:
            input: Noise tensor; concatenated on dim 1 with the condition, so
                it must be ``(batch, latent_size, 1, 1)``.
            condition: Tensor reshaped to ``(batch, -1, 1, 1)``; its per-sample
                width must equal ``num_classes``.

        Returns:
            The output of the convolution stack for the concatenated input.
        """
        condition = condition.view(input.size(0), -1, 1, 1)
        return self.main(torch.cat((input, condition), 1))

## Discriminator

In [9]:
class Discriminator(nn.Module):
    """Convolutional discriminator conditioned on a label vector.

    The condition is projected by a linear layer to one extra image-sized
    channel and concatenated to the RGB input, giving a 4-channel image.

    Args:
        ndf: Base number of discriminator feature maps.
        image_size: Side length of the square input images. The projection
            layer is sized from this value rather than from ``ndf``, so the
            feature width can be changed independently. The convolution
            stack reduces to a single score per sample only for 64x64
            inputs, as its state-size comments show.
        num_classes: Length of the condition vector. Defaults to 24, the
            width that was previously hard-coded.
    """

    def __init__(self, ndf=64, image_size=64, num_classes=24):
        super(Discriminator, self).__init__()
        self.ndf = ndf
        self.image_size = image_size
        # Projects the condition vector to one image_size x image_size plane.
        self.linear = nn.Linear(num_classes, image_size * image_size)

        # Layer order is unchanged so existing state-dict keys stay valid.
        self.main = nn.Sequential(
            # input is 4 x 64 x 64
            nn.Conv2d(4, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input, condition):
        """Score ``input`` images given their ``condition`` vectors.

        Args:
            input: Image batch of shape ``(batch, 3, H, W)`` where ``H * W``
                equals the output width of the projection layer.
            condition: Label vectors of shape ``(batch, num_classes)``.

        Returns:
            Sigmoid outputs of the convolution stack.
        """
        # Reshape using the input's own spatial size; this does not depend on
        # ndf and needs no attribute that older pickled instances may lack.
        height, width = input.size(2), input.size(3)
        condition = self.linear(condition).view(input.size(0), 1, height, width)
        return self.main(torch.cat((input, condition), 1))

### Training

In [10]:
def learning_schedule(epoch):
    """Return the step-wise ``(generator_lr, discriminator_lr)`` for ``epoch``.

    The rates change every 100 epochs up to epoch 400 and stay constant
    afterwards.

    Args:
        epoch: Current epoch number.

    Returns:
        A ``(generator_lr, discriminator_lr)`` tuple for the latest stage
        whose first epoch is <= ``epoch``, or ``None`` when ``epoch`` is
        below every stage (i.e. negative).
    """
    # (first epoch of the stage, generator lr, discriminator lr), latest first.
    stages = (
        (400, 1e-4, 2e-6),
        (300, 2e-4, 4e-6),
        (200, 3e-4, 6e-6),
        (100, 4e-4, 8e-6),
        (0, 5e-4, 1e-5),
    )
    for first_epoch, generator_lr, discriminator_lr in stages:
        if epoch >= first_epoch:
            return generator_lr, discriminator_lr
    return None

In [11]:
def training(G, D, image_size, latent_size, learning_rate, batch_size, num_epochs):
    """Train generator ``G`` against discriminator ``D`` with BCE loss.

    After every epoch the generator is scored with ``testing``; when the
    returned accuracy exceeds 0.5556 both networks are saved under
    ``./models/`` with the accuracy in the file name.

    Args:
        G: Generator module, called as ``G(noise, condition)``.
        D: Discriminator module, called as ``D(image, condition)``.
        image_size: Image side length passed to the training dataset.
        latent_size: Number of channels of the sampled noise.
        learning_rate: Learning rate the Adam optimizers are created with.
            ``learning_schedule`` overwrites it at the start of every epoch,
            so it never drives an optimizer step.
        batch_size: Mini-batch size for training and for the per-epoch test.
        num_epochs: Number of passes over the training set.

    Returns:
        ``(G_losses, D_losses)``: one value per epoch, each being the loss
        of that epoch's last mini-batch.
    """
    import os  # local import: only needed to create the checkpoint directory

    start = time.time()
    real_label = 1
    fake_label = 0

    # recording list
    G_losses = []
    D_losses = []

    # init dataloader (honours the caller's image_size instead of a fixed 64)
    trainset = GANLoader('train', image_size=image_size)
    trainloader = data.DataLoader(trainset, batch_size, num_workers=2, shuffle=True)

    # init criterion & optimizer
    criterion = nn.BCELoss()
    # NOTE(review): beta1 = 2e-4 looks like the learning rate was passed in
    # place of a momentum coefficient; left unchanged because altering it
    # would change training results - confirm the intended value.
    optimizerD = optim.Adam(D.parameters(), lr=learning_rate, betas=(2e-4, 0.999))
    optimizerG = optim.Adam(G.parameters(), lr=learning_rate, betas=(2e-4, 0.999))

    # torch.save does not create missing directories.
    os.makedirs('./models', exist_ok=True)

    for epoch in range(num_epochs):
        # learning rate decay
        g_lr, d_lr = learning_schedule(epoch)
        for group in optimizerG.param_groups:
            group['lr'] = g_lr
        for group in optimizerD.param_groups:
            group['lr'] = d_lr

        for idx, datas in enumerate(trainloader):
            b_size = datas[0].size(0)
            img = datas[0].to(device)
            condition = datas[1].to(device)

            #------part1 - train discriminator: maximize log(D(x)) + log(1 - D(G(z)))-----#
            ## all real batch
            D.zero_grad()
            # Explicit float dtype: BCELoss needs floating-point targets, and
            # an integer fill value would otherwise give an integer tensor.
            label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
            output = D(img, condition).view(-1)

            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item()

            ## all fake batch
            noise = torch.randn(b_size, latent_size, 1, 1, device=device)
            fake = G(noise, condition)
            label.fill_(fake_label)

            # detach() keeps this backward pass from reaching the generator.
            output = D(fake.detach(), condition).view(-1)

            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()

            errD = errD_real + errD_fake

            # Update D
            optimizerD.step()

            #------part2 - train generator: maximize log(D(G(z)))-----#
            G.zero_grad()
            label.fill_(real_label)
            # Re-score the same fakes with the just-updated discriminator.
            output = D(fake, condition).view(-1)

            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()

            # Update G
            optimizerG.step()

        acc,_ = testing(G, D, latent_size, batch_size)
        if acc>0.5556 :
            print ("Model save...")
            # Saves the whole module objects (pickled), not just state dicts.
            torch.save(G, "./models/G_{:.4f}.ckpt".format(acc))
            torch.save(D, "./models/D_{:.4f}.ckpt".format(acc))

        print('%s (%d %d%%) Accuracy: %.4f Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
                      % (timeSince(start, (epoch+1)/num_epochs), epoch, epoch/num_epochs * 100,
                          acc, errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        G_losses.append(errG.item())
        D_losses.append(errD.item())
        gc.collect()
        torch.cuda.empty_cache()
    return G_losses, D_losses

### Testing

In [12]:
# Output generate images - https://pytorch.org/docs/stable/torchvision/utils.html
def testing(G, D, latent_size, batch_size):
    """Generate images for every test condition and score them.

    Args:
        G: Generator module, called as ``G(noise, condition)``.
        D: Accepted to mirror the training signature; not used here.
        latent_size: Number of channels of the sampled noise.
        batch_size: Number of test conditions per batch.

    Returns:
        ``(mean_score, grids)``: the unweighted mean over batches of the
        values returned by the evaluator's ``eval`` (treated as an accuracy
        by the caller), and a list with one CPU image grid per batch.
    """
    from evaluator import evaluation_model

    evaluator = evaluation_model()

    # One pass over the test conditions, in file order.
    testloader = data.DataLoader(GANLoader('test'), batch_size, num_workers=2)

    grids = []
    scores = []
    with torch.no_grad():
        for condition in testloader:
            condition = condition.to(device)
            noise = torch.randn(condition.size(0), latent_size, 1, 1, device=device)
            fake = G(noise, condition).detach()

            scores.append(evaluator.eval(fake, condition))
            grids.append(make_grid(fake, nrow=8, padding=2, normalize=True).cpu())

    mean_score = sum(scores)/len(scores)
    return mean_score, grids

### Main

In [None]:
# Hyper-parameters for the run.
image_size = 64
latent_size = 100
# Only the optimizers' initial value; training() applies learning_schedule().
learning_rate = 0.0002
batch_size = 128
num_epochs = 700


# Build the generator from latent_size so it always matches the noise that
# training() samples with the same variable.
G = Generator(latent_size=latent_size, ngf=64).to(device)
G.apply(weights_init)
D = Discriminator(ndf=64).to(device)
D.apply(weights_init)
G_losses, D_losses = training(G, D, image_size, latent_size, learning_rate, batch_size, num_epochs)
show_result(G_losses, D_losses)

0m 34s (- 402m 27s) (0 0%) Accuracy: 0.0833 Loss_D: 8.6278 Loss_G: 0.0027 D(x): 0.5001 D(G(z)): 0.9992 / 0.9973
1m 8s (- 399m 27s) (1 0%) Accuracy: 0.1389 Loss_D: 7.0874 Loss_G: 0.0251 D(x): 0.1381 D(G(z)): 0.9893 / 0.9753
1m 42s (- 397m 2s) (2 0%) Accuracy: 0.0972 Loss_D: 5.5774 Loss_G: 0.0538 D(x): 0.3851 D(G(z)): 0.9836 / 0.9480
2m 16s (- 395m 35s) (3 0%) Accuracy: 0.0833 Loss_D: 5.1737 Loss_G: 0.1270 D(x): 0.1982 D(G(z)): 0.9367 / 0.8812
2m 50s (- 394m 9s) (4 0%) Accuracy: 0.0694 Loss_D: 5.8533 Loss_G: 0.1341 D(x): 0.0535 D(G(z)): 0.9145 / 0.8749
3m 24s (- 393m 35s) (5 0%) Accuracy: 0.0972 Loss_D: 4.1786 Loss_G: 0.1251 D(x): 0.2854 D(G(z)): 0.9255 / 0.8826
3m 57s (- 392m 40s) (6 0%) Accuracy: 0.0833 Loss_D: 3.7001 Loss_G: 0.2355 D(x): 0.3335 D(G(z)): 0.8887 / 0.7910
4m 31s (- 391m 38s) (7 1%) Accuracy: 0.0556 Loss_D: 0.2959 Loss_G: 8.4622 D(x): 0.8038 D(G(z)): 0.0002 / 0.0002
5m 5s (- 390m 56s) (8 1%) Accuracy: 0.0833 Loss_D: 3.1466 Loss_G: 0.3978 D(x): 0.2327 D(G(z)): 0.7141 / 0.6

42m 36s (- 360m 28s) (73 10%) Accuracy: 0.1250 Loss_D: 1.3946 Loss_G: 0.6753 D(x): 0.4914 D(G(z)): 0.4947 / 0.5095
43m 11s (- 359m 56s) (74 10%) Accuracy: 0.2222 Loss_D: 1.4039 Loss_G: 0.6910 D(x): 0.4859 D(G(z)): 0.4931 / 0.5016
43m 46s (- 359m 23s) (75 10%) Accuracy: 0.0833 Loss_D: 1.3869 Loss_G: 0.7132 D(x): 0.5014 D(G(z)): 0.5005 / 0.4909
44m 20s (- 358m 48s) (76 10%) Accuracy: 0.1806 Loss_D: 1.3997 Loss_G: 0.7828 D(x): 0.5320 D(G(z)): 0.5352 / 0.4577
44m 54s (- 358m 8s) (77 11%) Accuracy: 0.1389 Loss_D: 1.3981 Loss_G: 0.6475 D(x): 0.4567 D(G(z)): 0.4582 / 0.5239
45m 28s (- 357m 26s) (78 11%) Accuracy: 0.0972 Loss_D: 1.3947 Loss_G: 0.7361 D(x): 0.5231 D(G(z)): 0.5255 / 0.4793
46m 2s (- 356m 51s) (79 11%) Accuracy: 0.1250 Loss_D: 1.3905 Loss_G: 0.6364 D(x): 0.4707 D(G(z)): 0.4703 / 0.5297
46m 37s (- 356m 17s) (80 11%) Accuracy: 0.1528 Loss_D: 1.3810 Loss_G: 0.6606 D(x): 0.4686 D(G(z)): 0.4631 / 0.5169
47m 11s (- 355m 36s) (81 11%) Accuracy: 0.1250 Loss_D: 1.3910 Loss_G: 0.6264 D(x):

In [None]:
# Reload the saved networks and show the first grid of generated test images.
# NOTE: these checkpoints are whole pickled modules; torch.load executes
# pickle code, so only load files from a trusted source.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
D = torch.load('./models/D_0.5556.ckpt', map_location=device)
G = torch.load('./models/G_0.5556.ckpt', map_location=device)
acc, imgs = testing(G, D, latent_size, batch_size)
fig = plt.figure(figsize=(15,15))
# make_grid output is channel-first; imshow expects channel-last.
plt.imshow(np.transpose(imgs[0],(1,2,0)))
print ("Accuracy: %.4f"%(acc))