### DLP Lab6
Goal of this lab is to implement a conditional GAN to generate synthetic images according to different conditions
1. Shape: cube, sphere, cylinder
2. Color: gray, red, blue, green, brown, purple, cyan, yellow

#### Implement detail
1. Implement training, testing functions, and dataloader
2. Choose your conditional GAN architecture
3. Design your generator and discriminator
4. Choose your loss function
5. Output the results based on test.json and new_test.json (will be released before demo)

Date: 2020/05/

In [1]:
import numpy as np
import json
import time
import math
import zipfile
from tqdm import tqdm
from PIL import Image
import torch
import torch.nn as nn
from torch import optim
from torch.utils import data
import torchvision.transforms as transforms
import torchvision.utils as vutils
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

### Dataloader

In [3]:
def get_data(mode):
    assert mode == 'train' or mode == 'test'
    data = json.load(open('./data/'+mode+'.json', 'r'))
    if mode == 'train':
        data = [i for i in data.items()]
    return data

def get_objectDic():
    return json.load(open('./data/objects.json', 'r'))

In [4]:
class GANLoader(data.Dataset):
    def __init__(self, mode, image_size):
        self.mode = mode   
        self.data = get_data(mode)
        self.obj_dict = get_objectDic()
        self.transformation = transforms.Compose([
                                  transforms.Resize(image_size),
                                  transforms.CenterCrop(image_size),
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                              ])
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.mode == 'train': 
            img_name = self.data[index][0]
            objects = [self.obj_dict[obj] for obj in self.data[index][1]]

            # image preprocess
            zippedImgs = zipfile.ZipFile('./data/iclevr.zip')
            img = np.array(Image.open(zippedImgs.open(img_name)))[...,:-1]
            img = self.transformation(Image.fromarray(img))
            
            # condition embedding - one hot
            condition = torch.zeros(24)
            condition = torch.tensor([v+1 if i in objects else v for i,v in enumerate(condition)])
            
            data = (img, condition)
        else:
            # condition embedding - one hot
            objects = [self.obj_dict[obj] for obj in self.data[index]]
            condition = torch.zeros(24)
            data = torch.tensor([v+1 if i in objects else v for i,v in enumerate(condition)])
        
        return data     

In [5]:
trainset = GANLoader('train', image_size=64)
print (trainset[0])
testset = GANLoader('test', image_size=64)
print (testset[0])

(tensor([[[-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1686, -0.1686, -0.1686,  ..., -0.1765, -0.1843, -0.1843],
         [-0.1686, -0.1686, -0.1686,  ..., -0.1843, -0.1843, -0.1765],
         ...,
         [-0.0431, -0.0431, -0.0353,  ...,  0.1608,  0.1608,  0.1608],
         [-0.0431, -0.0353, -0.0353,  ...,  0.1608,  0.1608,  0.1686],
         [-0.0431, -0.0353, -0.0353,  ...,  0.1608,  0.1608,  0.1608]],

        [[-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1765, -0.1686, -0.1765,  ..., -0.1765, -0.1843, -0.1843],
         [-0.1765, -0.1686, -0.1686,  ..., -0.1843, -0.1843, -0.1843],
         ...,
         [-0.0510, -0.0510, -0.0431,  ...,  0.1451,  0.1451,  0.1373],
         [-0.0510, -0.0431, -0.0431,  ...,  0.1451,  0.1451,  0.1451],
         [-0.0510, -0.0510, -0.0431,  ...,  0.1451,  0.1451,  0.1451]],

        [[-0.1765, -0.1765, -0.1765,  ..., -0.1765, -0.1843, -0.1765],
         [-0.1765, -0.1686, -0.1765,  ..., -

### Models

In [6]:
# custom weights initialization called
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

## Generator

In [7]:
class Generator(nn.Module):
    def __init__(self, latent_size=100, ngf=64):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(latent_size, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d( ngf, 3, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. 3 x 64 x 64
        )

    def forward(self, input):
        return self.main(input)
    
    def weights_init(self):
        for m in self._modules:
            normal_init(self._modules[m], mean, std)

## Discriminator

In [8]:
class Discriminator(nn.Module):
    def __init__(self, ndf=64):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            # input is 3 x 64 x 64
            nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)

### Training

In [9]:
def training(G, D, image_size, latent_size, learning_rate, batch_size, num_epochs):
    start = time.time()
    real_label = 1
    fake_label = 0
    
    # recording list
    img_list = []
    G_losses = []
    D_losses = []
    
    # init dataloader 
    trainset = GANLoader('train', image_size=64)
    trainloader = data.DataLoader(trainset, batch_size, shuffle=True)

    # init criterion & optimizer
    criterion = nn.BCELoss()
    optimizerD = optim.Adam(D.parameters(), lr=learning_rate, betas=(2e-4, 0.999))
    optimizerG = optim.Adam(G.parameters(), lr=learning_rate, betas=(2e-4, 0.999))
    
    # init noise
    fixed_noise = torch.randn(64, latent_size, 1, 1, device=device)
    
    for epoch in range(num_epochs):
        for idx, datas in enumerate(trainloader):
            b_size = datas[0].size(0)
            #------part1 - train discriminator: maximize log(D(x)) + log(1 - D(G(z)))-----#
            ## all real batch
            D.zero_grad()
            
            img = datas[0].to(device)
            condition = datas[1].to(device)
            
#             # cat condition
#             condition = torch.cat((condition, condition, condition), 0)
#             img = torch.cat((img, condition), 1)
            
            label = torch.full((b_size,), real_label, device=device)
            
            output = D(img).view(-1)
            
            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item() 

            ## all fake batch
            noise = torch.randn(b_size, latent_size, 1, 1, device=device)
            fake = G(noise)
            label.fill_(fake_label)
            
            output = D(fake.detach()).view(-1)
            
            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            
            errD = errD_real + errD_fake
            
            # Update D
            optimizerD.step()
            
            #------part2 - train generator: maximize log(D(G(z)))-----#
            G.zero_grad()
            label.fill_(real_label)
            output = D(fake).view(-1)
            
            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()
            
            # Update G
            optimizerG.step()

            if idx % 10 == 0:
                print('%s [%2d/%d][%3d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                          % (timeSince(start, epoch+1 / num_epochs), epoch, num_epochs, idx, len(trainloader),
                             errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        G_losses.append(errG.item())
        D_losses.append(errD.item())

        with torch.no_grad():
            fake = G(fixed_noise).detach().cpu()
        img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

### Testing

In [10]:
# Output generate images - https://pytorch.org/docs/stable/torchvision/utils.html



### Main

In [None]:
image_size = 64 
latent_size = 100
learning_rate = 0.0002
batch_size = 128
num_epochs = 10


G = Generator(latent_size=100, ngf=64).to(device)
G.apply(weights_init)
D = Discriminator(ndf=64).to(device)
D.apply(weights_init)
training(G, D, image_size, latent_size, learning_rate, batch_size, num_epochs)

0m 13s (- 2m 0s) [ 0/10][  0/141]	Loss_D: 1.9867	Loss_G: 2.3837	D(x): 0.2635	D(G(z)): 0.3132 / 0.1397
2m 21s (- 21m 11s) [ 0/10][ 10/141]	Loss_D: 0.2229	Loss_G: 5.7078	D(x): 0.8919	D(G(z)): 0.0023 / 0.0059
4m 31s (- 40m 47s) [ 0/10][ 20/141]	Loss_D: 0.0222	Loss_G: 5.7202	D(x): 0.9938	D(G(z)): 0.0152 / 0.0055
6m 41s (- 60m 9s) [ 0/10][ 30/141]	Loss_D: 0.0937	Loss_G: 12.7969	D(x): 0.9880	D(G(z)): 0.0748 / 0.0000
8m 50s (- 79m 33s) [ 0/10][ 40/141]	Loss_D: 0.0093	Loss_G: 8.1744	D(x): 0.9921	D(G(z)): 0.0004 / 0.0004
11m 3s (- 99m 34s) [ 0/10][ 50/141]	Loss_D: 0.0700	Loss_G: 18.5132	D(x): 0.9976	D(G(z)): 0.0630 / 0.0000
13m 15s (- 119m 17s) [ 0/10][ 60/141]	Loss_D: 0.0629	Loss_G: 13.0088	D(x): 0.9553	D(G(z)): 0.0000 / 0.0000
15m 26s (- 138m 58s) [ 0/10][ 70/141]	Loss_D: 0.4217	Loss_G: 44.9765	D(x): 0.9951	D(G(z)): 0.2675 / 0.0000
17m 38s (- 158m 44s) [ 0/10][ 80/141]	Loss_D: 0.0021	Loss_G: 13.5453	D(x): 0.9980	D(G(z)): 0.0000 / 0.0000
19m 47s (- 178m 9s) [ 0/10][ 90/141]	Loss_D: 3.0572	Loss