### Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import numpy as np

### Parameters

In [20]:
params = {
    "bsize" : 128,# Batch size during training.
    'imsize' : 64,# Spatial size of training images. All images will be resized to this size during preprocessing.
    'nc' : 3,# Number of channles in the training images. For coloured images this is 3.
    'nz' : 100,# Size of the Z latent vector (the input to the generator).
    'ngf' : 64,# Size of feature maps in the generator. The depth will be multiples of this.
    'ndf' : 64, # Size of features maps in the discriminator. The depth will be multiples of this.
    'nepochs' : 1,# Number of training epochs.
    'lr' : 0.0002,# Learning rate for optimizers
    'beta1' : 0.5,# Beta1 hyperparam for Adam optimizer
    'save_epoch' : 2}# Save step.

### Data Preprocessing and Data Loader

In [41]:
# Dataset link
# https://drive.google.com/drive/folders/0B7EVK8r0v71pTUZsaXdaSnZBZzg

In [24]:
dataset_root = "data/"
transform = transforms.Compose([
    transforms.Resize(params['imsize']),
    transforms.CenterCrop(params['imsize']),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

device = torch.device("cudo:0" if (torch.cuda.is_available()) else "cpu")
dataset = dset.ImageFolder(root=dataset_root, transform=transform)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=params['bsize'], shuffle=True)

sample_batch = next(iter(dataloader))


plt.figure(figsize=(8,8))
plt.axis('off')
plt.title('Training Images')
plt.imshow(np.transpose(vutils.make_grid(sample_batch[0].to(device)[:64], padding=2, normalize=True).cpu(), (1,2,0)))
plt.show()

### Weight initialization

In [2]:
def weights_init(w):
    classname = w.__class__.__name__
    if classname.find('conv') != -1:
        nn.init.normal_(w.weight.data, 0.0, 0.02)
    elif classname.find('bn') != -1:
        nn.init.normal_(w.weight.data, 1.0, 0.02)
        nn.init.normal_(w.bias.data, 0)

## Generator Network

In [21]:
class Generator(nn.Module):
    def __init__(self, params):
        super(Generator, self).__init__()

        # Input is the latent vector Z.
        self.tconv1 = nn.ConvTranspose2d(params['nz'], params['ngf']*8, kernel_size=4, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(params['ngf']*8)

        # Input Dimension: (ngf*8) x 4 x 4
        self.tconv2 = nn.ConvTranspose2d(params['ngf']*8, params['ngf']*4, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(params['ngf']*4)

        # Input Dimension: (ngf*4) x 8 x 8
        self.tconv3 = nn.ConvTranspose2d(params['ngf']*4, params['ngf']*2, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(params['ngf']*2)

        # Input Dimension: (ngf*2) x 16 x 16
        self.tconv4 = nn.ConvTranspose2d(params['ngf']*2, params['ngf'], kernel_size=4, stride=2, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(params['ngf'])

        # Input Dimension: (ngf) * 32 * 32
        self.tconv5 = nn.ConvTranspose2d(params['ngf'], params['nc'], kernel_size=4, stride=2, padding=1, bias=False)
        #Output Dimension: (nc) x 64 x 64

    def forward(self, x):
        x = F.relu(self.bn1(self.tconv1(x)))
        x = F.relu(self.bn2(self.tconv2(x)))
        x = F.relu(self.bn3(self.tconv3(x)))
        x = F.relu(self.bn4(self.tconv4(x)))

        x = F.tanh(self.tconv5(x))

        return x

## Discriminator Network

In [22]:
class Discriminator(nn.Module):
    def __init__(self, params):
        super(Discriminator, self).__init__()
        
        # Input: nc*64*64
        self.conv1 = nn.Conv2d(params['nc'], params['ndf'], kernel_size=4, stride=2, padding=1, bias=False)
        
        # Input: ndf*32*32
        self.conv2 = nn.Conv2d(params['ndf'], params['ndf']*2, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(params['ndf']*2)
        
        # Input: (ndf*2)*16*16
        self.conv3 = nn.Conv2d(params['ndf']*2, params['ndf']*4, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(params['ndf']*4)
        
        # Input: (ndf*4)*8*8
        self.conv4 = nn.Conv2d(params['ndf']*4, params['ndf']*8, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(params['ndf']*8)
        
        # Input: (ndf*8)*4*4
        self.conv5 = nn.Conv2d(params['ndf']*8, 1, kernel_size=4, stride=2, padding=0, bias=False)
        
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x), 0.2, True)
        x = F.leaky_relu(self.bn2(self.conv2(x)), 0.2, True)
        x = F.leaky_relu(self.bn3(self.conv3(x)), 0.2, True)
        x = F.leaky_relu(self.bn4(self.conv4(x)), 0.2, True)
        x = F.sigmoid(self.conv5(x))
        
        return x

### Create Generator :

In [30]:
netG = Generator(params)
netG.apply(weights_init)
print(netG)

Generator(
  (tconv1): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (tconv2): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (tconv3): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (tconv4): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (tconv5): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
)


### Create Dicriminator

In [31]:
netD = Discriminator(params)
netD.apply(weights_init)
print(netD)

Discriminator(
  (conv1): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(512, 1, kernel_size=(4, 4), stride=(2, 2), bias=False)
)


#### Entropy Loss

In [32]:
criterion = nn.BCELoss()

#### Optimizer

In [34]:
optimizerG = optim.Adam(netG.parameters(), lr=params['lr'], betas = (params['beta1'], 0.999))
optimizerD = optim.Adam(netD.parameters(), lr=params['lr'], betas = (params['beta1'], 0.999))

### Training

In [36]:
real_label = 1
fake_label = 0

# stores generated images as training progresses
img_lis = []

#stores discriminator losses during training
D_losses = []

#stores generator loss during training
G_losses = []


In [39]:
iters = 0 
print("Starting training loop: ... ")
print("*"*130)

for epoch in range(params['nepochs']):
    for i, data in enumerate(dataloader,0):
        real_data = data[0].to(device)
        b_size = real_data.size(0)
        
        # Make accumulated gradient of the disciminator zero
        netD.zero_grad()
        # create label for real data (label=1)
        label = torch.full((b_size, ), real_label, device=device)
        output = netD(real_data).view(-1)
        errorD_real = criterion(output,label)
        
        # Calculate gradient for backpropagation
        errorD_real.backward()
        D_X = output.mean().item()
        
        # Sample random data from a unit normal distribution
        noise = torch.randn(b_size, params['nz'], 1, 1, device=device)
        
        # generate fake images
        fake_data = netG(noise)
        
        #create label for fake data (label=0)
        label.fill_(fake_label)
        
        """
        Calculate the output of the discriminator of the fake data.
        As no gradients w.r.t. the generator parameters are to be
        calculated, detach() is used. Hence, only gradients w.r.t. the
        discriminator parameters will be calculated.
        This is done because the loss functions for the discriminator
        and the generator are slightly different.
        """
        
        output = netD(fake_data.detach()).view(-1)
        errorD_fake = criterion(output, label)
        errorD_fake.backward()
        D_G_Z1 = output.mean().item()
        
        # Net discriminator loss
        errD = errorD_real + errorD_fake
        
        #update discriminator parameters
        optimizerD.step()
        
        # Make accumalted gradients of the generator zero.
        netG.zero_grad()
        
        # we want the fake data to be classified as real. Hence real labels are used (label=1)
        label.fill_(real_label)
        output = netD(fake_data).view(-1)
        errG = criterion(output, label)
        
        """
        gradients for backpropagation are calculated. Gradients w.r.t both the generator
        and discriminator parameters are calculated. However, the generators optimizer will
        only update the parameters of the generator. The discriminator gradients will be 
        set to zero in the next iteration by netD.zero_grad()
        """
        errG.backward()
        
        D_G_Z2 = output.mean().item()
        
        # update the generator parameters
        
        optimizerG.step()
        
        # Check the progress of training 
        
        if i%50 == 0:
            print("Is cuda avaialable: ", torch.cuda.is_available())
            print('(%d/%d)(%d/%d)\t loss_D: %.4f\t loss_G: %.4f\t D(x): %.4f\tD(G(z)): %.4f/%.4f' 
                  %(epoch, params['nepochs'], i, len(dataloader),errD.item(), errG.item(), D_X, D_G_Z1, D_G_Z2))


Starting training loop: ... 
****************************************************************************************************
Is cuda avaialable:  False
(0/1)(0/1583)	 loss_D: 0.4348	 loss_G: 4.5222	 D(x): 0.8832	D(G(z)): 0.2491/0.0116
Is cuda avaialable:  False
(0/1)(50/1583)	 loss_D: 0.1726	 loss_G: 6.3919	 D(x): 0.8911	D(G(z)): 0.0021/0.0024
Is cuda avaialable:  False
(0/1)(100/1583)	 loss_D: 0.0925	 loss_G: 6.3017	 D(x): 0.9499	D(G(z)): 0.0332/0.0025
Is cuda avaialable:  False
(0/1)(150/1583)	 loss_D: 0.4693	 loss_G: 3.1881	 D(x): 0.7775	D(G(z)): 0.1691/0.0517
Is cuda avaialable:  False
(0/1)(200/1583)	 loss_D: 0.5013	 loss_G: 3.2970	 D(x): 0.8206	D(G(z)): 0.2382/0.0427
Is cuda avaialable:  False
(0/1)(250/1583)	 loss_D: 0.3916	 loss_G: 3.5455	 D(x): 0.8979	D(G(z)): 0.2355/0.0370


KeyboardInterrupt: 