In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import importlib
import argparse

In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.autograd as autograd
from torch.autograd import Variable
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.optim as optim
from torchvision import transforms
import torchvision.utils as vutils
from torchnet.meter import AverageValueMeter

# Set parameters

In [3]:
# All tunable settings live in one place; `args` exposes them as attributes
# (args.lr, args.nz, ...) for the rest of the notebook.
parser = dict(
    # data
    dataset='cifar10',
    dataroot='./data',
    workers=2,
    batchSize=64,
    imageSize=64,
    # model sizes
    nz=100,
    ngf=64,
    ndf=64,
    # optimisation
    epochs=25,
    # NOTE(review): the DCGAN paper reports 2e-4 as a stable Adam learning
    # rate; 1e-3 is kept here unchanged -- confirm it is intentional.
    lr=1e-3,
    beta1=0.5,
    # optional checkpoints ('' means "start from scratch")
    netG='',
    netD='',
    # output / hardware
    outf='./output',
    ngpu=0,
    manualSeed=7,
    no_cuda=True,
)
args = argparse.Namespace(**parser)

In [4]:
# Echo the effective configuration so the run is self-describing.
print(args)

Namespace(batchSize=64, beta1=0.5, dataroot='./data', dataset='cifar10', epochs=25, imageSize=64, lr=0.001, manualSeed=7, ndf=64, netD='', netG='', ngf=64, ngpu=0, no_cuda=True, nz=100, outf='./output', workers=2)


In [5]:
# args.manualSeed was configured but never applied; seed torch's RNG here,
# before any weights are initialised or noise is drawn, so runs are repeatable.
torch.manual_seed(args.manualSeed)

# Module-level sizes read by the _netG / _netD definitions and the training loop.
ngpu = int(args.ngpu)  # number of GPUs the networks may spread across
nz = int(args.nz)      # channels of the latent input z fed to the generator
ngf = int(args.ngf)    # base feature-map width of the generator
ndf = int(args.ndf)    # base feature-map width of the discriminator
nc = 3  # image channels (RGB)

# Load data

In [6]:
# Preprocessing applied to every CIFAR-10 image: resize to args.imageSize,
# convert to a tensor, then normalise each channel with mean 0.5 / std 0.5.
preprocess = transforms.Compose([
    transforms.Scale(args.imageSize),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = dset.CIFAR10(root=args.dataroot, download=True, transform=preprocess)

assert dataset

Files already downloaded and verified


In [7]:
# Shuffled mini-batches of args.batchSize images, loaded by args.workers worker processes.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batchSize, shuffle=True, num_workers=int(args.workers))

# Define class

In [8]:
def weights_init(m):
    """Custom weight initialisation, meant to be passed to ``net.apply``.

    The layer kind is recognised from the class name of ``m``:

    * names containing ``Conv`` get weights drawn from N(0.0, 0.02);
    * names containing ``BatchNorm`` get weights drawn from N(1.0, 0.02)
      and a zeroed bias;
    * every other module is left untouched.

    Args:
        m: a single layer (sub-module) of the model.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

```class torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True)```

```python
def __init__(self):
        self._backend = thnn_backend
        self._parameters = OrderedDict()
        self._buffers = OrderedDict()
        self._backward_hooks = OrderedDict()
        self._forward_hooks = OrderedDict()
        self._modules = OrderedDict()
        self.training = True
```

```class torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True)```

In [9]:
# Define class Generator:
class _netG(nn.Module):
    """DCGAN generator: turns a latent batch of shape (N, nz, 1, 1) into images.

    A stack of transposed convolutions doubles the spatial size at each stage
    (4 -> 8 -> 16 -> 32 -> 64) and ends in Tanh. Layer widths come from the
    module-level ``nz``, ``ngf`` and ``nc``.

    Args:
        ngpu: number of GPUs the forward pass may be spread across.
    """

    def __init__(self, ngpu):
        super(_netG, self).__init__()  #no need to list __init__ of nn.Module
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64 --> image size = 64
        )

    def forward(self, input):
        """Generate images for ``input``; returns a (N, nc, 64, 64) batch."""
        # Only hand the work to data_parallel when several GPUs were requested
        # and the input already lives on the GPU (same rule as _netD).
        # Calling data_parallel with device_ids=None leaves device selection to
        # the library, which fails on CPU-only setups in newer PyTorch releases.
        if self.ngpu > 1 and isinstance(input.data, torch.cuda.FloatTensor):
            return nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        return self.main(input)

In [10]:
# Build the generator and run the custom initialiser over every sub-module
# (Module.apply returns the module itself, so the calls can be chained).
netG = _netG(ngpu).apply(weights_init)
# A non-empty args.netG is treated as a checkpoint path to resume from.
if args.netG != '':
    saved_state = torch.load(args.netG)
    netG.load_state_dict(saved_state)
print(netG)

_netG (
  (main): Sequential (
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU (inplace)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU (inplace)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (8): ReLU (inplace)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (11): ReLU (inplace)
    (12): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh ()
  )
)


```class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)```

In [11]:
class _netD(nn.Module):
    """DCGAN discriminator: scores (N, nc, 64, 64) images as real or fake.

    Strided convolutions halve the spatial size at each stage
    (64 -> 32 -> 16 -> 8 -> 4); a final 4x4 convolution plus Sigmoid yields one
    probability per image. Layer widths come from the module-level ``nc`` and
    ``ndf``.

    Args:
        ngpu: number of GPUs the forward pass may be spread across.
    """

    def __init__(self, ngpu):
        super(_netD, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        """Return the real/fake probability for each image as a (N, 1) batch."""
        # Only hand the work to data_parallel when several GPUs were requested
        # and the input already lives on the GPU. The CPU / single-device path
        # runs the stack directly: data_parallel with device_ids=None leaves
        # device selection to the library, which fails on CPU-only setups in
        # newer PyTorch releases.
        if self.ngpu > 1 and isinstance(input.data, torch.cuda.FloatTensor):
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        # Flatten (N, 1, 1, 1) to one score per row.
        return output.view(-1, 1)

In [12]:
# Build the discriminator and run the custom initialiser over every sub-module
# (Module.apply returns the module itself, so the calls can be chained).
netD = _netD(ngpu).apply(weights_init)
# A non-empty args.netD is treated as a checkpoint path to resume from.
if args.netD != '':
    saved_state = torch.load(args.netD)
    netD.load_state_dict(saved_state)
print(netD)

_netD (
  (main): Sequential (
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU (0.2, inplace)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (4): LeakyReLU (0.2, inplace)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (7): LeakyReLU (0.2, inplace)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
    (10): LeakyReLU (0.2, inplace)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid ()
  )
)


# Train

In [13]:
# Binary cross-entropy between the discriminator's sigmoid output and the 0/1 labels.
criterion = nn.BCELoss()

```class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)```

In [14]:
# Both networks use Adam with identical hyper-parameters taken from `args`.
adam_settings = dict(lr=args.lr, betas=(args.beta1, 0.999))
optimizerD = optim.Adam(netD.parameters(), **adam_settings)
optimizerG = optim.Adam(netG.parameters(), **adam_settings)

**NOTE**: 
If PyTorch is updated, the order of the steps during training changes:

buffer --> backward --> update _**CHANGED TO**_ backward --> buffer --> update

In [18]:
def _loss_value(loss):
    """Return the single number held by a one-element loss Variable as a float."""
    # Flatten first so this works whether the loss is a 1-element tensor or a
    # 0-dim one; indexing `.data[0]` directly raises on 0-dim losses.
    return float(loss.data.view(-1)[0])


def train(epoch):
    """Run one pass over ``dataloader``, alternating D and G updates per batch.

    For every batch the discriminator is updated on the real images and on a
    detached generated batch, then the generator is updated through the
    discriminator's verdict on that same generated batch. Losses and mean
    discriminator outputs are printed for every batch, and sample images are
    written through ``plot`` every 100 batches.

    Args:
        epoch: epoch number; used only in the log line and output file names.
    """
    for i, (X, _) in enumerate(dataloader):
        batch_size = X.size(0)

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################

        # Create ones_label and zeros_label. They are shaped (batch, 1) to
        # match netD's `output.view(-1, 1)`; a flat (batch,) target has a
        # different shape from the prediction, which newer BCELoss rejects.
        ones_label = Variable(torch.ones(batch_size, 1))
        zeros_label = Variable(torch.zeros(batch_size, 1))
        z = Variable(torch.randn(batch_size, nz, 1, 1))

        #train with real
        X = Variable(X)
        output = netD(X)
        errD_real = criterion(output, ones_label)

        netD.zero_grad()
        errD_real.backward()

        D_x = output.data.mean()

        #train with fake
        fake = netG(z)
        # detach() keeps the D update from backpropagating into netG, so the
        # same fake batch (one draw of z) can be reused for the G update below.
        output = netD(fake.detach())
        errD_fake = criterion(output, zeros_label)

        errD_fake.backward()
        D_G_z1 = output.data.mean()

        # Summed only for logging; both parts were already backpropagated.
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################

        # Score the (non-detached) fakes with the freshly updated D and label
        # them as real, so the gradient pushes G towards fooling D.
        output = netD(fake)
        errG = criterion(output, ones_label)

        netG.zero_grad()
        errG.backward()

        D_G_z2 = output.data.mean()

        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, args.epochs, i, len(dataloader),
                 _loss_value(errD), _loss_value(errG), D_x, D_G_z1, D_G_z2))
        if i % 100 == 0:
            plot(X, epoch)

In [22]:
def plot(X, epoch):
    """Save the current real batch and a freshly generated batch as image grids.

    Writes ``real_samples.png`` (overwritten on every call) and
    ``fake_samples_epoch_NNN.png`` into ``args.outf``.

    Args:
        X: Variable holding the real image batch; its batch size also sets how
            many fake samples are generated.
        epoch: epoch number embedded in the fake-sample file name.
    """
    # The image writer does not create missing directories, so make sure the
    # output folder exists before the first save on a fresh checkout.
    os.makedirs(args.outf, exist_ok=True)

    z = Variable(torch.randn(X.size(0), nz, 1,1))
    vutils.save_image(X.data, '%s/real_samples.png' % args.outf)
    fake = netG(z)
    vutils.save_image(fake.data,
                    '%s/fake_samples_epoch_%03d.png' % (args.outf, epoch))

In [23]:
# Train for epochs 1..5.
# NOTE(review): train() prints args.epochs (25) as the total, so the log reads
# "[n/25]" although only 5 epochs run here -- confirm the short range is intended.
for epoch in range(1, 6):
    train(epoch)

[1/25][0/782] Loss_D: 27.6335 Loss_G: -0.0000 D(x): 0.9977 D(G(z)): 1.0000 / 1.0000
[1/25][1/782] Loss_D: 27.6315 Loss_G: -0.0000 D(x): 0.9995 D(G(z)): 1.0000 / 1.0000
[1/25][2/782] Loss_D: 27.6312 Loss_G: -0.0000 D(x): 0.9998 D(G(z)): 1.0000 / 1.0000
[1/25][3/782] Loss_D: 27.6311 Loss_G: -0.0000 D(x): 0.9999 D(G(z)): 1.0000 / 1.0000
[1/25][4/782] Loss_D: 27.6313 Loss_G: -0.0000 D(x): 0.9998 D(G(z)): 1.0000 / 1.0000
[1/25][5/782] Loss_D: 27.6311 Loss_G: -0.0000 D(x): 0.9999 D(G(z)): 1.0000 / 1.0000
[1/25][6/782] Loss_D: 27.6312 Loss_G: -0.0000 D(x): 0.9998 D(G(z)): 1.0000 / 1.0000
[1/25][7/782] Loss_D: 27.6317 Loss_G: -0.0000 D(x): 0.9993 D(G(z)): 1.0000 / 1.0000
[1/25][8/782] Loss_D: 27.6311 Loss_G: -0.0000 D(x): 0.9999 D(G(z)): 1.0000 / 1.0000
[1/25][9/782] Loss_D: 27.6311 Loss_G: -0.0000 D(x): 0.9999 D(G(z)): 1.0000 / 1.0000
[1/25][10/782] Loss_D: 27.6311 Loss_G: -0.0000 D(x): 0.9999 D(G(z)): 1.0000 / 1.0000
[1/25][11/782] Loss_D: 27.6316 Loss_G: -0.0000 D(x): 0.9994 D(G(z)): 1.0000

Process Process-8:
Process Process-7:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 26, in _worker_loop
    r = index_queue.get()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/hoangnguye

KeyboardInterrupt: 