**Handling all the imports and setting up the CUDA runtime**

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from torch.autograd import Variable

# Device configuration: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Number of samples per batch; used by the data loader and by the training
# functions when they build label and noise tensors
batch_size = 100

# Length of the random noise vector fed to the generator
noise_dim = 100

**Loading the training data**

In [None]:
# Preprocessing: convert each image to a tensor, then normalize with mean 0.5 and
# std 0.5 so pixel values share the [-1, 1] range of the generators' tanh output.
t = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST training split, downloaded into '/files/' when it is not already there.
train_dataset = torchvision.datasets.MNIST(root='/files/', train=True, download=True, transform=t)

# Shuffled mini-batches of `batch_size` images.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
def weights_init(m):
    """Re-initialize a module in place; intended for use with ``Module.apply``.

    Modules whose class name contains ``'Conv'`` get weights drawn from
    N(0, 0.04). Modules whose class name contains ``'BatchNorm'`` get weights
    drawn from N(1, 0.04) and a zero bias. Every other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, mean=0.0, std=.04)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=.04)
        nn.init.constant_(m.bias.data, val=0)

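**Load one of the following GAN architectures. The first one is a vanilla GAN, the second one is a 2-hidden-layer DCGAN, and the last one is a 3-hidden-layer DCGAN. Run only one of the three cells: each defines `Generator` and `Discriminator`, so a later cell overrides an earlier one.**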

**Vanilla GAN**

In [None]:
class Generator(nn.Module):
  """Fully connected generator: noise vector -> 1x28x28 image in [-1, 1].

  Four linear layers (noise_dim -> 256 -> 512 -> 1024 -> 784) with ReLU
  between them and tanh on the output.
  """

  def __init__(self):
    super(Generator, self).__init__()
    # The input width follows the notebook-level `noise_dim`, so changing that
    # setting cannot desynchronize the network from the noise it is fed.
    self.fc1 = nn.Linear(noise_dim, 256)
    self.fc2 = nn.Linear(256, 512)
    self.fc3 = nn.Linear(512, 1024)
    self.fc4 = nn.Linear(1024, 784)

  def forward(self, x):
    """Map noise of shape (batch, noise_dim) to images of shape (batch, 1, 28, 28)."""
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    # 784 = 28 * 28: unflatten the output into single-channel images.
    x = torch.tanh(self.fc4(x)).reshape(-1, 1, 28, 28)
    return x

class Discriminator(nn.Module):
    """Fully connected discriminator: image -> probability in (0, 1).

    Four linear layers (784 -> 1024 -> 512 -> 256 -> 1) with LeakyReLU(0.2)
    and dropout (p=0.3) after each hidden layer, and a sigmoid on the output.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 1)

    def forward(self, x):
        """Score a batch; the input is flattened to (batch, 784) and the output is (batch, 1)."""
        x = x.reshape(-1, 784)
        # F.dropout defaults to training=True, which would keep dropping units
        # even after .eval(); tie it to the module's mode explicitly.
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        return torch.sigmoid(self.fc4(x))

**2-hidden-layer DCGAN**

In [None]:
class Generator(nn.Module):
  """DCGAN generator with two hidden transposed-convolution stages.

  Spatial size grows 1x1 -> 7x7 -> 14x14 -> 28x28 while the channel count
  goes noise_dim -> 256 -> 128 -> 1. Hidden stages use BatchNorm + ReLU;
  the output stage uses tanh.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.ConvTranspose2d(noise_dim, 256, kernel_size=7, bias=False)
    self.conv2 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False)
    self.conv3 = nn.ConvTranspose2d(128, 1, kernel_size=4, stride=2, padding=1, bias=False)
    self.batch1 = nn.BatchNorm2d(256)
    self.batch2 = nn.BatchNorm2d(128)

  def forward(self, x):
    """Turn a batch of noise vectors into (batch, 1, 28, 28) images."""
    # Treat every noise vector as a 1x1 feature map with noise_dim channels.
    seed_maps = x.reshape(-1, noise_dim, 1, 1)
    hidden = F.relu(self.batch1(self.conv1(seed_maps)))
    hidden = F.relu(self.batch2(self.conv2(hidden)))
    return torch.tanh(self.conv3(hidden))

class Discriminator(nn.Module):
  """DCGAN discriminator with two strided convolution stages.

  Each stage halves the spatial size (28 -> 14 -> 7) and applies BatchNorm
  followed by LeakyReLU(0.2). The 64x7x7 = 3136 features are then mapped to
  a single sigmoid probability by a bias-free linear layer.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1, bias=False)
    self.batch1 = nn.BatchNorm2d(32)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1, bias=False)
    self.batch2 = nn.BatchNorm2d(64)
    self.fc1 = nn.Linear(3136, 1, bias=False)

  def forward(self, x):
    """Score a batch of images; returns a (batch, 1) tensor of probabilities."""
    features = self.batch1(self.conv1(x))
    features = F.leaky_relu(features, negative_slope=0.2)
    features = self.batch2(self.conv2(features))
    features = F.leaky_relu(features, negative_slope=0.2)
    # Flatten the 64 x 7 x 7 feature maps for the linear classifier.
    score = self.fc1(features.reshape(-1, 3136))
    return torch.sigmoid(score)


**3-hidden-layer DCGAN**

In [None]:
class Generator(nn.Module):
  """DCGAN generator with three hidden transposed-convolution stages.

  Spatial size grows 1x1 -> 4x4 -> 8x8 -> 16x16 -> 28x28 while the channel
  count goes noise_dim -> 256 -> 256 -> 128 -> 1. Hidden stages use
  BatchNorm + ReLU; the output stage uses tanh.
  """

  def __init__(self):
    super(Generator, self).__init__()
    # The input channel count follows the notebook-level `noise_dim`, so the
    # network stays in sync with the noise vectors it is fed.
    self.conv1 = nn.ConvTranspose2d(noise_dim, 256, kernel_size=4, bias=False)
    self.conv2 = nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1, bias=False)
    self.conv3 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False)
    # padding=3 trims the stride-2 upsampling of 16x16 down to 28x28.
    self.conv4 = nn.ConvTranspose2d(128, 1, kernel_size=4, stride=2, padding=3, bias=False)
    self.batch1 = nn.BatchNorm2d(256)
    self.batch2 = nn.BatchNorm2d(256)
    self.batch3 = nn.BatchNorm2d(128)

  def forward(self, x):
    """Map noise of shape (batch, noise_dim) to images of shape (batch, 1, 28, 28)."""
    # Treat every noise vector as a 1x1 feature map with noise_dim channels.
    x = F.relu(self.batch1(self.conv1(x.reshape(-1, noise_dim, 1, 1))))
    x = F.relu(self.batch2(self.conv2(x)))
    x = F.relu(self.batch3(self.conv3(x)))
    x = torch.tanh(self.conv4(x))
    return x

class Discriminator(nn.Module):
  """DCGAN discriminator: two strided convolutions plus a two-layer classifier.

  Each convolution stage halves the spatial size (28 -> 14 -> 7) and applies
  BatchNorm followed by LeakyReLU(0.2). The 64x7x7 = 3136 features pass
  through a 256-unit LeakyReLU(0.2) layer and a final sigmoid unit.
  """

  def __init__(self):
    super(Discriminator, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1, bias=False)
    self.batch1 = nn.BatchNorm2d(32)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1, bias=False)
    self.batch2 = nn.BatchNorm2d(64)
    self.fc1 = nn.Linear(3136, 256, bias=False)
    self.fc2 = nn.Linear(256, 1, bias=False)

  def forward(self, x):
    """Score a batch of images; returns a (batch, 1) tensor of probabilities."""
    x = F.leaky_relu(self.batch1(self.conv1(x)), 0.2)
    x = F.leaky_relu(self.batch2(self.conv2(x)), 0.2).reshape(-1, 3136)
    # Pass the slope explicitly: the default (0.01) would differ from the 0.2
    # used by every other LeakyReLU in this network.
    x = F.leaky_relu(self.fc1(x), 0.2)
    x = torch.sigmoid(self.fc2(x))
    return x

In [None]:
# Instantiate whichever Generator/Discriminator pair was defined last and move
# both networks to the selected device.
G = Generator().to(device)
D = Discriminator().to(device)

# Re-initialize Conv*/BatchNorm* layers via weights_init; other layer types
# (e.g. Linear) keep their default initialization.
G.apply(weights_init)
D.apply(weights_init)

# loss function: binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss() 

# optimizer: Adam for both networks; the discriminator runs at a quarter of
# the generator's learning rate
lr = 0.0002
optimizerG = optim.Adam(G.parameters(), lr = lr, betas=(0.5, 0.999))
optimizerD = optim.Adam(D.parameters(), lr = lr/4, betas=(0.5, 0.999))

# Print both architectures for inspection
print(G)
print(D)

In [None]:
def train_Generator(x):
    """Run one generator update and return its loss as a Python float.

    A fresh batch of `batch_size` noise vectors is pushed through G and then D.
    G is optimized so that D labels those samples as real (target 1).

    Args:
        x: the current batch of real images. Unused; accepted so the call
           mirrors ``train_Discriminator``.
    """
    G.zero_grad()

    # Plain tensors suffice: the autograd.Variable wrapper is deprecated.
    inputs = torch.randn(batch_size, noise_dim).to(device)
    # The generator wants the discriminator to answer "real" for its samples.
    labels = torch.ones(batch_size, 1).to(device)

    G_output = G(inputs)
    D_output = D(G_output)
    loss = criterion(D_output, labels)

    loss.backward()
    # Only G's parameters are stepped here; optimizerD is not touched.
    optimizerG.step()

    return loss.item()

def train_Discriminator(x):
    """Run one discriminator update and return its loss as a Python float.

    The loss is the sum of BCE on the real batch (smoothed target 0.9) and BCE
    on an equally sized batch of generated images (target 0).

    Args:
        x: batch of real images, reshapeable to (batch, 1, 28, 28).
    """
    D.zero_grad()

    # training discriminator on real data
    real_input = x.reshape(-1, 1, 28, 28).to(device)
    # Size labels and noise from the batch actually received, so a partial
    # final batch does not cause a shape mismatch in the loss.
    n_samples = real_input.size(0)
    # One-sided label smoothing: real targets are 0.9 rather than 1.0.
    real_labels = (torch.ones(n_samples, 1) * .9).to(device)

    D_output = D(real_input)
    real_loss = criterion(D_output, real_labels)

    # training discriminator on fake data
    noise = torch.randn(n_samples, noise_dim).to(device)
    # detach(): this step only updates D, so there is no need to backpropagate
    # through the generator.
    fake_input = G(noise).detach()
    fake_labels = torch.zeros(n_samples, 1).to(device)

    D_output = D(fake_input)
    fake_loss = criterion(D_output, fake_labels)

    loss = real_loss + fake_loss
    loss.backward()
    optimizerD.step()

    return loss.item()

**Training loop that trains both the discriminator and the generator on each batch of data**

In [None]:
n_epoch = 200

for epoch in range(1, n_epoch + 1):
    # Per-batch losses collected over this epoch.
    D_losses, G_losses = [], []
    for real_images, _ in train_loader:
        # Discriminator first, then generator, on every batch; labels are ignored.
        D_losses.append(train_Discriminator(real_images))
        G_losses.append(train_Generator(real_images))

    # Report the epoch-average loss of each network.
    mean_d = torch.FloatTensor(D_losses).mean()
    mean_g = torch.FloatTensor(G_losses).mean()
    print('[%d/%d]: loss_d: %.3f, loss_g: %.3f' % (epoch, n_epoch, mean_d, mean_g))

**Helper function that displays images produced by the generator**

In [None]:
import torchvision.utils as vutils

def show(n_images=100, nrow=10):
  """Generate images with G and display them as a grid.

  Args:
      n_images: number of images to sample (default 100).
      nrow: number of images per grid row (default 10).
  """
  # Sample from the same standard-normal prior used during training
  # (torch.randn); uniform noise from torch.rand is a distribution the
  # generator was never trained on.
  noise = torch.randn(n_images, noise_dim).to(device)
  fake_batch = G(noise).detach()
  print(fake_batch.shape)
  # normalize=True rescales pixel values into [0, 1] for display.
  grid = vutils.make_grid(fake_batch, nrow=nrow, padding=2, normalize=True).cpu()
  plt.figure(figsize=(10,10))
  plt.axis("off")
  plt.title("Generated Images")
  # make_grid returns (C, H, W); imshow expects (H, W, C).
  plt.imshow(np.transpose(grid, (1,2,0)))

show()