In [16]:
# Implementation of "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets" by Xi Chen et al.
# The Q loss function can be modified in many ways
# MSE for continuous and fake/real decisions
# Cross Entropy for Latent Code
# Can also Use L1 distance for latent code
# Feature Smoothing not implemented yet
# Implement Minibatch Discrimination and Virtual batch normalization

In [17]:
# Import packages and modules
import numpy as np
import itertools
import math
import os
import torch    # Pytorch 0.4
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): hard-coded, machine-specific absolute path. os.chdir raises
# FileNotFoundError when this directory does not exist, and the relative
# './dataset' download directory used by mnist_data() is resolved against it.
os.chdir("/home/agastya/Downloads")
%matplotlib inline

In [18]:
# Hyperparameters and data configuration shared by the cells below.
n_epochs = 200    # intended number of training epochs (int); not referenced by the cells shown here
batch_size = 64    # mini-batch size for the data loader and for sampled generator inputs (int)
lr = 0.0002    # learning rate passed to every Adam optimizer (float)
b1 = 0.5    # Adam beta1: decay rate of the first-moment estimate (float)
b2 = 0.999    # Adam beta2: decay rate of the second-moment estimate (float)
n_cpu = 8    # number of cpu threads to use during batch generation (int); not referenced by the cells shown here
latent_dim = 62    # dimensionality of the generator's noise vector (int)
code_dim = 2    # number of continuous latent-code dimensions (int)
img_size = 32    # height/width the MNIST images are resized to (int)
channels = 1    # number of image channels (int)
sample_interval = 400    # interval between image samples (int); not referenced by the cells shown here
n_classes = 10    # number of class labels; also the width of the one-hot label vectors

In [19]:
## 1.0 Data Preparation and Preprocessing
# Use transforms.Resize(img_size) to resize the image into (batch_size, 1, img_size, img_size)
def mnist_data():
    """Build the MNIST training dataset with the preprocessing the GAN expects.

    Images are resized to ``img_size``, converted to tensors and normalised
    from [0, 1] to [-1, 1] so that real images share the value range of the
    generator's tanh output.

    Returns:
        torchvision.datasets.MNIST: the training split, downloaded into
        ``./dataset`` (relative to the current working directory) if missing.
    """
    compose = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        # MNIST is single-channel, so mean/std must be 1-tuples. The previous
        # 3-channel statistics only worked on old torchvision releases;
        # recent versions raise a shape-mismatch error.
        transforms.Normalize((0.5,), (0.5,)),
    ])
    out_dir = './dataset'
    return datasets.MNIST(root=out_dir, train=True, transform=compose, download=True)
# Materialise the training set once and wrap it in a shuffling mini-batch loader.
data = mnist_data()
data_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True)

In [20]:
# Weights initialization
# Gaussian Distribution works the best
def init_weights(m):
    """Re-initialise a single module in place; intended for ``Module.apply``.

    Modules whose class name contains ``'Conv'`` get weights drawn from
    N(0, 0.02). Modules whose class name contains ``'BatchNorm'`` get weights
    drawn from N(1, 0.02) and zero biases. Every other module is left untouched.
    """
    layer_type = type(m).__name__

    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0.0)

In [21]:
# Whether a CUDA device is usable; decides if models and losses are moved to the GPU.
cuda = torch.cuda.is_available()

In [22]:
# Returns one hot encoded variable
def to_categorical(y, num_columns):
    """One-hot encode integer class labels.

    Args:
        y: class indices as a NumPy array, a Python sequence, or a torch
            tensor (on any device, integer- or float-typed). The input is
            flattened, so one row is produced per label.
        num_columns: number of classes, i.e. the width of each one-hot row.

    Returns:
        torch.Tensor: a CPU float32 tensor of shape ``(len(y), num_columns)``
        with a single 1 per row.

    Raises:
        IndexError: if a label is outside ``[-num_columns, num_columns)``.
    """
    if torch.is_tensor(y):
        # .numpy() is only defined for CPU tensors that are detached from autograd.
        y = y.detach().cpu().numpy()
    indices = np.asarray(y).reshape(-1).astype(np.int64)

    y_cat = np.zeros((indices.shape[0], num_columns), dtype=np.float32)
    y_cat[np.arange(indices.shape[0]), indices] = 1
    return torch.from_numpy(y_cat)

In [23]:
#Generator NN
# DCGAN, with input dims concatenated with classes and latent code
# Upsampling is used instead of ConvTranspose
# Normal convolution is used with kernel_size=3, and dimensions are retained with appropriate padding
class Generator(nn.Module):
    """DCGAN-style generator conditioned on a class label and a latent code.

    The noise vector, the one-hot label and the continuous code are
    concatenated, projected by a linear layer to a ``128 x s x s`` feature map
    (``s = img_size // 4``) and upsampled twice by a factor of 2 with 3x3
    convolutions in between. The output passes through tanh, so it lies in
    [-1, 1].

    Args:
        latent_dims: size of the noise vector.
        code_dims: size of the continuous latent code.
        n_classes: width of the one-hot class label.
        img_size: side length of the generated images. The output side is
            ``4 * (img_size // 4)``, so it equals ``img_size`` only when
            ``img_size`` is a multiple of 4.
        channels: number of output image channels (default 1, the previously
            hard-coded value).
    """

    def __init__(self, latent_dims, code_dims, n_classes, img_size, channels=1):
        super(Generator, self).__init__()

        self.negative_slope = 0.2
        self.input_dims = latent_dims + code_dims + n_classes
        self.init_size = img_size // 4

        self.layer1 = nn.Linear(self.input_dims, 128 * self.init_size ** 2)
        self.batchnorm1 = nn.BatchNorm2d(128)
        self.upsample1 = nn.Upsample(scale_factor=2)
        self.conv1 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
        # NOTE(review): the second positional argument of BatchNorm2d is `eps`,
        # not `momentum`. It is spelled out as a keyword here so the value that
        # is actually in effect is visible; numerics are unchanged. Confirm
        # whether `momentum` was intended.
        self.batchnorm2 = nn.BatchNorm2d(128, eps=0.8)
        self.upsample2 = nn.Upsample(scale_factor=2)
        self.conv2 = nn.Conv2d(128, 64, 3, stride=1, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(64, eps=0.8)
        self.conv3 = nn.Conv2d(64, channels, 3, stride=1, padding=1)

    def forward(self, noise, labels, code):
        """Generate a batch of images.

        Args:
            noise: float tensor of shape ``(batch, latent_dims)``.
            labels: one-hot float tensor of shape ``(batch, n_classes)``.
            code: float tensor of shape ``(batch, code_dims)``.

        Returns:
            Tensor of shape ``(batch, channels, H, W)`` with values in [-1, 1].
        """
        out = torch.cat((noise, labels, code), -1)
        out = self.layer1(out)
        out = out.view(out.size(0), 128, self.init_size, self.init_size)
        out = self.upsample1(self.batchnorm1(out))
        out = F.leaky_relu_(self.batchnorm2(self.conv1(out)), self.negative_slope)
        out = self.conv2(self.upsample2(out))
        # Use the configured slope here as well; it was previously omitted and
        # silently fell back to PyTorch's default of 0.01.
        out = F.leaky_relu_(self.batchnorm3(out), self.negative_slope)
        # torch.tanh replaces the deprecated F.tanh.
        return torch.tanh(self.conv3(out))

In [24]:
# Discriminator NN
# Convolutional layers with dropout and batch normalization
# Feature extraction and abstraction layers are same for adversarial, continuous and mutual_information objectives
# Adversarial layer - mapping to a sigmoid layer, outputting whether real/fake
# Auxiliary layer - mapping to n_classes and then a softmax layer, outputting probabilities of class label
# Latent Layer - Mapping to latent code dimensions, which can be done in two ways
# MSE with the output being the actual value of latent code
# CrossEntropy - with the output being the mean and variance of the gaussian distribution the latent code is picked from
class Discriminator(nn.Module):
    """Shared-trunk discriminator with adversarial, class and latent-code heads.

    Four stride-2 3x3 convolutions (LeakyReLU, 2-D dropout, batch norm on all
    but the first) reduce the image to a 128-channel feature map, which is
    flattened and fed to three linear heads.

    Args:
        img_size: side length of the (square) input images.
        n_classes: number of classes predicted by the auxiliary head.
        code_dims: number of continuous latent-code values predicted.
        channels: number of input image channels (default 1, the previously
            hard-coded value).
    """

    def __init__(self, img_size, n_classes, code_dims, channels=1):
        super(Discriminator, self).__init__()

        self.negative_slope = 0.2

        # Discriminator block
        self.conv1 = nn.Conv2d(channels, 16, kernel_size=3, stride=2, padding=1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)
        self.dropout2 = nn.Dropout2d(0.25)
        # NOTE(review): the second positional argument of BatchNorm2d is `eps`,
        # not `momentum`. It is spelled out as a keyword here so the value that
        # is actually in effect is visible; numerics are unchanged. Confirm
        # whether `momentum` was intended.
        self.batchnorm2 = nn.BatchNorm2d(32, eps=0.8)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.dropout3 = nn.Dropout2d(0.25)
        self.batchnorm3 = nn.BatchNorm2d(64, eps=0.8)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.dropout4 = nn.Dropout2d(0.25)
        self.batchnorm4 = nn.BatchNorm2d(128, eps=0.8)

        # Each kernel-3 / stride-2 / padding-1 convolution maps a side of n to
        # ceil(n / 2), so four of them give ceil(img_size / 16). Floor division
        # is only correct for multiples of 16 (e.g. 28 -> 2, not 1).
        downsampled_size = (img_size + 15) // 16
        feature_dims = 128 * downsampled_size ** 2

        # Output layers
        self.adv_layer = nn.Linear(feature_dims, 1)
        self.aux_layer = nn.Sequential(
            nn.Linear(feature_dims, n_classes),
            # Explicit class dimension; the implicit choice is deprecated and
            # emits a warning on every forward pass.
            nn.Softmax(dim=1))
        self.latent_layer = nn.Linear(feature_dims, code_dims)

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: float tensor of shape ``(batch, channels, img_size, img_size)``.

        Returns:
            tuple: ``(validity, label, latent_code)`` where ``validity`` is an
            unbounded real/fake score of shape ``(batch, 1)``, ``label`` holds
            softmax class probabilities of shape ``(batch, n_classes)`` and
            ``latent_code`` is the code estimate of shape ``(batch, code_dims)``.
        """
        out = self.dropout1(F.leaky_relu_(self.conv1(x), self.negative_slope))
        out = self.batchnorm2(self.dropout2(F.leaky_relu_(self.conv2(out), self.negative_slope)))
        out = self.batchnorm3(self.dropout3(F.leaky_relu_(self.conv3(out), self.negative_slope)))
        out = self.batchnorm4(self.dropout4(F.leaky_relu_(self.conv4(out), self.negative_slope)))
        out = out.view(out.shape[0], -1)

        validity = self.adv_layer(out)
        label = self.aux_layer(out)
        latent_code = self.latent_layer(out)

        return validity, label, latent_code

In [25]:
# Loss criteria: squared error for the real/fake score and for the continuous
# latent code, cross entropy for the categorical label.
adversarial_loss = nn.MSELoss()
categorical_loss = nn.CrossEntropyLoss()
continuous_loss = nn.MSELoss()

# Networks: construct both, then re-initialise both via init_weights.
generator = Generator(latent_dims=latent_dim, code_dims=code_dim,
                      n_classes=n_classes, img_size=img_size)
discriminator = Discriminator(img_size=img_size, n_classes=n_classes,
                              code_dims=code_dim)
generator.apply(init_weights)
discriminator.apply(init_weights)

# Move networks and criteria to the GPU when one is available.
if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
    categorical_loss.cuda()
    continuous_loss.cuda()

# One Adam optimizer per network, plus a joint one over both parameter sets
# for the information (label + code) loss.
gen_optim = optim.Adam(params=generator.parameters(), lr=lr, betas=(b1, b2))
disc_optim = optim.Adam(params=discriminator.parameters(), lr=lr, betas=(b1, b2))
infogan_optim = optim.Adam(
    params=list(generator.parameters()) + list(discriminator.parameters()),
    lr=lr,
    betas=(b1, b2),
)

In [26]:
# Regularization factor
# Weights of the two terms that make up the information loss in train_info().
lambda_cat = 1  # multiplies the categorical (class-label) loss
lambda_con = 0.1  # multiplies the continuous latent-code loss

In [27]:
def noise(batch_size, latent_dim):
    """Sample generator noise from a standard Gaussian distribution.

    Returns a float32 tensor of shape ``(batch_size, latent_dim)``, placed on
    the GPU whenever CUDA is available.
    """
    samples = np.random.normal(loc=0, scale=1, size=(batch_size, latent_dim))
    z = torch.from_numpy(samples).float()
    return z.cuda() if torch.cuda.is_available() else z

def real_data_targets(size):
    """Return the adversarial targets for real images.

    Args:
        size: number of targets (the batch size).

    Returns:
        torch.Tensor: float32 tensor of ones with shape ``(size, 1)``, on the
        GPU whenever CUDA is available.
    """
    # Build the tensor directly: wrapping an existing tensor in torch.tensor()
    # makes a redundant copy and emits a UserWarning.
    data = torch.ones(size, 1, dtype=torch.float)
    if torch.cuda.is_available():
        return data.cuda()
    else:
        return data

def fake_data_targets(size):
    """Return the adversarial targets for generated images.

    Args:
        size: number of targets (the batch size).

    Returns:
        torch.Tensor: float32 tensor of zeros with shape ``(size, 1)``, on the
        GPU whenever CUDA is available.
    """
    # Build the tensor directly: wrapping an existing tensor in torch.tensor()
    # makes a redundant copy and emits a UserWarning.
    data = torch.zeros(size, 1, dtype=torch.float)
    if torch.cuda.is_available():
        return data.cuda()
    else:
        return data

def fake_data_labels(size, num_classes=10):
    """Sample random class labels for the generator.

    Args:
        size: number of labels to draw.
        num_classes: labels are drawn uniformly from the half-open range
            ``[0, num_classes)``. Defaults to 10, the previously hard-coded
            value.

    Returns:
        torch.Tensor: int64 tensor of shape ``(size,)``, on the GPU whenever
        CUDA is available.
    """
    # Request int64 directly instead of re-wrapping the result in
    # torch.tensor(), which makes a redundant copy and emits a UserWarning.
    labels = torch.randint(low=0, high=num_classes, size=(size,), dtype=torch.long)
    if torch.cuda.is_available():
        return labels.cuda()
    else:
        return labels
    
def code(batch_size, code_dim):
    """Sample the continuous latent code uniformly from [-1, 1).

    Returns a float32 tensor of shape ``(batch_size, code_dim)``, placed on
    the GPU whenever CUDA is available.
    """
    samples = np.random.uniform(low=-1, high=1, size=(batch_size, code_dim))
    latent = torch.from_numpy(samples).float()
    return latent.cuda() if torch.cuda.is_available() else latent

In [28]:
def train_gen(optimizer, gen_images):
    """Run one generator update using the adversarial loss.

    The generated images are scored by the module-level ``discriminator`` and
    the generator is pushed towards scores the discriminator assigns to real
    images (target 1).

    Args:
        optimizer: optimizer holding the generator's parameters.
        gen_images: generator output, still attached to the autograd graph so
            that gradients reach the generator.

    Returns:
        torch.Tensor: the scalar adversarial loss, detached from the graph.
    """
    optimizer.zero_grad()

    predictions, _, _ = discriminator(gen_images)
    # ones_like puts the "real" targets on the same device and dtype as the
    # predictions and avoids re-wrapping a tensor in torch.tensor(), which
    # copies and warns.
    target = torch.ones_like(predictions)
    loss = adversarial_loss(predictions, target)
    loss.backward()
    optimizer.step()
    # Detach so callers that keep the value do not keep the graph alive.
    return loss.detach()

In [29]:
def train_disc(optimizer, real_data, fake_data):
    """Run one discriminator update on a real batch and a generated batch.

    Args:
        optimizer: optimizer holding the discriminator's parameters.
        real_data: batch of real images (target 1).
        fake_data: batch of generated images, expected to be detached from the
            generator's graph (target 0).

    Returns:
        torch.Tensor: the mean of the real and fake adversarial losses,
        detached from the graph.
    """
    optimizer.zero_grad()

    # Loss for real images. ones_like/zeros_like keep the targets on the same
    # device and dtype as the predictions and avoid torch.tensor(tensor),
    # which copies and warns.
    real_predictions, _, _ = discriminator(real_data)
    real_loss = adversarial_loss(real_predictions, torch.ones_like(real_predictions))

    # Loss for generated images
    fake_predictions, _, _ = discriminator(fake_data)
    fake_loss = adversarial_loss(fake_predictions, torch.zeros_like(fake_predictions))

    total_loss = (real_loss + fake_loss) / 2
    total_loss.backward()
    optimizer.step()

    # Detach so callers that keep the value do not keep the graph alive.
    return total_loss.detach()

In [30]:
def train_info(optimizer):
    """Run one mutual-information update on generator and discriminator.

    Fresh labels, noise and latent codes are sampled, images are generated
    from them, and the discriminator's label and code heads are trained to
    recover what was fed to the generator.

    Args:
        optimizer: optimizer over both the generator's and the discriminator's
            parameters.

    Returns:
        torch.Tensor: the weighted information loss, detached from the graph.
    """
    optimizer.zero_grad()

    # Place every sampled input on the device the generator lives on.
    device = next(generator.parameters()).device

    # Integer labels cannot require gradients, so they are used as-is.
    gt_labels = fake_data_labels(batch_size).to(device)

    gen_info_noise = noise(batch_size, latent_dim).to(device)
    # .numpy() is only valid on CPU tensors, and the one-hot result is created
    # on the CPU, so convert on the CPU and then move to the target device.
    gen_info_labels = to_categorical(gt_labels.cpu().numpy(), n_classes).to(device)
    gen_info_code = code(batch_size, code_dim).to(device)

    info_imgs = generator(gen_info_noise, gen_info_labels, gen_info_code)
    _, label_predictions, code_predictions = discriminator(info_imgs)

    # The discriminator's label head already applies softmax, while
    # CrossEntropyLoss applies log-softmax itself. Feeding log-probabilities
    # makes that internal log-softmax an identity (log_softmax(log p) == log p
    # when p sums to 1), so the loss is the true cross entropy rather than one
    # computed on doubly-normalised scores. The clamp guards against log(0).
    label_log_probs = torch.log(torch.clamp(label_predictions, min=1e-8))

    info_loss = (lambda_cat * categorical_loss(label_log_probs, gt_labels)
                 + lambda_con * continuous_loss(code_predictions, gen_info_code))
    info_loss.backward()
    optimizer.step()

    # Detach so callers that keep the value do not keep the graph alive.
    return info_loss.detach()

In [31]:
def trainGAN(num_epochs):
    """Train the InfoGAN for ``num_epochs`` passes over ``data_loader``.

    Each mini-batch performs three updates in order: a generator step on the
    adversarial loss, a discriminator step on real versus generated images,
    and a joint information-loss step.

    Args:
        num_epochs: number of full passes over the training data.

    Returns:
        None. The module-level networks are updated in place.
    """
    # Inputs must live on the same device as the networks.
    device = next(generator.parameters()).device

    for _ in range(num_epochs):
        # Dataset labels are unused: class labels fed to the generator are
        # sampled at random below.
        for real_data, _ in data_loader:

            # Real images need no gradient; only move/cast them.
            real_data = real_data.to(device=device, dtype=torch.float)

            # Initialize inputs. The one-hot encoding is done on the CPU
            # (.numpy() is only valid on CPU tensors) and then moved to the
            # target device.
            gen_input_noise = noise(batch_size, latent_dim).to(device)
            sampled_labels = fake_data_labels(batch_size)
            gen_input_labels = to_categorical(sampled_labels.cpu().numpy(),
                                              num_columns=n_classes).to(device)
            gen_input_code = code(batch_size, code_dim).to(device)

            # Train Generator
            gen_images = generator(gen_input_noise, gen_input_labels, gen_input_code)
            train_gen(gen_optim, gen_images)

            # Train Discriminator on detached fakes so that no gradient flows
            # back into the generator.
            fake_data = gen_images.detach()
            train_disc(disc_optim, real_data, fake_data)

            # Information Loss
            train_info(infogan_optim)

In [None]:
# Run training for 10 epochs (the n_epochs setting is not used by this call).
trainGAN(10)

  input = module(input)
