In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

import numpy as np
from torch.distributions.multivariate_normal import MultivariateNormal

import matplotlib.pyplot as plt 
  
use_cuda = torch.cuda.is_available()

# Variational Autoencoder

The idea behind an autoencoder is to train an encoder network that produces some vector that "represents" our input, and a decoder network that generates an output given some representation vector.

The idea is to train the autoencoder to effective learn a representation of the input - it does so by minimizing the difference between the input and the output of the generated samples. 

In a variational autoencoder, we add a constraint such that the representation it creates roughly follows a unit gaussian distribution. To do this, we reparamaterize our representation into two vectors - one a vector of means and then a vector of standard deviations. 

We update the autoecnder loss function and add in a term that is the KL divergence of the representation to the unit gaussian. 

In [4]:
# my variational autoencoder

class VAE(nn.Module):
    
    def __init__(self, input_size, hidden_size, latent_size):
        self.encoder = Encoder(input_size, hidden_size, latent_size)
        self.decoder = nn.Linear(latent_vec_size, hidden_size)
        
    def forward(self, x):
        mean, logvar = self.encoder(x)
        latent = self.reparamaterize(mean, logvar)
        reconstruction = self.decoder(latent)
        
    def reparameterize(self, mean, logvar):
        if self.training:
            n = mean.dim()
            std = torch.exp(0.5*logvar)
            epsilon = MultivariateNormal(torch.zeros(n), torch.eye(n)) 
            return mean + std * epsilon
        else:
            return mean
        
class Encoder(nn.Module):
    
    def __init__(self, input_size, hidden_size, latent_size):
        
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2_mean = nn.Linear(hidden_size, latent_size)
        self.linear2_std = nn.Linear(hidden_size, latent_size)
    
    def forward(self, x):
        h1 = F.relu(self.linear1(x))
        
        return self.linear2_mean(h1), self.linear2_std(h1)
    
    
class Decoder(nn.Module):
    
    def __init__(self, input_size, hidden_size, latent_size):
        
        self.linear1 = nn.Linear(latent_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, input_size)
    
    def forward(self, x):
        h1 = F.relu(self.linear1(x))
        return F.sigmoid(self.linear2(h1))
    

def vae_loss(reconstruction, x, mean, std):
    reconstruction_loss = F.binary_cross_entropy(reconstruction, x)
    
    kl_loss = 0.5 * torch.sum(1+  logvar + mu.pow(2) - logvar.exp())
    
    return reconstruction_loss - kl_loss
    

In [4]:
import os


root = './data'
if not os.path.exists(root):
    os.mkdir(root)
    
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])

train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)

batch_size = 100

train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

print('==>>> total trainning batch number: {}'.format(len(train_loader)))
print('==>>> total testing batch number: {}'.format(len(test_loader)))

NameError: name 'transforms' is not defined