In [3]:
# Creating a Stacked Auto Encoder in PyTorch on the CIFAR 10 Dataset 

In [4]:
# Importing the Required Header Files
import torch
import torch.utils.data
from torch import nn, optim
from torch.autograd import Variable
import torch.nn as nn
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [5]:
# Loading the Training Data
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, download=True,
                     transform=transforms.ToTensor()),
    batch_size=128, shuffle=True)
# Using Batch Size of 128 since it performed well in previous case

Files already downloaded and verified


In [6]:
# Loading the Testing Data
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.ToTensor()),
    batch_size=128, shuffle=False)
# Using Batch Size of 128 since it performed well in previous case

In [7]:
# Building the Stacked Auto Encoder 

In [35]:
intermediate_size=256 # Tried sizes of 32/64/128/256
hidden_size=20

In [36]:
class SAE(nn.Module):
    def __init__(self):
        super(SAE, self).__init__()

        # Defining the Architecture of the Stacked Auto Encoder 
            
        # Encoder Part
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(3, 32, kernel_size=2, stride=2, padding=0)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(16 * 16 * 32, intermediate_size)

        # Bottle-Neck Part with Latent Space
        #self.fc21 = nn.Linear(intermediate_size, hidden_size)
        #self.fc22 = nn.Linear(intermediate_size, hidden_size)

        # Decoder Part
        #self.fc3 = nn.Linear(hidden_size, intermediate_size)
        self.fc4 = nn.Linear(intermediate_size, 8192)
        self.deconv1 = nn.ConvTranspose2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.deconv2 = nn.ConvTranspose2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.deconv3 = nn.ConvTranspose2d(32, 32, kernel_size=2, stride=2, padding=0)
        self.conv5 = nn.Conv2d(32, 3, kernel_size=3, stride=1, padding=1)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def encode(self, x):
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.relu(self.conv3(out))
        out = self.relu(self.conv4(out))
        out = out.view(out.size(0), -1)
        h1 = self.relu(self.fc1(out))
        #return self.fc21(h1), self.fc22(h1)
        return h1

    def reparameterize(self, mu, logvar):
        if self.training:
            std = logvar.mul(0.5).exp_()
            eps = Variable(std.data.new(std.size()).normal_())
            return eps.mul(std).add_(mu)
        else:
            return mu

    def decode(self, z):
        #h3 = self.relu(self.fc3(z))
        #out = self.relu(self.fc4(h3))
        out = self.relu(self.fc4(z))
        out = out.view(out.size(0), 32, 16, 16)
        out = self.relu(self.deconv1(out))
        out = self.relu(self.deconv2(out))
        out = self.relu(self.deconv3(out))
        out = self.sigmoid(self.conv5(out))
        return out

    def forward(self, x):
        #mu, logvar = self.encode(x)
        z=self.encode(x)
        #z = self.reparameterize(mu, logvar)
        #return self.decode(z), mu, logvar
        return self.decode(z)

In [37]:
# Defining a model based on the above architecture
model = SAE()

In [38]:
# Moving model to cuda to train on GPU
model.cuda()

SAE(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(3, 32, kernel_size=(2, 2), stride=(2, 2))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=8192, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=8192, bias=True)
  (deconv1): ConvTranspose2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (deconv2): ConvTranspose2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (deconv3): ConvTranspose2d(32, 32, kernel_size=(2, 2), stride=(2, 2))
  (conv5): Conv2d(32, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

In [39]:
# Defining an OPtimizer
optimizer = optim.RMSprop(model.parameters(), lr=0.001)

In [40]:
# Defining Loss Function with latent space
# There are 2 different losses:
# Reconstruction + Divergence
'''
def loss_function(recon_x, x, mu, logvar):
    R = F.binary_cross_entropy(recon_x.view(-1, 32 * 32 * 3),
                                 x.view(-1, 32 * 32 * 3), size_average=False)
    D = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return R + D
'''
# Below is the loss function for the without latent space part
def loss_function(recon_x,x):
    criterion = nn.BCELoss()
    return criterion(recon_x, x)

In [41]:
# Function for training the model many times
def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = Variable(data)
        data = data.cuda()
        optimizer.zero_grad()
        #recon_batch, mu, logvar = model(data)
        recon_batch = model(data)
        #loss = loss_function(recon_batch, data, mu, logvar)
        loss = loss_function(recon_batch, data)
        loss.backward()
        train_loss += loss.data[0]
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.data[0] / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))

In [42]:
def test(epoch):
    model.eval()
    test_loss = 0
    for i, (data, _) in enumerate(test_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        #recon_batch, mu, logvar = model(data)
        recon_batch = model(data)
        #test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
        test_loss += loss_function(recon_batch, data).data[0]
        if epoch == 9 and i == 0:
            n = min(data.size(0), 8)
            comparison = torch.cat([data[:n],
                                   recon_batch[:n]])
            save_image(comparison.data.cpu(),
                       'snapshots/conv_sae/reconstruction_1.3_'+ str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))

In [43]:
for epoch in range(10):
    train(epoch)
    test(epoch)
    '''
    if epoch == 9:
        sample = Variable(torch.randn(64, hidden_size))
        sample = sample.cuda()
        sample = model.decode(sample).cpu()
        save_image(sample.data.view(64, 3, 32, 32),
                   'snapshots/sample_1_' + str(epoch) + '.png')
    '''

  


====> Epoch: 0 Average loss: 0.0051


  
  # Remove the CWD from sys.path while we load stuff.


====> Test set loss: 0.0048
====> Epoch: 1 Average loss: 0.0047
====> Test set loss: 0.0047
====> Epoch: 2 Average loss: 0.0047
====> Test set loss: 0.0047
====> Epoch: 3 Average loss: 0.0046
====> Test set loss: 0.0046
====> Epoch: 4 Average loss: 0.0046
====> Test set loss: 0.0046
====> Epoch: 5 Average loss: 0.0046
====> Test set loss: 0.0046
====> Epoch: 6 Average loss: 0.0045
====> Test set loss: 0.0046
====> Epoch: 7 Average loss: 0.0045
====> Test set loss: 0.0046
====> Epoch: 8 Average loss: 0.0045
====> Test set loss: 0.0046
====> Epoch: 9 Average loss: 0.0045
====> Test set loss: 0.0046
