## Exercise 1 Autoencoder

# What the exercise asked for:
Train a convolutional autoencoder on MNIST, study the influence of the bottleneck size and generate some images.

# The steps to be followed:
1. Load MNIST train and test sets. Split the original training data into 95% training and 5% validation data.
2. Implement a convolutional autoencoder (with separate Encoder and Decoder modules).
3. Train the convolutional autoencoder, with different bottleneck sizes. Plot the train and validation loss curves of all autoencoders in the same figure.
4. Compute the average image reconstruction error (MSE) of the trained models on the MNIST validation and test sets. Show the results in a table, including the number of parameters of each model.
5. Select one of the autoencoders and feed it 5 random MNIST images from the test set. Show them along with their reconstructions.
6. Generate 5 new images by injecting random values as input to the decoder. Show them.

In [0]:
import torch
from torchvision import datasets, transforms
import torch.optim as optim
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

import torch.nn as nn
import torch.nn.functional as F

In [0]:
# Hyperparameters shared by the data loaders, the model and the training loop.
hparams = dict(
    batch_size=20,  # was previously 64
    num_epochs=5,
    test_batch_size=64,
    hidden_size=128,
    num_classes=10,
    num_inputs=784,  # number of pixels per image (28x28)
    learning_rate=1e-3,
    log_interval=100,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    hparams['device'] = 'cuda'
else:
    hparams['device'] = 'cpu'

In [0]:
# Both splits share one preprocessing pipeline: convert the image to a tensor,
# then normalise it with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True,
                                transform=mnist_transform)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True,
                               transform=mnist_transform)

# Hold out 5% of the original training set for validation.
validation_split = 0.05
random_seed= 30
dataset_size = len(mnist_trainset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

# Shuffle the indices with a fixed seed so the split is reproducible
np.random.seed(random_seed)
np.random.shuffle(indices)

# The first `split` shuffled indices form the validation set, the rest train
train_indices, val_indices = indices[split:], indices[:split]

# Samplers that draw (in random order) only from their own index subset
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# Train and validation loaders both read from `mnist_trainset`; the samplers
# keep the two subsets disjoint.
train_loader = torch.utils.data.DataLoader(mnist_trainset,
                                           batch_size=hparams['batch_size'],
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(mnist_trainset,
                                                batch_size=hparams['batch_size'],
                                                sampler=valid_sampler)
# The test loader uses the dedicated `test_batch_size` hyperparameter, which
# was previously defined but never used.
test_loader = torch.utils.data.DataLoader(mnist_testset,
                                          batch_size=hparams['test_batch_size'],
                                          shuffle=False)

In [0]:
class ConvEncoder(nn.Module):
  """Convolutional encoder: 1x28x28 image -> `bottleneck`-dimensional code.

  Three strided convolutions shrink the image to a 128x3x3 feature map, which
  is flattened (1152 values) and projected to the bottleneck by a linear layer
  followed by a ReLU.
  """

  def __init__(self,bottleneck):
    super().__init__()
    conv_layers = [
        # input: 1x28x28 image
        nn.Conv2d(1, 32, kernel_size=2, stride=2),    # 32 filters 2x2, stride 2, no padding -> 32x14x14
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=2, stride=2),   # 64 filters 2x2, stride 2, no padding -> 64x7x7
        nn.ReLU(),
        nn.Conv2d(64, 128, kernel_size=3, stride=2),  # 128 filters 3x3, stride 2, no padding -> 128x3x3
        nn.ReLU(),
    ]
    self.encoder = nn.Sequential(*conv_layers)
    # 1152 = 128 * 3 * 3, the flattened size of the last feature map
    self.mlp = nn.Sequential(nn.Linear(1152, bottleneck), nn.ReLU())

  def forward(self, x):
    features = self.encoder(x)
    # Collapse (channels, height, width) into one vector per sample
    flat = features.view(features.size(0), -1)
    return self.mlp(flat)

In [0]:
class ConvDecoder(nn.Module):
  def __init__(self,bottleneck):
    super().__init__()
    self.mlp = nn.Sequential(
        nn.Linear(bottleneck,1152),
        nn.ReLU()
    )
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(128,64,3, stride=2),
        nn.ReLU(),
        nn.ConvTranspose2d(64,32,2, stride=2),
        nn.ReLU(),
        nn.ConvTranspose2d(32,1,2, stride=2),
        nn.ReLU()
    )
    
  def forward(self, x):
    x = self.mlp(x)
    x = x.view(x.shape[0], 128, 3, 3)
    x = self.decoder(x)
    return x

In [0]:
class ConvAutoencoder(nn.Module):
  """Autoencoder that chains a ConvEncoder and a ConvDecoder.

  Both halves are built with the same `bottleneck` size, so the encoder's
  output can be fed directly into the decoder.
  """

  def __init__(self, bottleneck):
    super().__init__()
    self.encoder = ConvEncoder(bottleneck)
    self.decoder = ConvDecoder(bottleneck)

  def forward(self, x):
    # Compress the input to its latent code, then reconstruct from that code
    latent = self.encoder(x)
    reconstruction = self.decoder(latent)
    return reconstruction

In [0]:
from torch.autograd import Variable

def train_epoch(train_loader, network, optimizer, criterion, hparams, epoch=None):
  """Train `network` for exactly one pass over `train_loader`.

  The network is trained as an autoencoder: each batch of images is used both
  as the input and as the target of `criterion`; the labels are ignored.

  Args:
    train_loader: iterable yielding `(images, labels)` batches.
    network: model to optimise.
    optimizer: optimizer already bound to the parameters of `network`.
    criterion: callable `criterion(output, target)` returning a scalar loss.
    hparams: dict providing `'device'` and `'log_interval'` (and
      `'num_epochs'` when `epoch` is given, for the progress message).
    epoch: optional 1-based epoch number, used only in the log messages.

  Returns:
    The mean of the per-batch losses over this epoch as a float (NaN if the
    loader yields no batches).
  """
  # One call == one epoch: the caller owns the loop over epochs.
  network.train()
  device = hparams['device']
  log_interval = hparams['log_interval']
  num_batches = len(train_loader)
  # Count the samples actually drawn by the sampler, not the whole dataset
  num_samples = len(train_loader.sampler)
  if epoch is None:
    prefix = ''
  else:
    prefix = 'epoch [{}/{}], '.format(epoch, hparams['num_epochs'])

  total_loss = 0.0
  seen = 0
  for batch_idx, (img, _) in enumerate(train_loader, start=1):
    img = img.to(device)
    output = network(img)
    loss = criterion(output, img)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    seen += img.size(0)
    if batch_idx % log_interval == 0:
      print(prefix + '[{}/{} ({:.0f}%)]\tloss:{:.4f}'.format(
          seen, num_samples, 100. * batch_idx / num_batches, loss.item()))

  avg_loss = total_loss / num_batches if num_batches else float('nan')
  if epoch is None:
    print('Average Training Loss: {:.6f}'.format(avg_loss))
  else:
    print('Epoch: {} \t Average Training Loss: {:.6f}'.format(epoch, avg_loss))
  return avg_loss


def test_epoch(test_loader, network, criterion, hparams):
    """Evaluate the reconstruction loss of `network` over `test_loader`.

    Each batch of images is used both as input and as target; labels are
    ignored. No gradients are computed. The network is switched to eval mode
    for the evaluation and restored to its previous mode afterwards.

    Args:
        test_loader: iterable yielding `(images, labels)` batches.
        network: model to evaluate.
        criterion: callable accepting `criterion(output, target,
            reduction='sum')`, e.g. `torch.nn.functional.mse_loss`.
        hparams: dict providing `'device'`.

    Returns:
        The summed loss divided by the number of evaluated tensor elements
        (the per-element mean, on the same scale as a mean-reduced training
        loss), as a float. NaN if the loader yields no data.
    """
    device = hparams['device']
    was_training = network.training
    network.eval()
    total_loss = 0.0
    num_elements = 0
    try:
        with torch.no_grad():
            for img, _ in test_loader:
                img = img.to(device)
                output = network(img)
                total_loss += criterion(output, img, reduction='sum').item()  # sum up batch loss
                # Count what was actually evaluated: with a subset sampler
                # len(test_loader.dataset) overstates the number of samples.
                num_elements += img.numel()
    finally:
        network.train(was_training)
    test_loss = total_loss / num_elements if num_elements else float('nan')
    print('\nTest set: Average loss: {:.4f} \n'.format(test_loss))
    return test_loss


# The name matches pytest's default `test_*` pattern; opt out of collection so
# this helper is never mistaken for a test case.
test_epoch.__test__ = False

In [0]:
# Per-epoch history of the training and validation losses
# (`test_accs` is kept for the accuracy history but is not filled in here).
train_losses, test_losses, test_accs = [], [], []

# Build the autoencoder with the chosen bottleneck size on the target device
bottleneck=100
network = ConvAutoencoder(bottleneck).to(hparams['device'])
optimizer = optim.Adam(network.parameters(), lr=hparams['learning_rate'])
criterion = F.mse_loss

# After each training call, evaluate on the validation split and record both losses
for epoch in range(1, hparams['num_epochs'] + 1):
  train_losses.append(
      train_epoch(train_loader, network, optimizer, criterion, hparams)
  )
  test_loss = test_epoch(validation_loader, network, criterion, hparams)
  test_losses.append(test_loss)



Epoch: 1 	 Average Training Loss: 0.999882
Epoch: 2 	 Average Training Loss: 0.999882
Epoch: 3 	 Average Training Loss: 0.999882


In [0]:
# Plot the per-epoch training and validation loss curves in a single figure.
# NOTE(review): `plt` is not imported in any visible cell; this cell needs
# `import matplotlib.pyplot as plt` to have been run beforehand.
plt.figure(figsize=(10, 8))
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
# The histories are stored in `train_losses` / `test_losses` by the training
# cell (the previous `tr_losses` / `te_losses` names were never defined).
plt.plot(train_losses, label='train')
# `test_losses` holds the losses measured on the validation loader
plt.plot(test_losses, label='validation')
plt.legend()

In [0]:
#plot the reconstructed images