# **Exercise 2. Autoencoder**

Second exercise for the Project of DLAI Fall semester 2019/2020.

Authors:
* Carlos Alejandro López Molina
* María González i Calabuig
* Roser Batlle Roca
* Jordi Biosca Caro

Import of different libraries used to build the script:

In [0]:
import torch

import numpy as np
import torchvision as tv
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision.utils import save_image
from torchvision import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.utils.data.sampler import SubsetRandomSampler
from tabulate import tabulate
from torch.utils import data 
import random
from sklearn.metrics import accuracy_score


seed = 1
torch.manual_seed(seed)
np.random.seed(seed)

Definition of hyper-parameters:

In [0]:
hparams = {
    'batch_size': 64,
    'num_epochs': 5,
    'test_batch_size': 64,
    'hidden_size': 64,
    'hidden_size_2': 128,
    'hidden_size_3': 256,
    'num_classes': 10,
    'num_inputs': 1, 
    'learning_rate': 1e-3,
    'log_interval': 100,
    'num_workers': 4,
    'kernel_size': 3,
    'encoder_key': [1, 5, 25, 50, 100],
    'subset_size': 100
}

hparams['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

**STEP 1** - Select 100 images from MNIST dataset

In [3]:
mnist_trainset = datasets.MNIST('data', train=True, download=True,
                                transform=transforms.Compose([
                                    transforms.ToTensor()
                                ]))
mnist_testset = datasets.MNIST('data', train=False, 
                               transform=transforms.Compose([
                                   transforms.ToTensor()
                               ]))

 
train_loader = torch.utils.data.DataLoader(mnist_trainset, 
                                           hparams['batch_size'], 
                                           shuffle=False,
                                           num_workers=hparams['num_workers'], 
                                           )

test_loader = torch.utils.data.DataLoader(mnist_trainset, 
                                          1, 
                                          shuffle=False,
                                          num_workers=hparams['num_workers'], 
                                          )

#samples_train = torch.utils.data.Subset(train_loader, 100)
#samples_test = torch.utils.data.Subset(test_loader, 100)




0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 4034124.97it/s]                             


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 57860.41it/s]                           
0it [00:00, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 977841.78it/s]                             
0it [00:00, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 21929.45it/s]            

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!





In [0]:
n1 = np.arange(len(mnist_trainset))
n1 = np.random.choice(n1,100)
#small_mnist_trainset = mnist_trainset[n1]
n2 = np.arange(len(mnist_testset))
n2 = np.random.choice(n2,1000)
#small_mnist_testset = mnist_testset[n2]


small_trainloader = torch.utils.data.DataLoader(mnist_trainset, 
                                           hparams['batch_size'], 
                                           shuffle=False,
                                           num_workers=hparams['num_workers'],
                                           sampler= SubsetRandomSampler(n1)
                                           )

small_testloader = torch.utils.data.DataLoader(mnist_testset, 
                                          hparams['batch_size'], 
                                          shuffle=False,
                                          num_workers=hparams['num_workers'], 
                                          sampler=SubsetRandomSampler(n2)
                                          )

**STEP 2** - Reusing the Encoder of the Autoencoder from Ex1. 
Bottleneck size = 25

In [0]:
class Reshape(nn.Module):
    def __init__(self, *args):
        super(Reshape, self).__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)


class ConvAutoEncoder(nn.Module):

  def __init__(self, encoder_key):
    super(ConvAutoEncoder,self).__init__()

    self.encoder = nn.Sequential(
        nn.Conv2d(hparams['num_inputs'], hparams['hidden_size'], 4, padding=3, stride=2),
        nn.ReLU(True),
        nn.Conv2d(hparams['hidden_size'], hparams['hidden_size_2'], 4, padding=1, stride=2),
        nn.ReLU(True),
        nn.Conv2d(hparams['hidden_size_2'], hparams['hidden_size_3'], 4, stride=2),
        nn.ReLU(True),
        nn.MaxPool2d(2, stride=1),
        nn.ReLU(True),
        nn.Flatten(),
        nn.Linear(2*2*hparams['hidden_size_3'], encoder_key)
    )

    self.decoder = nn.Sequential(
        nn.Linear(encoder_key, 2*2*hparams['hidden_size_3']),
        nn.ReLU(True),
        Reshape(-1,hparams['hidden_size_3'], 2, 2),
        nn.ConvTranspose2d(hparams['hidden_size_3'], hparams['hidden_size_2'], 3, stride=2),
        nn.ReLU(True),
        nn.ConvTranspose2d(hparams['hidden_size_2'], hparams['hidden_size'], 5, stride=3, padding=1),
        nn.ReLU(True),
        nn.ConvTranspose2d(hparams['hidden_size'], hparams['num_inputs'], 2, stride=2, padding=1),
        nn.Tanh(),
    )

  def forward(self,x):
    x = self.encoder(x)
    x = self.decoder(x)
    return x

In [0]:
losses_train = []
losses_test = []
model = ConvAutoEncoder(hparams['encoder_key'][2])
model = model.to(hparams['device'])
distance = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'], weight_decay=1e-5)
for epoch in range (hparams['num_epochs']):
  for batch_id, data in enumerate(train_loader):
    img, _ = data
    img = img.to(hparams['device'])
    # Forward pass
    output = model(img)
    loss = distance(output, img)
    losses_train.append(loss.cpu().data.item())
    # Backward pass
    optimizer.zero_grad() 
    loss.backward()
    optimizer.step()
    # Log message
    print('TRAIN: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
  for data in test_loader:
    img, _ = data
    img = img.to(hparams['device'])
    # Forward pass
    output = model(img)
    loss = distance(output, img)
    losses_test.append(loss.cpu().data.item())
   # Log message
    print('TEST: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))

TRAIN: epoch[1/5], loss:0.1019
TRAIN: epoch[1/5], loss:0.0923
TRAIN: epoch[1/5], loss:0.0626
TRAIN: epoch[1/5], loss:0.0951
TRAIN: epoch[1/5], loss:0.0430
TRAIN: epoch[1/5], loss:0.0516
TRAIN: epoch[1/5], loss:0.0537
TRAIN: epoch[1/5], loss:0.0518
TRAIN: epoch[1/5], loss:0.0460
TRAIN: epoch[1/5], loss:0.0359
TRAIN: epoch[1/5], loss:0.0393
TRAIN: epoch[1/5], loss:0.0451
TRAIN: epoch[1/5], loss:0.0411
TRAIN: epoch[1/5], loss:0.0373
TRAIN: epoch[1/5], loss:0.0349
TRAIN: epoch[1/5], loss:0.0352
TRAIN: epoch[1/5], loss:0.0330
TRAIN: epoch[1/5], loss:0.0335
TRAIN: epoch[1/5], loss:0.0360
TRAIN: epoch[1/5], loss:0.0358
TRAIN: epoch[1/5], loss:0.0342
TRAIN: epoch[1/5], loss:0.0371
TRAIN: epoch[1/5], loss:0.0391
TRAIN: epoch[1/5], loss:0.0361
TRAIN: epoch[1/5], loss:0.0334
TRAIN: epoch[1/5], loss:0.0327
TRAIN: epoch[1/5], loss:0.0328
TRAIN: epoch[1/5], loss:0.0340
TRAIN: epoch[1/5], loss:0.0339
TRAIN: epoch[1/5], loss:0.0359
TRAIN: epoch[1/5], loss:0.0366
TRAIN: epoch[1/5], loss:0.0350
TRAIN: e

Saving the weights of the autoencoder:

In [0]:
pretrained_dict = model.state_dict()

torch.save(pretrained_dict, 'weights_encoder.pt')

**STEP 3** - Digit Classifier

In [0]:
class Classifier(nn.Module):

  def __init__(self):
    super(Classifier,self).__init__()

    self.encoder = nn.Sequential(
        nn.Conv2d(hparams['num_inputs'], hparams['hidden_size'], 4, padding=3, stride=2),
        nn.ReLU(True),
        nn.Conv2d(hparams['hidden_size'], hparams['hidden_size_2'], 4, padding=1, stride=2),
        nn.ReLU(True),
        nn.Conv2d(hparams['hidden_size_2'], hparams['hidden_size_3'], 4, stride=2),
        nn.ReLU(True),
        nn.MaxPool2d(2, stride=1),
        nn.ReLU(True),
        nn.Flatten(),
        nn.Linear(2*2*hparams['hidden_size_3'], 25)
    )

    self.fc = nn.Sequential(
        nn.Linear(25,10),
        nn.Softmax(dim=1)
    )
  
  def forward(self,x):
    bsz = x.shape[0]
    x = self.encoder(x)
    x = x.view(bsz,-1)
    x = self.fc(x)
    return x


**STEP 4** - Pre-Training. 

In [0]:
model = Classifier()
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
model = model.to(hparams['device'])

model.encoder.requires_grad_=False


In [0]:
distance = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'], weight_decay=1e-5)

for epoch in range (hparams['num_epochs']):
  for batch_id, data in enumerate(small_trainloader):
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    loss = distance(output, groundtruth)
    losses_train.append(loss.cpu().data.item())
    # Backward pass
    optimizer.zero_grad() 
    loss.backward()
    optimizer.step()
    # Log message
    print('TRAIN: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
  accuracy = 0

  for data in small_testloader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    loss = distance(output,groundtruth)
    losses_test.append(loss.cpu().data.item())
    #accuracy += int(pred == groundtruth)/len(test_loader)
    # Log message
    print('TEST: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
    #print(accuracy)



torch.Size([64, 10])
TRAIN: epoch[1/5], loss:2.2990
torch.Size([36, 10])
TRAIN: epoch[1/5], loss:2.2977
TEST: epoch[1/5], loss:2.2985
TEST: epoch[1/5], loss:2.3330
TEST: epoch[1/5], loss:2.3230
TEST: epoch[1/5], loss:2.3393
TEST: epoch[1/5], loss:2.3175
TEST: epoch[1/5], loss:2.3113
TEST: epoch[1/5], loss:2.3032
TEST: epoch[1/5], loss:2.2971
TEST: epoch[1/5], loss:2.3340
TEST: epoch[1/5], loss:2.3423
TEST: epoch[1/5], loss:2.3332
TEST: epoch[1/5], loss:2.2817
TEST: epoch[1/5], loss:2.3374
TEST: epoch[1/5], loss:2.3227
TEST: epoch[1/5], loss:2.3565
TEST: epoch[1/5], loss:2.2754
torch.Size([64, 10])
TRAIN: epoch[2/5], loss:2.2374
torch.Size([36, 10])
TRAIN: epoch[2/5], loss:2.3244
TEST: epoch[2/5], loss:2.2927
TEST: epoch[2/5], loss:2.3541
TEST: epoch[2/5], loss:2.3590
TEST: epoch[2/5], loss:2.3414
TEST: epoch[2/5], loss:2.4126
TEST: epoch[2/5], loss:2.3283
TEST: epoch[2/5], loss:2.3949
TEST: epoch[2/5], loss:2.3625
TEST: epoch[2/5], loss:2.3542
TEST: epoch[2/5], loss:2.3501
TEST: epoch[

Pre-trained net Results:

In [0]:
groundtruths = []
preds = []

accuracy = 0

for data in small_testloader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruths.append(groundtruth)
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    preds.append(pred)
for j in range(len(pred)):
    accuracy += int(pred[j] == groundtruth[j])/len(pred)

accuracy_pre = accuracy/len(small_testloader)

0.11621093749999997


**STEP 5** - Fine-Tuning: We tune the encoder part with a learning rate 'lr=1e-4' and we train the fully connected layer with 'lr=1e-2'

In [0]:
optimizer = torch.optim.SGD([
                {'params': model.encoder.parameters()},
                {'params': model.fc.parameters(), 'lr': 1e-2}
            ], lr=1e-4, momentum=0.99)

for epoch in range (hparams['num_epochs']):
  for batch_id, data in enumerate(small_trainloader):
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    print(output.shape)
    loss = distance(output, groundtruth)
    losses_train.append(loss.cpu().data.item())
    # Backward pass
    optimizer.zero_grad() 
    loss.backward()
    optimizer.step()
    # Log message
    print('TRAIN: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
  accuracy = 0

  for data in small_testloader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    loss = distance(output,groundtruth)
    losses_test.append(loss.cpu().data.item())
    #accuracy += int(pred == groundtruth)/len(test_loader)
    # Log message
    print('TEST: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
    #print(accuracy)

torch.Size([64, 10])
TRAIN: epoch[1/5], loss:2.2326
torch.Size([36, 10])
TRAIN: epoch[1/5], loss:2.1593
TEST: epoch[1/5], loss:2.3171
TEST: epoch[1/5], loss:2.2844
TEST: epoch[1/5], loss:2.3316
TEST: epoch[1/5], loss:2.2593
TEST: epoch[1/5], loss:2.3237
TEST: epoch[1/5], loss:2.2596
TEST: epoch[1/5], loss:2.3181
TEST: epoch[1/5], loss:2.3138
TEST: epoch[1/5], loss:2.3151
TEST: epoch[1/5], loss:2.3122
TEST: epoch[1/5], loss:2.3445
TEST: epoch[1/5], loss:2.3193
TEST: epoch[1/5], loss:2.3127
TEST: epoch[1/5], loss:2.2967
TEST: epoch[1/5], loss:2.3020
TEST: epoch[1/5], loss:2.2201
torch.Size([64, 10])
TRAIN: epoch[2/5], loss:2.2170
torch.Size([36, 10])
TRAIN: epoch[2/5], loss:2.1852
TEST: epoch[2/5], loss:2.3170
TEST: epoch[2/5], loss:2.3621
TEST: epoch[2/5], loss:2.3379
TEST: epoch[2/5], loss:2.3132
TEST: epoch[2/5], loss:2.2880
TEST: epoch[2/5], loss:2.3276
TEST: epoch[2/5], loss:2.3028
TEST: epoch[2/5], loss:2.2785
TEST: epoch[2/5], loss:2.2986
TEST: epoch[2/5], loss:2.2905
TEST: epoch[

Fine-tuning results: 

In [0]:
groundtruths = []
preds = []

accuracy = 0

for data in test_loader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruths.append(groundtruth)
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    preds.append(pred)
    
for j in range(len(pred)):
    accuracy += int(pred[j] == groundtruth[j])/len(pred)

accuracy_tuning = accuracy/len(small_testloader)

**STEP 6:**From scratch

In [0]:
model.encoder.requires_grad_=True

for epoch in range (hparams['num_epochs']):
  for batch_id, data in enumerate(small_trainloader):
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    print(output.shape)
    loss = distance(output, groundtruth)
    losses_train.append(loss.cpu().data.item())
    # Backward pass
    optimizer.zero_grad() 
    loss.backward()
    optimizer.step()
    # Log message
    print('TRAIN: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
  accuracy = 0

  for data in small_testloader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    loss = distance(output,groundtruth)
    losses_test.append(loss.cpu().data.item())
    #accuracy += int(pred == groundtruth)/len(test_loader)
    # Log message
    print('TEST: epoch[{}/{}], loss:{:.4f}'.format(epoch+1, hparams['num_epochs'], loss.item()))
    #print(accuracy)



torch.Size([64, 10])
TRAIN: epoch[1/5], loss:2.2308
torch.Size([36, 10])
TRAIN: epoch[1/5], loss:2.1578
TEST: epoch[1/5], loss:2.2815
TEST: epoch[1/5], loss:2.2643
TEST: epoch[1/5], loss:2.3346
TEST: epoch[1/5], loss:2.3595
TEST: epoch[1/5], loss:2.3277
TEST: epoch[1/5], loss:2.2489
TEST: epoch[1/5], loss:2.3434
TEST: epoch[1/5], loss:2.3278
TEST: epoch[1/5], loss:2.3184
TEST: epoch[1/5], loss:2.3375
TEST: epoch[1/5], loss:2.3315
TEST: epoch[1/5], loss:2.2857
TEST: epoch[1/5], loss:2.3427
TEST: epoch[1/5], loss:2.2755
TEST: epoch[1/5], loss:2.2891
TEST: epoch[1/5], loss:2.2489
torch.Size([64, 10])
TRAIN: epoch[2/5], loss:2.1729
torch.Size([36, 10])
TRAIN: epoch[2/5], loss:2.2582
TEST: epoch[2/5], loss:2.3108
TEST: epoch[2/5], loss:2.2966
TEST: epoch[2/5], loss:2.3442
TEST: epoch[2/5], loss:2.3050
TEST: epoch[2/5], loss:2.3174
TEST: epoch[2/5], loss:2.3222
TEST: epoch[2/5], loss:2.3154
TEST: epoch[2/5], loss:2.3655
TEST: epoch[2/5], loss:2.2771
TEST: epoch[2/5], loss:2.2148
TEST: epoch[

From scratch results:

In [0]:
groundtruths = []
preds = []
accuracy = 0

for data in test_loader:
    img, groundtruth = data
    img = img.to(hparams['device'])
    groundtruths.append(groundtruth)
    groundtruth = groundtruth.to(hparams['device'])
    # Forward pass
    output = model(img)
    _, pred = output.max(dim=1)
    preds.append(pred)

for j in range(len(pred)):
    accuracy += int(pred[j] == groundtruth[j])/len(pred)

accuracy_scratch = accuracy/len(small_testloader)
 

**ACCURACY RESULTS**


In [0]:
print("Pre-trained model:")
print(accuracy_pre)
print("Fine-tuned model:")
print(accuracy_tuning)
print("Model trained from scratch:")
print(accuracy_scratch)

Pre-trained model:
0.13205
Fine-tuned model:
0.09873333333333334
Model trained from scratch:
0.1415
