# Clase extra: Autoencoders


## 1. Autoencoder simple

Cargamos las librerías necesarias para ejecutar los experimentos.

In [1]:
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

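Definimos una función para transformar un lote de vectores de entrada en imágenes. (Esto porque el autoencoder trabaja con las imágenes de MNIST aplanadas como vectores y normalizadas a $[-1, 1]$; la función deshace la normalización y recupera la forma $1 \times 28 \times 28$.)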

In [2]:
def to_img(x):
    """Turn a batch of normalized vectors into single-channel 28x28 images.

    Args:
        x: Tensor whose first dimension is the batch and which holds
            28 * 28 values per sample.

    Returns:
        Tensor of shape ``(x.size(0), 1, 28, 28)`` with values in ``[0, 1]``.
    """
    # Map x -> 0.5 * (x + 1) and clip whatever falls outside [0, 1].
    rescaled = (0.5 * (x + 1)).clamp(0, 1)
    batch = x.size(0)
    return rescaled.view(batch, 1, 28, 28)

Implementamos un _transformer_ para convertir cada imagen en un tensor y normalizarla al rango $[-1, 1]$.

In [3]:
# Preprocessing applied to every sample: convert the image to a tensor, then
# normalize it with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
img_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

Cargamos la base de datos MNIST y construimos un _dataloader_ para cargar los ejemplos en _batches_.

In [4]:
# Number of samples delivered per iteration of the dataloader.
batch_size = 128

# MNIST training split (train=True is the default), downloaded into ./data if
# it is not there yet; every sample goes through `img_transform`.
dataset = MNIST(
    root='./data',
    train=True,
    transform=img_transform,
    download=True,
)
# Reshuffle the samples at the start of every epoch.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Definimos el modelo de autoencoder.

In [5]:
class AutoencoderSimple(nn.Module):
    """Fully connected autoencoder with a low-dimensional bottleneck.

    The encoder compresses a flat input vector through layers of width
    128 -> 64 -> 12 -> ``latent_dim``; the decoder mirrors it back to
    ``input_dim`` and ends in ``Tanh``, so reconstructions lie in [-1, 1].

    Args:
        input_dim: Length of each flattened input vector. Defaults to
            ``28 * 28``.
        latent_dim: Size of the bottleneck code. Defaults to ``3``.
    """

    def __init__(self, input_dim=28 * 28, latent_dim=3):
        super().__init__()
        # The layer order (and therefore the state_dict keys, e.g.
        # `encoder.0.weight`) must stay stable so saved weights keep loading.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, latent_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, input_dim),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Reconstruction with the same shape as ``x`` and values in [-1, 1].
        """
        code = self.encoder(x)
        return self.decoder(code)

### Entrenamiento

Definimos cuáles son los hiperparámetros del algoritmo de optimización.

In [6]:
# Number of full passes over the training data.
num_epochs = 100
# Step size used by the optimizer.
learning_rate = 0.001

Instanciamos el modelo y definimos la función de pérdida y el algoritmo de optimización.

In [7]:
# Use the GPU when one is available and fall back to the CPU otherwise, instead
# of failing on an unconditional `.cuda()` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoencoderSimple().to(device)

# Mean squared error loss.
criterion = nn.MSELoss()
# Adam over all model parameters, with a small weight decay (L2 penalty).
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

Entrenamiento



In [8]:
# Run on whichever device the model's parameters already live on, so the loop
# is not tied to CUDA being available.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    for img, _ in dataloader:
        # Labels are ignored: the autoencoder is trained to reproduce its input.
        # Flatten every image to one vector per sample before feeding the fully
        # connected model. No `Variable` wrapper is needed: tensors carry the
        # autograd state themselves.
        img = img.view(img.size(0), -1).to(device)
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    # NOTE: `loss` and `output` here belong to the last batch of the epoch only.
    print(f'epoch [{epoch+1}/{num_epochs}] loss: {loss.item()} size: {output.shape}')
    if epoch % 10 == 0:
        # Every 10th epoch, save the last batch's inputs and reconstructions.
        # `detach()` drops the autograd graph (it replaces the legacy `.data`).
        pic_out = to_img(output.detach().cpu())
        pic_inp = to_img(img.detach().cpu())
        save_image(pic_inp, f'/content/image-inp_{epoch}.png')
        save_image(pic_out, f'/content/image-out_{epoch}.png')

epoch [1/100] loss: 0.18970146775245667 size: torch.Size([96, 784])
epoch [2/100] loss: 0.18039152026176453 size: torch.Size([96, 784])
epoch [3/100] loss: 0.1558837592601776 size: torch.Size([96, 784])
epoch [4/100] loss: 0.16556891798973083 size: torch.Size([96, 784])
epoch [5/100] loss: 0.14936776459217072 size: torch.Size([96, 784])
epoch [6/100] loss: 0.14701464772224426 size: torch.Size([96, 784])
epoch [7/100] loss: 0.12891629338264465 size: torch.Size([96, 784])
epoch [8/100] loss: 0.14842307567596436 size: torch.Size([96, 784])
epoch [9/100] loss: 0.1262117177248001 size: torch.Size([96, 784])
epoch [10/100] loss: 0.1362428218126297 size: torch.Size([96, 784])
epoch [11/100] loss: 0.1277216672897339 size: torch.Size([96, 784])
epoch [12/100] loss: 0.1356513351202011 size: torch.Size([96, 784])
epoch [13/100] loss: 0.12361467629671097 size: torch.Size([96, 784])
epoch [14/100] loss: 0.14186862111091614 size: torch.Size([96, 784])
epoch [15/100] loss: 0.12808160483837128 size: t

Guardar el modelo

In [None]:
# Persist only the model's state_dict rather than the whole module object.
torch.save(obj=model.state_dict(), f='./sim_autoencoder.pth')