# Auto Encoder

Your very first generative model

In [1]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import utils
import wandb
# Start a Weights & Biases run in the 'auto_encoder_class' project; the
# training loop further down reports its per-epoch loss with wandb.log.
wandb.init(project='auto_encoder_class')

[34m[1mwandb[0m: Currently logged in as: [33mingambe[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Compared to classification or regression, auto-encoders focus on finding a compressed representation of the data  

Typically, the input is mapped to a *latent space* of lower dimension by the **encoder**  

<center>
    <img src='images/encoder.png' width='60%'/>
    <p>Source: <a href='https://fleuret.org/dlc/'>Francois Fleuret</a></p>
</center>

This compressed representation is mapped back to the higher dimension by the **decoder**

<center>
    <img src='images/decoder.png' width='60%'/>
    <p>Source: <a href='https://fleuret.org/dlc/'>Francois Fleuret</a></p>
</center>

Auto-encoders map a space to itself, learning something as close as possible to the identity function  
Because the latent space is of lower dimension, the neural network needs to capture dependencies between components  

<center>
    <img src='images/full.png' width='60%'/>
    <p>Source: <a href='https://fleuret.org/dlc/'>Francois Fleuret</a></p>
</center>

Auto-encoders generate a compressed latent representation of the input and then decompress it back to the original input space

<center>
    <img src='images/ae.png' width='60%'/>
    <p>Source: <a href='https://www.compthree.com/blog/autoencoder/'>CompThree</a></p>
</center>

In [2]:
def layer_init(m):
    """Initialize a layer in place and hand it back.

    The weight is filled with Xavier-normal values scaled by the ReLU gain.
    The bias, when the layer has one, is set to zero.

    Args:
        m: a module exposing a ``weight`` tensor and a ``bias`` attribute
            (the notebook passes ``nn.Linear``, ``nn.Conv2d`` and
            ``nn.ConvTranspose2d`` instances).

    Returns:
        The same module ``m``, so the call can be nested directly inside
        ``nn.Sequential(...)``.
    """
    torch.nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))
    # A layer created with ``bias=False`` carries ``bias = None``; skip it
    # rather than failing inside ``constant_``.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0)
    return m

class MLPAutoencoder(nn.Module):
    """Fully connected auto-encoder working on flattened 28x28 images.

    The encoder narrows 784 -> 196 -> 49 -> ``dim_latent`` features and the
    decoder widens them back in the reverse order. A ReLU follows every
    linear layer except the last one of each half.
    """

    def __init__(self, dim_latent):
        """Create the encoder and decoder stacks.

        Args:
            dim_latent: number of features in the bottleneck.
        """
        super().__init__()
        n_pixels, n_mid, n_small = 28 * 28, 14 * 14, 7 * 7

        # Encoder layers are created first, then decoder layers, so the
        # order of random initialization draws is encoder-then-decoder.
        encoder_layers = [
            layer_init(nn.Linear(n_pixels, n_mid)),
            nn.ReLU(),
            layer_init(nn.Linear(n_mid, n_small)),
            nn.ReLU(),
            layer_init(nn.Linear(n_small, dim_latent)),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            layer_init(nn.Linear(dim_latent, n_small)),
            nn.ReLU(),
            layer_init(nn.Linear(n_small, n_mid)),
            nn.ReLU(),
            layer_init(nn.Linear(n_mid, n_pixels)),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the latent space and decode it back.

        Args:
            x: tensor whose last dimension has 784 entries.

        Returns:
            The reconstruction, with the same shape as ``x``.
        """
        latent = self.encoder(x)
        return self.decoder(latent)
    
class CNNAutoencoder(nn.Module):
    """Convolutional auto-encoder for 1x28x28 images passed in flattened.

    The encoder shrinks the image to a ``dim_latent`` x 1 x 1 tensor with
    five convolutions. The decoder mirrors it with transposed convolutions.
    Input and output are both flat (batch, 784) tensors so the model can be
    swapped with the MLP variants without changing the training loop.
    """

    def __init__(self, dim_latent):
        """Create the convolutional encoder and its mirrored decoder.

        Args:
            dim_latent: number of channels of the 1x1 bottleneck.
        """
        super().__init__()
        width = 32  # channel count of every hidden feature map

        encoder_layers = [                                           # 1x28x28
            layer_init(nn.Conv2d(1, width, 5)),                      # 32x24x24
            nn.ReLU(),
            layer_init(nn.Conv2d(width, width, 5)),                  # 32x20x20
            nn.ReLU(),
            layer_init(nn.Conv2d(width, width, 4, stride=2)),        # 32x9x9
            nn.ReLU(),
            layer_init(nn.Conv2d(width, width, 3, stride=2)),        # 32x4x4
            nn.ReLU(),
            layer_init(nn.Conv2d(width, dim_latent, 4)),             # dim_latentx1x1
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [                                                # dim_latentx1x1
            layer_init(nn.ConvTranspose2d(dim_latent, width, 4)),         # 32x4x4
            nn.ReLU(),
            layer_init(nn.ConvTranspose2d(width, width, 3, stride=2)),    # 32x9x9
            nn.ReLU(),
            layer_init(nn.ConvTranspose2d(width, width, 4, stride=2)),    # 32x20x20
            nn.ReLU(),
            layer_init(nn.ConvTranspose2d(width, width, 5)),              # 32x24x24
            nn.ReLU(),
            layer_init(nn.ConvTranspose2d(width, 1, 5)),                  # 1x28x28
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Reconstruct a batch of flattened images.

        The MLP models take and return flat vectors. To avoid an if/else in
        the training loop, this model accepts the flat version too, restores
        the image layout internally, and flattens the result again so it can
        be compared with the flat input by the MSE loss.

        Args:
            x: tensor that can be viewed as (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 784).
        """
        n_samples = x.size(0)
        images = x.view(n_samples, 1, 28, 28)
        recon = self.decoder(self.encoder(images))
        return recon.view(n_samples, -1)
    
class PCALikeAutoencoder(nn.Module):
    """Auto-encoder with no non-linearity: one linear map each way.

    The encoder is a single 784 -> ``dim_latent`` linear layer and the
    decoder a single ``dim_latent`` -> 784 linear layer.
    """

    def __init__(self, dim_latent):
        """Create the two linear maps.

        Args:
            dim_latent: number of features in the bottleneck.
        """
        super().__init__()
        n_pixels = 28 * 28
        # Each half stays wrapped in nn.Sequential, as in the other models.
        to_latent = layer_init(nn.Linear(n_pixels, dim_latent))
        self.encoder = nn.Sequential(to_latent)
        from_latent = layer_init(nn.Linear(dim_latent, n_pixels))
        self.decoder = nn.Sequential(from_latent)

    def forward(self, x):
        """Project ``x`` to the latent space and back.

        Args:
            x: tensor whose last dimension has 784 entries.

        Returns:
            The reconstruction, with the same shape as ``x``.
        """
        latent = self.encoder(x)
        return self.decoder(latent)

In [3]:
# Number of images per mini-batch.
batch_size = 128

# Turn each image into a tensor, then standardize its single channel with
# mean 0.1307 and standard deviation 0.3081.
img_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST is fetched into ./data when it is not already there; batches are
# reshuffled every epoch.
dataset = MNIST(root='./data', download=True, transform=img_transform)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)

In [4]:
# Step size handed to Adam.
learning_rate = 1e-3
# Width of the bottleneck; each image itself has 28 * 28 values.
dim_latent = 8

# Choose the architecture by changing which of these lines is active.
#model = CNNAutoencoder(dim_latent)
#model = PCALikeAutoencoder(dim_latent)
model = MLPAutoencoder(dim_latent)

# Reconstruction error: mean squared difference between output and input.
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [5]:
%matplotlib inline
from tqdm.notebook import tqdm

num_epochs = 10
for epoch in range(num_epochs):
    # Sum of the per-batch losses; divided by the batch count when logged.
    epoch_loss = 0
    for img, _label in tqdm(dataloader, unit='batch'):
        # Every model in this notebook takes flat vectors, so flatten each
        # image; the labels are not used.
        img = img.view(img.size(0), -1)

        optimizer.zero_grad()
        output = model(img)
        # The target of the reconstruction is the input itself.
        loss = criterion(output, img)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    # One data point per epoch: the mean batch loss.
    wandb.log({'loss': epoch_loss / len(dataloader)})

  0%|          | 0/469 [00:00<?, ?batch/s]



KeyboardInterrupt: 

In [None]:
from matplotlib import pyplot as plt

# `img` and `output` are whatever the training loop assigned last, i.e. the
# final batch it processed and the model's reconstruction of it.
# utils.to_img is defined in the local utils module (presumably it restores
# an image layout suitable for imshow; confirm there).

# First figure: the inputs at batch positions 4..7.
in_pic = utils.to_img(img.cpu().data)
plt.figure(figsize=(18, 6))
for col in range(1, 5):
    plt.subplot(1, 4, col)
    plt.imshow(in_pic[col + 3])
    plt.axis('off')

# Second figure: the reconstructions at the same positions.
out_pic = utils.to_img(output.cpu().data)
plt.figure(figsize=(18, 6))
for col in range(1, 5):
    plt.subplot(1, 4, col)
    plt.imshow(out_pic[col + 3])
    plt.axis('off')