# Auto-Encoder in PyTorch

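In this tutorial we will introduce and implement contractive auto-encoders (CAE): auto-encoders whose loss adds a penalty on the squared Frobenius norm of the encoder's Jacobian with respect to the input. 
Reference: [paper](http://www.icml-2011.org/papers/455_icmlpaper.pdf)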

In [1]:
%matplotlib inline
%config IPCompleter.greedy=True

In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import grad
from torch.utils.data import TensorDataset, DataLoader
from pckutils import mnist
import json

## Checking gradients of CAE

In [3]:
class BaseAutoEncoder(nn.Module):
    '''
    One-hidden-layer sigmoid auto-encoder whose contractive penalty is computed with autograd.

    Calling the module stores two attributes on it as a side effect:
    y_enc       - the encoded activations (batch_size x out_f)
    jacobi_term - squared Frobenius norm of d(y_enc)/d(x), summed over the batch
    '''

    def __init__(self, in_f, out_f):
        '''
        in_f  - number of input features
        out_f - number of encoded (hidden) features
        '''
        super(BaseAutoEncoder, self).__init__()
        self.in_features = in_f
        # the attribute used to be spelled `in_fetaures`; keep that spelling as an alias
        # so code reading the old name keeps working
        self.in_fetaures = in_f
        self.out_features = out_f

        # important to create here all the function which contains parameters to be trained
        # otherwise model.parameters() would return an empty list
        self.lin_enc = nn.Linear(self.in_features, self.out_features)
        self.lin_dec = nn.Linear(self.out_features, self.in_features)

    def forward(self, x):
        '''
        x - batch_size x in_f floating point tensor

        Returns the reconstruction (batch_size x in_f). If x does not track gradients yet,
        it is switched to requires_grad=True in place, because the Jacobian term is
        differentiated with respect to x.
        '''
        # assigning `x.requires_grad = True` raises for non-leaf tensors (e.g. the output of
        # a previous layer); those already track gradients, so only flip the flag when needed
        if not x.requires_grad:
            x.requires_grad_(True)
        self.y_enc = torch.sigmoid(self.lin_enc(x))
        self.jacobi_term = self.__jacobi(x, self.y_enc)
        y_out = torch.sigmoid(self.lin_dec(self.y_enc))
        return y_out

    def __jacobi(self, x, y):
        '''
        Squared Frobenius norm of the Jacobian dy/dx, accumulated over every sample.

        x - batch_size x in_f input (must track gradients)
        y - batch_size x out_f encoded activations computed from x
        '''
        # start from a tensor (not a Python float) so an empty batch still yields a tensor
        j_frobenius = x.new_zeros(())
        for b in range(x.size(0)):
            for i in range(self.out_features):
                # create_graph is important because we will calculate the gradient of j_frobenius according to model parameters
                gradients = grad(y[b, i], x, retain_graph=True, create_graph=True)[0]
                j_frobenius = j_frobenius + gradients.pow(2).sum()
        return j_frobenius

In [4]:
# one random 20-dimensional sample; it tracks gradients because the Jacobian
# term is differentiated with respect to the input
x = torch.rand(1, 20, requires_grad=True)
bae = BaseAutoEncoder(in_f=20, out_f=5)
y = bae(x)

In [5]:
# Jacobian penalty stored on the module by its last forward pass
bae.jacobi_term

tensor(0.0933, grad_fn=<ThAddBackward>)

In [6]:
# raw parameter tensors, in the order returned by bae.parameters()
ws = [param.data for param in bae.parameters()]

In [7]:
# total loss = reconstruction error + Jacobian penalty from the forward pass
criterion = nn.MSELoss()
reconstruction_error = criterion(y, x)
loss = reconstruction_error + bae.jacobi_term

# clear any stale parameter gradients before back-propagating
bae.zero_grad()
loss.backward()

In [8]:
# derivative of Jacobi term according to W and b
# ws[0] and ws[1] are the `.data` tensors of the first two parameters of bae
# (presumably the encoder weight and bias, given the order the layers are created in);
# marking them as requiring grad lets autograd differentiate `ou` with respect to them
w = ws[0]
w.requires_grad=True
b = ws[1]
b.requires_grad=True

# encoder activation of the single sample x[0], rebuilt from w and b
s = torch.sigmoid(torch.matmul(w, x[0]) + b)
ou = torch.matmul(torch.diag(s * (1-s)), w).norm().pow(2) # manual calculation of the Frobenius norm of the Jacobian
# retain_graph=True keeps the graph of `ou` alive so both gradients can be taken from it
j_w = grad(ou, w, retain_graph=True)[0]
j_b = grad(ou, b, retain_graph=True)[0]

In [9]:
# derivative of the reconstruction (MSE) loss according to the decoder W and b parameters
# nn.MSELoss averages the squared error over all elements by default, so
# d(loss)/dy = 2 * (y - x) / y.numel(); without the 1 / numel factor the manual
# gradients come out numel times larger than the ones autograd stores in p.grad
mse_scale = 2.0 / y.numel()
sigm_deriv = y[0] * (1-y[0])
sigm_jacobi = torch.diag(sigm_deriv)
# gradient at the decoder pre-activation == gradient of the decoder bias
dec_b = torch.matmul(y[0]-x[0], sigm_jacobi) * mse_scale
# weight gradient is the outer product of that error with the layer input (the encoding)
dec_w = torch.ger(dec_b, bae.y_enc[0])

# derivative according to the encoder W and b parameters
# push the output error back through the decoder weight (ws[2]) ...
delta = torch.matmul(torch.matmul((y[0]-x[0]), sigm_jacobi), ws[2])
# ... and through the encoder sigmoid
enc_b = torch.matmul(delta, torch.diag(bae.y_enc[0] * (1-bae.y_enc[0]))) * mse_scale
enc_w = torch.ger(enc_b, x[0])

In [10]:
# manual gradients, listed in the same order as bae.parameters():
# encoder weight, encoder bias, decoder weight, decoder bias
manual_grads = [enc_w + j_w, enc_b + j_b, dec_w, dec_b]
for p, calc_grad in zip(bae.parameters(), manual_grads):
    # every element has to agree with autograd within an absolute tolerance of 1e-7
    within_tolerance = torch.abs(p.grad - calc_grad) <= 1e-7
    assert bool(within_tolerance.all())
print('Gradients are OK!')

Gradients are OK!


## MNIST example for CAE

Unfortunately, computing the Jacobian term with autograd is too time-consuming, because `BaseAutoEncoder` calls `grad` once per sample and per hidden unit. To see this, let's try it on the following example with randomly generated inputs.

To speed this up, we need to compute the gradient with respect to the input manually, in **batch** mode. 

In [11]:
class MNISTautoencoder(nn.Module):
    '''
    One-hidden-layer sigmoid auto-encoder with a closed-form Jacobian penalty.

    Calling the module stores the penalty of the current batch in `jacobi_loss`.
    '''

    def __init__(self, in_f, out_f):
        '''
        in_f  - number of input features
        out_f - number of encoded (hidden) features
        '''
        super(MNISTautoencoder, self).__init__()

        # encoder first, decoder second: this fixes the order of model.parameters()
        self.lin_enc = nn.Linear(in_f, out_f)
        self.lin_dec = nn.Linear(out_f, in_f)

    def forward(self, x):
        '''
        x - batch_size x in_f

        Returns the reconstruction (batch_size x in_f).
        '''
        hidden = self.lin_enc(x).sigmoid()
        self.jacobi_loss = self.jacobi_loss_calc(x, hidden)
        return self.lin_dec(hidden).sigmoid()

    def jacobi_loss_calc(self, x, y):
        '''
        Squared Frobenius norm of the encoder Jacobian, summed over the batch.

        x - batch_size x in_f input (not used by the closed form)
        y - batch_size x out_f encoded activations
        '''
        # first parameter of the encoder layer, i.e. its weight matrix
        enc_weight = next(iter(self.lin_enc.parameters()))
        # squared sigmoid derivative, s * (1 - s), per sample and hidden unit
        slope_sq = (y * (1 - y)).pow(2)
        return slope_sq.matmul(enc_weight.pow(2)).sum()

### Checking if this simpler form is right

In [12]:
# a batch of 32 random samples with values scaled up to [0, 20)
x = torch.rand(32, 20) * 20
mae = MNISTautoencoder(in_f=20, out_f=5)
bae = BaseAutoEncoder(in_f=20, out_f=5)

# make both models use the very same weights (the tensors are shared, not copied)
for closed_form_param, autograd_param in zip(mae.parameters(), bae.parameters()):
    closed_form_param.data = autograd_param.data

In [19]:
# run both models on the same batch and check the reconstructions element-wise
mae_y = mae(x).data
bae_y = bae(x).data
(mae_y == bae_y).sum() == mae_y.numel()

tensor(1, dtype=torch.uint8)

In [20]:
# Jacobian penalty from the closed-form (batch) implementation
mae.jacobi_loss.item()

0.5907829999923706

In [21]:
# Jacobian penalty from the autograd implementation, for comparison
bae.jacobi_term.item()

0.5907829999923706

### Loading MNIST

In [None]:
# load MNIST through the project-local pckutils helper; the returned object is
# read below via its `X_train` attribute (other fields not visible here — TODO confirm)
data = mnist.load_mnist()

In [None]:
# training inputs as a float tensor; there are no labels because the
# auto-encoder is trained to reconstruct its own input
X = torch.Tensor(data.X_train)
tensors = TensorDataset(X)
trainloader = DataLoader(dataset=tensors, batch_size=32, shuffle=True)

In [None]:
# train the closed-form contractive auto-encoder on MNIST
mae = MNISTautoencoder(28*28, 200)
criterion = nn.MSELoss()
optimizer = optim.Adam(mae.parameters(), lr=1e-2)
running_loss = 0.0

for epoch in range(5):
    for i, batch in enumerate(trainloader, 1):
        # a DataLoader over a TensorDataset yields a sequence of tensors per batch,
        # so take the (only) tensor out of it before feeding the model
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        # flatten every sample to the 28*28 vector the encoder expects
        # (leaves already flat batches unchanged)
        x = x.view(x.size(0), -1)

        # gradients accumulate across backward() calls, so reset them every step
        optimizer.zero_grad()
        y = mae(x)
        # reconstruction error + Jacobian penalty stored by the forward pass
        loss = criterion(y, x) + mae.jacobi_loss
        loss.backward()
        optimizer.step()

        # report the average loss of the last 200 mini-batches
        running_loss += loss.item()
        if i % 200 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i, running_loss / 200))
            running_loss = 0.0

In [None]:
# save the weights


In [None]:
# load the weights if necessary


In [None]:
# generate images
