# Auto-Encoder in Pytorch

In this tutorial we will introduce and implement auto-encoders. 
[paper](http://www.icml-2011.org/papers/455_icmlpaper.pdf)

In [1]:
%matplotlib inline
%config IPCompleter.greedy=True

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad

## Checking gradients of CAE

In [3]:
class BaseAutoEncoder(nn.Module):
    
    def __init__(self):
        super(BaseAutoEncoder, self).__init__()
        self.in_fetaures = 20
        self.out_features = 5
        
        # important to create here all the function which contains parameters to be trained
        # otherwise model.parameters() would return an empty list
        self.lin_enc = nn.Linear(self.in_fetaures, self.out_features)
        self.lin_dec = nn.Linear(self.out_features, self.in_fetaures)
    
    def forward(self, x):
        x.requires_grad=True
        self.y_enc = torch.sigmoid(self.lin_enc(x))
        self.jacobi_term = self.__jacobi(x, self.y_enc)
        y_out = torch.sigmoid(self.lin_dec(self.y_enc))
        return y_out
    
    def __jacobi(self, x, y):
        j_frobenius = 0.0
        print(x, y)
        for i in range(self.out_features):
            # creat_graph is important because we will calculate the gradient of j_frobenius according to model parameters
            gradients = grad(y[i], x, retain_graph=True, create_graph=True)[0] 
            j_frobenius += gradients.pow(2).sum()
        return j_frobenius

In [4]:
x = torch.rand(20, requires_grad=True)
bae = BaseAutoEncoder()
y = bae(x)

tensor([0.3990, 0.7454, 0.3830, 0.9446, 0.4892, 0.6996, 0.3893, 0.9953, 0.2582,
        0.5677, 0.8482, 0.0058, 0.1002, 0.6475, 0.1915, 0.0787, 0.9594, 0.2624,
        0.7715, 0.6969], requires_grad=True) tensor([0.5364, 0.5964, 0.4088, 0.6435, 0.6744], grad_fn=<SigmoidBackward>)


In [5]:
bae.jacobi_term

tensor(0.0905, grad_fn=<ThAddBackward>)

In [6]:
ws = []
for p in bae.parameters():
    ws.append(p.data)

In [7]:
criterion = nn.MSELoss()
loss = criterion(y, x) + bae.jacobi_term
bae.zero_grad()
loss.backward()

In [8]:
# derivative of Jacobi term according to W and b
w = ws[0]
w.requires_grad=True
b = ws[1]
b.requires_grad=True

s = torch.sigmoid(torch.matmul(w, x) + b)
ou = torch.matmul(torch.diag(s * (1-s)), w).norm().pow(2) # manual calculation of the Frobenius norm of the Jacobian
j_w = grad(ou, w, retain_graph=True)[0]
j_b = grad(ou, b, retain_graph=True)[0]

In [9]:
# derivative according to the decoder W and b parameters
sigm_deriv = y * (1-y)
sigm_jacobi = torch.diag(sigm_deriv)
dec_b = torch.matmul(y-x, sigm_jacobi) * 2
dec_w = torch.ger(dec_b, bae.y_enc)

# derivative according to the encoder W and b parameters
delta = torch.matmul(torch.matmul((y-x), sigm_jacobi), ws[2])
enc_b = torch.matmul(delta, torch.diag(bae.y_enc * (1-bae.y_enc))) * 2
enc_w = torch.ger(enc_b, x)

In [10]:
for p, calc_grad in zip(bae.parameters(), [enc_w + j_w, enc_b + j_b, dec_w, dec_b]):
    assert torch.le(torch.abs(p.grad - calc_grad), 1e-7).sum() == calc_grad.numel()
print('Gradients are OK!')

Gradients are OK!


## MNIST example for CAE