## Autoencoder

In [244]:
from torch import Tensor, randn
from torch.autograd import Variable
from torch.nn import LSTM

x = Variable(Tensor([[[0, 0], [0, 0]]]), requires_grad = False)

w0 = Variable(randn(1, 1, 2), requires_grad = True)
w1 = Variable(randn(1, 1, 2), requires_grad = True)
s0 = Variable(randn(1, 1, 2))
s1 = Variable(randn(1, 1, 2))

for i in range(4):
    
    h, _ = LSTM(2, 2)(x, (w0, s0))
    y, _ = LSTM(2, 2)(h, (w1, s1))
    
    loss = ((y - x).mean()).abs()
    loss.backward()
    
    learning_rate = 2 ** -4
    w0.data -= learning_rate * w0.grad.data
    w1.data -= learning_rate * w1.grad.data
    
    w0.grad.data.zero_()
    w1.grad.data.zero_()
    
    print(loss)

Variable containing:
 0.1260
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  5.5477
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  8.9291
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  3.6306
[torch.FloatTensor of size 1]



## VAE

In [4]:
from torch import Tensor, randn
from torch.autograd import Variable
from torch.nn import LSTM, KLDivLoss
from torch.optim import SGD

In [11]:
# training data
x  = Variable(Tensor([[[0 for j in range(16)] for i in range(16)]]), requires_grad = False)

In [6]:
# initial weights
w0 = Variable(randn(1, 1, 16), requires_grad = True)
w1 = Variable(randn(1, 1, 16), requires_grad = True)
w2 = Variable(randn(1, 1, 16), requires_grad = True)
w3 = Variable(randn(1, 1, 16), requires_grad = True)
# initial states
s0 = Variable(randn(1, 1, 16))
s1 = Variable(randn(1, 1, 16))
s2 = Variable(randn(1, 1, 16))
s3 = Variable(randn(1, 1, 16))

In [7]:
def encoder(x, w0, w1, s0, s1):
    h, _ = LSTM(16, 16)(x, (w0, s0))
    a, _ = LSTM(16, 16)(h, (w1, s1))
    b, _ = LSTM(16, 16)(h, (w1, s1))
    return a, b # μ, log(σ^2)

In [13]:
def sampling(mean, lvar):
    norm = Variable(randn(16, 16))
    return mean + (lvar / 2).exp() * norm # N(μ, σ^2) = μ + σ * N(0, 1), σ = exp(log(σ^2) / 2)

In [9]:
def decoder(l, w2, w3, s2, s3):
    h, _ = LSTM(16, 16)(l, (w2, s2))
    y, _ = LSTM(16, 16)(h, (w3, s3))
    return y

In [14]:
for i in range(4):
    
    a, b = encoder  (x, w0, w1, s0, s1)
    l    = sampling (a, b)
    y    = decoder  (l, w2, w3, s2, s3)
    
    loss = KLDivLoss()(l, x) + (y - x).mean().abs()
    loss.backward()
    
    SGD([w0, w1, w2, w3], lr = 2 ** -4)
    
    w0.grad.data.zero_()
    w1.grad.data.zero_()
    w2.grad.data.zero_()
    w3.grad.data.zero_()
    
    print(loss)

Variable containing:
1.00000e-02 *
  5.0966
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  4.0201
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  4.5667
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  4.6564
[torch.FloatTensor of size 1]

