# Various Encoder Architectures

In [7]:
import torch
import torch.nn as nn

In [8]:
SAMPLE_X = torch.rand(2, 1, 32)
SAMPLE_Y = torch.randint(2, [2])

## 0. Cute Model
    : cute version of our SimpleModel in Tutorial

In [35]:
class CuteModel(nn.Module):
    def __init__(self):
        super(CuteModel, self).__init__()
        self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=2)
        self.enc_layer2 = nn.Conv1d(8, 8, kernel_size=5, stride=2)
        
        self.decoder = nn.Linear(8, 6)
        
    def forward(self, x):
        print('Input size :\t\t',x.size())
        
        x = self.enc_layer1(x)
        print('After 1st layer :\t',x.size())
        x = self.enc_layer2(x)
        print('After 2nd layer :\t',x.size())
        
        x = x.mean(dim=-1)
        print('After mean pool :\t',x.size())
        
        x = self.decoder(x)
        print('After decoder :\t\t',x.size())
        return x

In [36]:
model = CuteModel()
p = model(SAMPLE_X)
loss = nn.CrossEntropyLoss()(p, SAMPLE_Y)
print('[loss] : ', loss)

Input size :		 torch.Size([2, 1, 32])
After 1st layer :	 torch.Size([2, 8, 14])
After 2nd layer :	 torch.Size([2, 8, 5])
After mean pool :	 torch.Size([2, 8])
After decoder :		 torch.Size([2, 6])
[loss] :  tensor(1.8711, grad_fn=<NllLossBackward>)


## 1. DilatedCNN
- In definition of each *self.enc_layer#*, you can define dilation of the Conv1d layer.

<pre>
self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=3, <b>dilation=2</b>)
</pre>

In [37]:
class DilatedCNNModel(nn.Module):
    def __init__(self):
        super(DilatedCNNModel, self).__init__()
        self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=2, dilation=2)
        self.enc_layer2 = nn.Conv1d(8, 8, kernel_size=5, stride=2, dilation=2)
        
        self.decoder = nn.Linear(8, 6)
        
    def forward(self, x):
        print('Input size :\t\t',x.size())

        x = self.enc_layer1(x)
        print('After 1st layer :\t',x.size())
        x = self.enc_layer2(x)
        print('After 2nd layer :\t',x.size())
        
        x = x.mean(dim=-1)
        print('After mean pool :\t',x.size())
        
        x = self.decoder(x)
        print('After decoder :\t\t',x.size())
        return x

In [38]:
model = DilatedCNNModel()
p = model(SAMPLE_X)
loss = nn.CrossEntropyLoss()(p, SAMPLE_Y)
print('[loss] : ', loss)

Input size :		 torch.Size([2, 1, 32])
After 1st layer :	 torch.Size([2, 8, 12])
After 2nd layer :	 torch.Size([2, 8, 2])
After mean pool :	 torch.Size([2, 8])
After decoder :		 torch.Size([2, 6])
[loss] :  tensor(1.6990, grad_fn=<NllLossBackward>)


## 2. Cumulate each layer outputs
- You can easily concatenate outputs of each encoding layers.

<pre>
x = torch.cat([x1,x2], dim=-1)
</pre>

In [42]:
class CumulateModel(nn.Module):
    def __init__(self):
        super(CumulateModel, self).__init__()
        self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=3)
        self.enc_layer2 = nn.Conv1d(8, 8, kernel_size=5, stride=3)
        
        self.decoder = nn.Linear(2*8, 6)
        
    def forward(self, x):
        print('Input size :\t\t',x.size())

        x1 = self.enc_layer1(x)
        print('After 1st layer :\t',x1.size())
        x2 = self.enc_layer2(x1)
        print('After 2nd layer :\t',x2.size())
        
        x1 = x1.mean(dim=-1)
        print('After pool x1 :\t\t',x1.size())
        x2 = x2.mean(dim=-1)
        print('After pool x2 :\t\t',x2.size())
        
        x = torch.cat([x1,x2], dim=-1)
        print('After concat :\t\t', x.size())
        
        x = self.decoder(x)
        print('After decoder :\t\t',x2.size())
        return x

In [43]:
model = CumulateModel()
p = model(SAMPLE_X)
loss = nn.CrossEntropyLoss()(p, SAMPLE_Y)
print('[loss] : ', loss)

Input size :		 torch.Size([2, 1, 32])
After 1st layer :	 torch.Size([2, 8, 10])
After 2nd layer :	 torch.Size([2, 8, 2])
After pool x1 :		 torch.Size([2, 8])
After pool x2 :		 torch.Size([2, 8])
After concat :		 torch.Size([2, 16])
After decoder :		 torch.Size([2, 8])
[loss] :  tensor(1.9182, grad_fn=<NllLossBackward>)


## 3. Variational Model
- You can compute **mean** and **log variance** of *z*.
<pre>
z_mu = self.enc_mu(x)
z_logvar = self.enc_logvar(x)
</pre>

- From *z_mu* and *z_logvar*, you can sample *z*, with **reparameterization trick**.
<pre>
z = self._reparam(z_mu, z_logvar)
</pre>

- You should compute **KL-Divergence** to map latent distriution to prior distribution.
<pre>
self.kld = self._kld_gauss(z_mu, z_logvar)
</pre>

- We provide you sample functions for **reparameterization trick** and computing **KL-Divergence**, so don't worry and just copy them!
<pre>
def _reparam(self, mu, logvar):
def _kld_gauss(self, mu, logvar):
</pre>


In [48]:
class VariationalModel(nn.Module):
    def __init__(self):
        super(VariationalModel, self).__init__()
        self.enc_layer1 = nn.Conv1d(1, 8, kernel_size=5, stride=3)
        self.enc_layer2 = nn.Conv1d(8, 8, kernel_size=5, stride=3)
        
        self.enc_mu = nn.Linear(8, 8)
        self.enc_logvar = nn.Linear(8, 8)
        
        self.kld = torch.tensor(0)
        
        self.decoder = nn.Linear(8, 6)
        
    def forward(self, x):
        print('Input size :\t\t',x.size())
        
        x = self.enc_layer1(x)
        print('After 1st layer :\t',x.size())
        x = self.enc_layer2(x)
        print('After 2nd layer :\t',x.size())
        
        x = x.mean(dim=-1)
        print('After mean pool :\t',x.size())
        
        z_mu = self.enc_mu(x)
        z_logvar = self.enc_logvar(x)
        print('z_mu & z_logvar :\t',z_mu.size())
        z = self._reparam(z_mu, z_logvar)
        print('Sampled z shape :\t',z.size())
        
        self.kld = self._kld_gauss(z_mu, z_logvar)
        
        x = self.decoder(z)
        print('After decoder :\t\t',x.size())
        return x
    
    def _reparam(self, mu, logvar):
        if self.training:
            std = logvar.mul(0.5).exp_()
            eps = torch.autograd.Variable(std.data.new(std.size()).normal_())
            return eps.mul(std).add_(mu)
        else:
            return mu
        
    def _kld_gauss(self, mu, logvar):
        kld_element = -logvar + torch.exp(logvar) + mu**2 - 1
        return 0.5 * torch.mean(kld_element)

***(IMPORTANT!!!)*** If you want to use variational model and regularize the latent distribution, you should add ***KL-Divergence term*** to the loss term.

<pre>
...
class_loss = CrossEntropyLoss()(pred, target)
<b>kld_loss = model.kld</b>
total_loss = class_loss + <b>BETA*kld_loss</b>
total_loss.backward()
...
</pre>

Also, you can control portion of classification loss and KLD loss by setting BETA. (ex: BETA=0.1 or BETA=10)

In [49]:
BETA = 1
model = VariationalModel()
p = model(SAMPLE_X)
loss = nn.CrossEntropyLoss()(p, SAMPLE_Y) + BETA*model.kld
print('[loss] : ', loss)

Input size :		 torch.Size([2, 1, 32])
After 1st layer :	 torch.Size([2, 8, 10])
After 2nd layer :	 torch.Size([2, 8, 2])
After mean pool :	 torch.Size([2, 8])
z_mu & z_logvar :	 torch.Size([2, 8])
Sampled z shape :	 torch.Size([2, 8])
After decoder :		 torch.Size([2, 6])
[loss] :  tensor(1.6636, grad_fn=<AddBackward0>)


## Try build your own model architecture