# AutoEncoder

In [1]:
import os
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

if not os.path.exists('./AE_img'):
    os.mkdir('./AE_img')

In [4]:
def normalization(tensor, min_value, max_value):
    min_tensor = tensor.min()
    tensor = (tensor - min_tensor)
    max_tensor = tensor.max()
    tensor = tensor / max_tensor
    tensor = tensor * (max_value - min_value) + min_value
    return tensor

def value_round(tensor):
    return torch.round(tensor)

def to_img(x):
    x = x.view(x.size(0), 1, 28, 28)
    return x

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda tensor:normalization(tensor, 0, 1)),
    transforms.Lambda(lambda tensor:value_round(tensor))
])
batch_size = 128

dataset = MNIST('./MNIST_dataset', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST_dataset\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:09<00:00, 1.01MB/s]


Extracting ./MNIST_dataset\MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST_dataset\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 156kB/s]


Extracting ./MNIST_dataset\MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST_dataset\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.33MB/s]


Extracting ./MNIST_dataset\MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST_dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.55MB/s]

Extracting ./MNIST_dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST_dataset\MNIST\raw






In [9]:
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 256),
            nn.ReLU(True), # True means inplace, 원본 tensor를 수정하겠다는 의미
            nn.Linear(256, 64),
            nn.ReLU(True))
        
        self.decoder = nn.Sequential(
            nn.Linear(64, 256),
            nn.ReLU(True),
            nn.Linear(256, 28 * 28),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [19]:
import torchsummary
model = autoencoder().cuda()
torchsummary.summary(model, input_data=(128,784))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 128, 64]             --
|    └─Linear: 2-1                       [-1, 128, 256]            200,960
|    └─ReLU: 2-2                         [-1, 128, 256]            --
|    └─Linear: 2-3                       [-1, 128, 64]             16,448
|    └─ReLU: 2-4                         [-1, 128, 64]             --
├─Sequential: 1-2                        [-1, 128, 784]            --
|    └─Linear: 2-5                       [-1, 128, 256]            16,640
|    └─ReLU: 2-6                         [-1, 128, 256]            --
|    └─Linear: 2-7                       [-1, 128, 784]            201,488
|    └─Sigmoid: 2-8                      [-1, 128, 784]            --
Total params: 435,536
Trainable params: 435,536
Non-trainable params: 0
Total mult-adds (M): 0.87
Input size (MB): 0.38
Forward/backward pass size (MB): 1.33
Params size (MB): 1.66
Estimated Total Size (MB):

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 128, 64]             --
|    └─Linear: 2-1                       [-1, 128, 256]            200,960
|    └─ReLU: 2-2                         [-1, 128, 256]            --
|    └─Linear: 2-3                       [-1, 128, 64]             16,448
|    └─ReLU: 2-4                         [-1, 128, 64]             --
├─Sequential: 1-2                        [-1, 128, 784]            --
|    └─Linear: 2-5                       [-1, 128, 256]            16,640
|    └─ReLU: 2-6                         [-1, 128, 256]            --
|    └─Linear: 2-7                       [-1, 128, 784]            201,488
|    └─Sigmoid: 2-8                      [-1, 128, 784]            --
Total params: 435,536
Trainable params: 435,536
Non-trainable params: 0
Total mult-adds (M): 0.87
Input size (MB): 0.38
Forward/backward pass size (MB): 1.33
Params size (MB): 1.66
Estimated Total Size (MB):

In [11]:
learning_rate, num_epochs = 1e-3, 50
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate, weight_decay=1e-5)
for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = img.view(img.size(0), -1)
        img = Variable(img).cuda()
        
        output = model(img)
        MSE_loss = nn.MSELoss()(output, img)
        
        optimizer.zero_grad()
        MSE_loss.backward()
        optimizer.step()
        
    if epoch % 10 == 0 or epoch == 49 :
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, MSE_loss.item()))
        
        x_gt = to_img(img.cpu().data)
        x_generated = to_img(output.cpu().data)
        
        save_image(x_gt, './AE_img/ground_truth_{}.png'.format(epoch))
        save_image(x_generated, './AE_img/generated_x{}.png'.format(epoch))

torch.save(model.state_dict(), './auto_encoder.pth')

epoch [1/50], loss:0.0343
epoch [11/50], loss:0.0150
epoch [21/50], loss:0.0120
epoch [31/50], loss:0.0124
epoch [41/50], loss:0.0107
epoch [50/50], loss:0.0117


# Variational AutoEncoder

In [1]:
import os
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image
import pytorch_model_summary

if not os.path.exists('./VAE_img'):
    os.mkdir('./VAE_img')

In [7]:
def normalization(tensor, min_value, max_value):
    min_tensor = tensor.min()
    tensor = (tensor - min_tensor)
    max_tensor = tensor.max()
    tensor = tensor / max_tensor
    tensor = tensor * (max_value - min_value) + min_value
    return tensor

def value_round(tensor):
    return torch.round(tensor)

def to_img(x):
    x = x.view(x.size(0), 1, 28, 28)
    return x

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda tensor:normalization(tensor, 0, 1)),
    transforms.Lambda(lambda tensor:value_round(tensor))
])
batch_size = 256

dataset = MNIST('./MNIST_dataset', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [8]:
class VariationalAutoencoder(nn.Module):
    def __init__(self):
        super(VariationalAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 400),
            nn.ReLU(True),
            nn.Linear(400, 40))
        self.decoder = nn.Sequential(
            nn.Linear(20, 400),
            nn.ReLU(True),
            nn.Linear(400, 28 * 28),
            nn.Sigmoid())

    def reparametrization(self, mu, logvar):
        var = logvar.exp()
        std = var.sqrt()
        eps = Variable(torch.cuda.FloatTensor(std.size()).normal_())
        return eps.mul(std).add(mu)

    def forward(self, x):
        h = self.encoder(x)
        mu = h[:, :20]
        logvar = h[:, 20:]
        z = self.reparametrization(mu, logvar)
        x_gen = self.decoder(z)
        return x_gen, mu, logvar

    def interpolation(self, x_1, x_2, alpha):
        traverse_1 = self.encoder(x_1)
        traverse_2 = self.encoder(x_2)
        mu_1, mu_2 = traverse_1[:, :20], traverse_2[:, :20]
        logvar_1, logvar_2 = traverse_1[:, 20:], traverse_2[:, 20:]
        traverse_m = (1 - alpha) * mu_1 + alpha * mu_2
        traverse_logvar = (1 - alpha) * logvar_1 + alpha * logvar_2
        z = self.reparametrization(traverse_m, traverse_logvar)
        generated_image = self.decoder(z)
        return generated_image

In [9]:
model = VariationalAutoencoder().cuda()
print(pytorch_model_summary.summary(model, torch.zeros(1,784).cuda(), show_input = True))

-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
          Linear-1            [1, 784]         314,000         314,000
            ReLU-2            [1, 400]               0               0
          Linear-3            [1, 400]          16,040          16,040
          Linear-4             [1, 20]           8,400           8,400
            ReLU-5            [1, 400]               0               0
          Linear-6            [1, 400]         314,384         314,384
         Sigmoid-7            [1, 784]               0               0
Total params: 652,824
Trainable params: 652,824
Non-trainable params: 0
-----------------------------------------------------------------------


In [10]:
BCE = nn.BCELoss()
num_epochs, learning_rate = 50, 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = img.view(img.size(0), -1)
        img = Variable(img).cuda()
        x_gen, mu, logvar = model(img)
        NKLD = mu.pow(2).add(logvar.exp()).mul(-1).add(logvar.add(1))
        KLD = torch.sum(NKLD).mul(-0.5)
        KLD /= batch_size * 784
        loss = BCE(x_gen, img) + KLD
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if epoch % 10 == 0 or (epoch+1) == num_epochs:
        print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, loss.item()))
        x_gt = to_img(img.cpu().data)
        x_gen = to_img(x_gen.cpu().data)
        save_image(x_gt, './VAE_img/ground_truth_{}.png'.format(epoch))
        save_image(x_gen, './VAE_img/generated_x{}.png'.format(epoch))
        batch = next(iter(dataloader))
        batch = batch[0].clone().detach()
        batch = batch.view(batch.size(0), -1)
        batch = Variable(batch).cuda()
        x_1 = batch[0:1]
        x_2 = batch[1:2]
        generated_images = []
        for alpha in torch.arange(0.0, 1.0, 0.1):
            generated_images.append(model.interpolation(
                x_1, x_2, alpha))
        generated_images = torch.cat(generated_images, 0).cpu().data
        save_image(generated_images.view(-1, 1, 28, 28),
                   './VAE_img/interpolation_{}.png'.format(epoch),
                   nrow=1)
torch.save(model.state_dict(), './variational_autoencoder.pth')

epoch [1/50], loss:0.1562
epoch [11/50], loss:0.0941
epoch [21/50], loss:0.0886
epoch [31/50], loss:0.0841
epoch [41/50], loss:0.0770
epoch [50/50], loss:0.0746
