In [1]:
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
import os
import numpy as np
from torchsummary import summary #for summary
import matplotlib.pyplot as plt

# Output directory used by this notebook; exist_ok makes the cell safe to re-run.
os.makedirs('./dc_img', exist_ok=True)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed the random number generators so runs are repeatable.
# np.random.seed is a function and must be *called*; assigning to it
# (np.random.seed = 23) replaces the function and seeds nothing.
np.random.seed(23)
torch.manual_seed(23)

In [4]:
def to_img(x):
    """Turn a batch of values in [-1, 1] into image tensors in [0, 1].

    The input is shifted and scaled from [-1, 1] to [0, 1], anything that
    falls outside that range is clipped, and the result is viewed as
    (batch, 1, 28, 28), with the batch size taken from the first dimension
    of `x`.
    """
    rescaled = ((x + 1) * 0.5).clamp(0, 1)
    return rescaled.view(rescaled.size(0), 1, 28, 28)
  

# Training hyper-parameters.
num_epochs = 100
batch_size = 32
learning_rate = 1e-3

# ToTensor scales pixels to [0, 1]; Normalize with mean 0.5 / std 0.5 then maps
# them to [-1, 1], which is the range to_img() converts back from.
# Normalize expects per-channel *sequences*: (0.5) is just the float 0.5,
# so the one-element tuples are spelled with a trailing comma.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# download=True fetches MNIST only when it is missing from ./data, so the
# notebook also runs on a machine where the dataset has not been downloaded yet.
dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [12]:
class encoder(nn.Module):
    """Convolutional encoder that compresses a 1-channel image into 8 small feature maps.

    The per-layer shapes noted below are for a 28x28 input, for which the
    output has shape (batch, 8, 2, 2).
    """

    def __init__(self):
        super().__init__()
        # Layers in execution order; nn.Sequential indexes them 0-5.
        layers = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=3, padding=1),  # b, 16, 10, 10
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # b, 16, 5, 5
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=1),  # b, 8, 3, 3
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),  # b, 8, 2, 2
        ]
        self.encoder = nn.Sequential(*layers)

    def forward(self, data):
        """Run `data` through the convolutional stack and return the feature maps."""
        features = self.encoder(data)
        return features
    
class decoder(nn.Module):
    """Transposed-convolution decoder that expands 8 feature maps into a 1-channel image.

    The per-layer shapes noted below are for a (batch, 8, 2, 2) input, for
    which the output has shape (batch, 1, 28, 28). The final Tanh keeps
    every output value in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # Layers in execution order; nn.Sequential indexes them 0-5.
        layers = [
            nn.ConvTranspose2d(in_channels=8, out_channels=16, kernel_size=3, stride=2),  # b, 16, 5, 5
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=5, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=8, out_channels=1, kernel_size=2, stride=2, padding=1),  # b, 1, 28, 28
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*layers)

    def forward(self, data):
        """Run `data` through the transposed-convolution stack and return the image batch."""
        reconstruction = self.decoder(data)
        return reconstruction
    

class VariationalAutoencoder(nn.Module):
    """Variational autoencoder built from the convolutional `encoder` and `decoder`.

    The encoder output is flattened to one vector per sample, two separate
    linear layers predict the mean and the log standard deviation of the
    latent distribution, a latent vector is sampled as
    ``mean + std * eps`` with ``eps ~ N(0, 1)``, and the sample is reshaped
    back to the encoder's output shape and decoded.

    After each forward pass the following attributes hold the values of the
    most recent batch (each of shape (batch, 32)):
      * ``mean_out``  - predicted latent mean
      * ``log_sigma`` - predicted log standard deviation
      * ``std_div``   - ``exp(log_sigma)``
    """

    def __init__(self):
        super().__init__()
        # Number of features per sample once the encoder output is flattened
        # (8 * 2 * 2 = 32 for 28x28 inputs) and the latent size.
        self.input_size = 32
        self.output_size = 32
        self.encoder_model = encoder().to(device)
        self.decoder_model = decoder().to(device)
        # fc1 predicts the mean, fc2 the log standard deviation.
        self.fc1 = nn.Linear(self.input_size, self.output_size).to(device)
        self.fc2 = nn.Linear(self.input_size, self.output_size).to(device)

    def forward(self, data):
        """Encode `data`, sample a latent vector, and return its reconstruction.

        `data` must already live on the same device as the model.
        """
        encoder_out = self.encoder_model(data)
        # nn.Linear acts on the last dimension only, so the feature maps are
        # flattened to (batch, features) before the two heads are applied.
        flat = encoder_out.flatten(start_dim=1)
        self.mean_out = self.fc1(flat)
        # Separate head for the log std; reusing fc1 here would make it
        # identical to the mean and leave fc2 untrained.
        self.log_sigma = self.fc2(flat)
        self.std_div = torch.exp(self.log_sigma)
        # Noise is drawn with the same shape, dtype and device as std_div and
        # carries no gradient, so gradients flow only through mean and std.
        noise = torch.randn_like(self.std_div)
        latent_out = self.mean_out + self.std_div * noise
        # The decoder expects feature maps, so restore the encoder's output shape.
        return self.decoder_model(latent_out.reshape(encoder_out.shape))
    
# Only the two convolutional halves are built in this cell; the full
# VariationalAutoencoder is not instantiated here.
encoder_model, decoder_model = encoder().to(device), decoder().to(device)

# Layer-by-layer report for each half. The decoder is summarised with an
# (8, 2, 2) input, which is the encoder's output shape for a (1, 28, 28) image.
summary(encoder_model, (1, 28, 28))
summary(decoder_model, (8, 2, 2))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 10, 10]             160
              ReLU-2           [-1, 16, 10, 10]               0
         MaxPool2d-3             [-1, 16, 5, 5]               0
            Conv2d-4              [-1, 8, 3, 3]           1,160
              ReLU-5              [-1, 8, 3, 3]               0
         MaxPool2d-6              [-1, 8, 2, 2]               0
Total params: 1,320
Trainable params: 1,320
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 0.01
Estimated Total Size (MB): 0.04
----------------------------------------------------------------
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1             [-1, 16, 5, 

In [36]:
# Take one mini-batch of images from the loader so this cell runs on a fresh
# kernel; `data` is not defined by any earlier cell. Labels are not needed.
data, _ = next(iter(dataloader))

# Linear layer sized for the flattened encoder output (8 * 2 * 2 = 32 features).
layer = nn.Linear(8*2*2, 8*2*2).to(device)
# Call the module itself rather than .forward() so registered hooks also run.
enc_out = encoder_model(data.to(device))
enc_out.shape

torch.Size([32, 8, 2, 2])

In [41]:
# Flatten every sample's feature maps into one vector. The batch dimension is
# kept as-is instead of being hard-coded to 32, so this works for any batch size.
input_data = enc_out.flatten(start_dim=1)
# Show only the resulting shape (as in the recorded output), not the whole tensor.
layer(input_data).shape

torch.Size([32, 32])

In [28]:
# Feature maps of the first sample in the batch, laid out as 8 maps of 2x2.
enc_out[0].reshape(8, 2, 2)

tensor([[[0.0000, 0.0000],
         [0.0000, 0.0000]],

        [[0.3734, 0.3734],
         [0.4368, 0.3734]],

        [[0.0942, 0.0000],
         [0.0893, 0.3088]],

        [[0.0000, 0.0000],
         [0.0000, 0.0000]],

        [[0.0297, 0.0000],
         [0.0297, 0.0054]],

        [[0.0000, 0.1564],
         [0.0000, 0.1401]],

        [[0.0000, 0.0000],
         [0.0000, 0.0000]],

        [[0.1158, 0.0000],
         [0.2618, 0.2618]]], device='cuda:0', grad_fn=<ViewBackward>)