In [98]:
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
import os
import numpy as np
from torchsummary import summary #for summary
import matplotlib.pyplot as plt

if not os.path.exists('./dc_img'):
    os.mkdir('./dc_img')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
np.random.seed = 23

In [99]:
def to_img(x):
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x
  

num_epochs = 100
batch_size = 128
learning_rate = 1e-3

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5), (0.5))
])

dataset = MNIST('./data', transform=img_transform, download=False)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [233]:
class encoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(encoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.Linear(hidden_size, output_size)
        )
        
    def forward(self, data):
        return self.encoder(data)
    
class decoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(decoder, self).__init__()
        self.decoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.Linear(hidden_size, output_size)
        )
    
    def forward(self,data):
        return self.decoder(data)
    

class VariationalAutoencoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(VariationalAutoencoder, self).__init__()
        self.input_size = 100
        self.output_size = 8
        self.encoder_model = encoder(input_size, hidden_size, output_size).to(device)
        self.decoder_model  = decoder(self.output_size, hidden_size, input_size).to(device)
        self.fc1 = nn.Linear(self.input_size, self.output_size).to(device)
        self.fc2 = nn.Linear(self.input_size, self.output_size).to(device)
        
    def forward(self, data):
        x = self.encoder_model(data)
        encoder_out = x.to(device)
        self.mean_out = self.fc1(encoder_out)
        self.std_div = self.fc1(encoder_out)
        noraml_values = torch.from_numpy(np.random.normal(0,1,size=self.output_size)).to(device)
        self.std_div = torch.exp(self.std_div)
        latent_out = torch.mul(noraml_values, self.std_div) + self.mean_out
        return self.decoder_model(latent_out.float().to(device))
    

encoder_model = encoder(28*28, 100, 100).to(device)
decoder_model = decoder(100,100,28*28).to(device)

summary(encoder_model, (1,28*28))
summary(decoder_model, (100,100))

vae = VariationalAutoencoder(28*28, 100, 100).to(device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]          78,500
            Linear-2               [-1, 1, 100]          10,100
Total params: 88,600
Trainable params: 88,600
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.34
Estimated Total Size (MB): 0.34
----------------------------------------------------------------
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1             [-1, 100, 100]          10,100
            Linear-2             [-1, 100, 784]          79,184
Total params: 89,284
Trainable params: 89,284
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.04
Forward/backwa

In [241]:
def latent_loss(mean, std_div, beta):
    mean_sq = mean**2
    std_div_sq = std_div**2
    return beta*torch.mean(mean_sq + std_div_sq + torch.log(std_div) - 1)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)

In [242]:
# test = img[0].reshape([1,1,28,28]).to(device)
# output = vae.forward(test)
# plt.imshow(output.reshape([28,28]).cpu().detach().numpy())

In [None]:
num_epochs = 100
beta = 0.5
input_dim = 28*28

for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = Variable(img.resize_(batch_size, input_dim)).to(device)
        
        # ===================forward=====================
        output = vae.forward(img)
        ll = latent_loss(vae.mean_out, vae.std_div, beta)
        loss = criterion(output, img) + ll
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.item()))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

epoch [1/100], loss:-0.2084
epoch [2/100], loss:-0.2130
epoch [3/100], loss:-0.2024
epoch [4/100], loss:-0.1995
epoch [5/100], loss:-0.2151
epoch [6/100], loss:-0.2131
epoch [7/100], loss:-0.2250
epoch [8/100], loss:-0.2089
epoch [9/100], loss:-0.2066
epoch [10/100], loss:-0.2139
epoch [11/100], loss:-0.2137
epoch [12/100], loss:-0.2069
epoch [13/100], loss:-0.2148
epoch [14/100], loss:-0.2230
epoch [15/100], loss:-0.2131
epoch [16/100], loss:-0.2076


In [79]:
enc_output = encoder_model(img[0].reshape([1,1,28,28]).to(device))

In [80]:
enc_output.flatten().detach().to(device)

tensor([0.3479, 0.3849, 0.0849, 0.0282, 0.0926, 0.0526, 0.0926, 0.0526, 0.0951,
        0.0472, 0.0191, 0.0472, 0.4813, 0.4813, 0.4813, 0.4813, 0.0495, 0.0000,
        0.0000, 0.0000, 0.3361, 0.3361, 0.2729, 0.2729, 0.2045, 0.2068, 0.1166,
        0.2068, 0.0679, 0.0000, 0.0000, 0.0000], device='cuda:0')

In [84]:
layer = nn.Linear(32,32).to(device)

In [85]:
layer(enc_output.flatten())

tensor([ 0.0222, -0.3375, -0.2784,  0.0292,  0.1685, -0.1493,  0.0920, -0.1439,
         0.3920, -0.1440, -0.1780,  0.2583,  0.0262, -0.0262,  0.0156, -0.3385,
        -0.1810, -0.2492, -0.1742, -0.0213, -0.0862,  0.1431, -0.0735,  0.0294,
         0.1364, -0.1996,  0.2904,  0.2791, -0.1360,  0.0877,  0.3254,  0.2780],
       device='cuda:0', grad_fn=<AddBackward0>)

In [34]:
enc_output.flatten().detach().to(device)

tensor([4.7437e-03, 4.7437e-03, 1.0842e-01, 1.0734e-01, 1.1454e-01, 0.0000e+00,
        1.1454e-01, 0.0000e+00, 1.4589e-01, 8.1845e-02, 1.7316e-01, 1.7316e-01,
        8.2318e-02, 8.2318e-02, 3.3016e-02, 0.0000e+00, 1.9837e-01, 0.0000e+00,
        1.1395e-01, 0.0000e+00, 4.4489e-01, 4.4489e-01, 4.4489e-01, 4.4489e-01,
        0.0000e+00, 0.0000e+00, 1.2180e-04, 0.0000e+00, 1.6094e-01, 0.0000e+00,
        1.6094e-01, 5.9207e-03], device='cuda:0')

In [77]:
data = layer(enc_output.flatten())

In [88]:
data.shape

torch.Size([32])

In [66]:
torch.mul(data,data) +data

tensor([ 0.2172, -0.1450, -0.0567,  0.0502, -0.2140,  0.0199, -0.1157,  0.2501,
        -0.0427,  0.0933,  0.0981,  0.1532, -0.1105,  0.1093,  0.0033, -0.0025,
         0.1739, -0.0625,  0.0688,  0.0779,  0.3500,  0.0966,  0.2517, -0.1722,
        -0.0403,  0.2566, -0.1326, -0.1173, -0.1251,  0.1015,  0.1532,  0.3012],
       device='cuda:0', grad_fn=<AddBackward0>)