# **2020 하계 프로젝트 #3**

* Eveing 스터디 2020년 하계 방학 개인 프로젝트 # 3
* 개발기간 : 2020.08.12~
* 주제 : MNIST dataset을 이용한 VAE 연습
* 목표 : generater 사용하여 과부하 줄이기/ 재구성 이미지 성공적으로 도출하기

## **settings**

In [1]:
from google.colab import drive

drive.mount('/content/gdrive',force_remount=False)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [6]:
# GPU 환경 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [2]:
# prerequisites
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torchvision import datasets
from torch.autograd import Variable
from torchvision.utils import save_image

## **hyper parameters**

In [9]:
img_shape = (1,28,28)
batch_size = 16
latent_dim = 2

## **data**

In [14]:
import torchvision.datasets as datasets

transform = transforms.Compose(
[transforms.ToTensor(),
 transforms.Normalize((0.5,), (0.5,))])

trainSet = datasets.MNIST(root='./data', train=True,
                                    download=True, transform=transform)
trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=batch_size,
                                      shuffle=True, num_workers=2)

testSet = datasets.MNIST(root='./data', train=False, 
                              download=True, transform=transform)
testLoader = torch.utils.data.DataLoader(testSet, batch_size=batch_size,
                                      shuffle=True, num_workers=2)

In [15]:
len(trainSet)

60000

In [16]:
len(testSet)

10000

In [20]:
for i, images in enumerate(trainSet):
  print(images[0].size())
  break

torch.Size([1, 28, 28])


## **vae**

//output size 계산법

**conv2d**

ouput size = (input size - filter width+2*padding)/stride + 1

**conv2d_transpose**

output size = (input size -1)*stride -2padding+ filter_height


In [36]:
class VAE(nn.Module):
  def __init__(self):
    super(VAE, self).__init__()

    # encoder part
    self.encoder = nn.Sequential(
        # encoder input size = (28, 28), channel = 1
        nn.Conv2d(1, 32, kernel_size=3, stride=1), # (1, 28, 28) -> (32, 26, 26)
        nn.BatchNorm2d(32),  
        nn.LeakyReLU(),  
        nn.Dropout(p=0.25),
        
        nn.Conv2d(32, 64, kernel_size=3, stride = 2), # (32, 26, 26) -> (64, 12, 12)  
        nn.BatchNorm2d(64),  
        nn.LeakyReLU(),    
        nn.Dropout(p=0.25),

        nn.Conv2d(64, 64, kernel_size=3, stride=2),   # (64, 12, 12) -> (64, 9, 9)
        nn.BatchNorm2d(64),   
        nn.LeakyReLU(), 
        nn.Dropout(p=0.25),

        nn.Conv2d(64, 2, kernel_size=3, stride=2),   # (64, 9, 9) -> (2, 4, 4)
        nn.BatchNorm2d(2),   
        nn.LeakyReLU(), 
        nn.Dropout(p=0.25),

        nn.Flatten()
        # output size = 2*4*4 = h_dim        
    )
    self.fc_mu = nn.Linear(32, 2) # 잠재공간의 차원 z_dim : 2으로 설정
    self.fc_log_var = nn.Linear(32, 2)
    self.fc_decode = nn.Linear(2, 32)

    # decoder part
    self.decoder = nn.Sequential(
        
        nn.ConvTranspose2d(2, 64 ,kernel_size=3, stride=2), # (2, 4, 4) -> (64, 9, 9)
        nn.BatchNorm2d(64),
        nn.LeakyReLU(),
        nn.Dropout(p=0.25),

        nn.ConvTranspose2d(64, 64 ,kernel_size=4, stride=1),# (64, 9, 9) -> (64, 12, 12)
        nn.BatchNorm2d(64),
        nn.LeakyReLU(),
        nn.Dropout(p=0.25),

        nn.ConvTranspose2d(64, 32 ,kernel_size=4, stride=2),# (64, 12, 12) -> (32, 26, 26)
        nn.BatchNorm2d(32),
        nn.LeakyReLU(),
        nn.Dropout(p=0.25),
        
        nn.ConvTranspose2d(32, 1 ,kernel_size=3, stride=1),# (32, 26, 26) -> (1, 28, 28)
        nn.Sigmoid(),
    )
  def encode(self, x):
    h = self.encoder(x) # (batch_size, 32)
    mu, log_var = self.fc_mu(h), self.fc_log_var(h)
    z = self.reparameterize(mu, log_var)
    return z, mu, log_var

  def reparameterize(self, mu, log_var):
    sigma = torch.exp(0.5*log_var)  # 교재 p108 참고
    epsilon = torch.randn_like(sigma) # sigma와 같은 크기의 정규분포 랜덤 텐서 생성
    return mu + sigma * epsilon

  def decode(self, z):
    z = self.fc_decode(z)
    z = self.decoder(z.view(z.size(0), 2, 4, 4)) # Unflatten 대신 직접 reshape
    return z

  def forward(self, x):
    z, mu, log_var = self.encode(x)
    z = self.decode(z)
    return z, mu, log_var

In [37]:
# model 내 input-output size 맞춘 것 확인

tst_model = VAE().to(device)
tst_input = (torch.rand(1, 1, 28, 28)).to(device)
tst_out = tst_model(tst_input)

print(len(tst_out)) # (z, mu, log_var) 3가지 텐서를 담고 있는 tuple 형
tst_gen_img, tst_mu, tst_log_var = tst_out
print("생성된 이미지 크기 : ", tst_gen_img.shape)
print("mu 크기 : ", tst_mu.shape)
print("log_var 크기 :",tst_log_var.shape)

tst_z, _, _ = tst_model.encode(tst_input)
print("잠재공간 크기 :",tst_z.shape)

RuntimeError: ignored