<a href="https://colab.research.google.com/github/ingabLee/Book_GenerateAIForComptureVision/blob/main/GenerateAI_Vision_Chapter_2_1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch



In [2]:
import os
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image
from google.colab import drive

# Mount Google Drive at /content/drive inside the Colab runtime.
drive.mount('/content/drive')

# Create the AE_img directory on the mounted drive; exist_ok=True makes this
# a no-op when the directory is already there.
# NOTE(review): presumably the destination for saved autoencoder images — confirm against later cells.
os.makedirs(name='/content/drive/MyDrive/VisionForGenerateAI/AE_img', exist_ok=True)


Mounted at /content/drive


In [3]:
# Min-max normalization: rescale a tensor linearly into [min_value, max_value].
def normalization(tensor, min_value, max_value):
  """Linearly rescale ``tensor`` so its values span ``[min_value, max_value]``.

  The smallest element maps to ``min_value`` and the largest to ``max_value``.

  Args:
    tensor: Input ``torch.Tensor``; the minimum and maximum are taken over
      all of its elements.
    min_value: Lower bound of the output range.
    max_value: Upper bound of the output range.

  Returns:
    A new tensor with the same shape as ``tensor``. When every element of
    ``tensor`` is identical, the result is filled with ``min_value``.
  """
  shifted = tensor - tensor.min()
  span = shifted.max()
  # A constant input gives span == 0; dividing would turn every element into
  # 0/0 = NaN, so skip the division and keep the all-zero offsets instead.
  # Multiplying by 1.0 yields the same floating dtype the division would.
  unit = shifted / span if span != 0 else shifted * 1.0
  return unit * (max_value - min_value) + min_value

# Round every element of a tensor.
def value_round(tensor):
  """Return the result of ``torch.round`` applied element-wise to ``tensor``."""
  rounded = torch.round(tensor)
  return rounded

# Reshape a generated tensor into image layout so it can be output as an image.
def to_img(x):
  """View ``x`` as a batch of single-channel 28x28 images.

  Args:
    x: Tensor whose first dimension is the batch size and whose remaining
      elements can be viewed as 1 x 28 x 28 per sample.

  Returns:
    A view of ``x`` with shape ``(x.size(0), 1, 28, 28)``.
  """
  batch = x.shape[0]
  return x.view(batch, 1, 28, 28)

# Preprocessing applied to every MNIST image before training: convert it to a
# tensor, min-max rescale it to [0, 1], then round each value.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda img: normalization(img, 0, 1)),
    transforms.Lambda(lambda img: value_round(img)),
])

# Number of samples per mini-batch.
batch_size = 128

# Load the MNIST dataset (downloading it if needed) with the preprocessing
# transform attached.
dataset = MNIST(root='./MNIST_dataset', transform=img_transform, download=True)

# Serve the dataset in mini-batches of batch_size, shuffling the sample order.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

100%|██████████| 9.91M/9.91M [00:00<00:00, 12.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 346kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.21MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.9MB/s]


In [5]:
# AutoEncoder composed of an encoder followed by a decoder.
class autoencoder(nn.Module):
  """Fully connected autoencoder operating on flattened 28x28 inputs.

  The encoder compresses 784 features to 64 (784 -> 256 -> 64) and the
  decoder mirrors it back to 784 (64 -> 256 -> 784), ending in a sigmoid.
  """

  def __init__(self):
    super().__init__()

    # Encoder: two linear layers, each followed by an activation function.
    # Input is expected with shape [N, 28*28], where N is the batch size.
    self.encoder = nn.Sequential(
        nn.Linear(in_features=28 * 28, out_features=256),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=256, out_features=64),
        nn.ReLU(inplace=True),
    )

    # Decoder: input/output sizes are the reverse of the encoder's. It is
    # likewise two linear layers, each followed by an activation function;
    # the final activation is a sigmoid.
    self.decoder = nn.Sequential(
        nn.Linear(in_features=64, out_features=256),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=256, out_features=28 * 28),
        nn.Sigmoid(),
    )

  def forward(self, x):
    """Pass ``x`` through the encoder and then the decoder; return the result."""
    return self.decoder(self.encoder(x))


import torchsummary

# Use the GPU when one is available and fall back to the CPU otherwise, so
# this cell does not raise on a CPU-only runtime (a bare .cuda() would).
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Instantiate the autoencoder and move it to the selected device.
model = autoencoder().to(device)

# Print a per-layer summary of the model. torchsummary adds its own leading
# batch dimension, so an input size of (96, 784) is reported as [-1, 96, 784].
torchsummary.summary(model, (96, 784), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 96, 256]         200,960
              ReLU-2              [-1, 96, 256]               0
            Linear-3               [-1, 96, 64]          16,448
              ReLU-4               [-1, 96, 64]               0
            Linear-5              [-1, 96, 256]          16,640
              ReLU-6              [-1, 96, 256]               0
            Linear-7              [-1, 96, 784]         201,488
           Sigmoid-8              [-1, 96, 784]               0
Total params: 435,536
Trainable params: 435,536
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.29
Forward/backward pass size (MB): 1.99
Params size (MB): 1.66
Estimated Total Size (MB): 3.94
----------------------------------------------------------------


In [8]:
# Scratch check: build a standalone linear layer with the same dimensions as
# the encoder's first layer (784 -> 256) and print its configuration.
test = nn.Linear(in_features=28 * 28, out_features=256)
print(test)

Linear(in_features=784, out_features=256, bias=True)
