In [None]:
# Train on MNIST (digits rendered as images), applying an improved variant of the basic autoencoder architecture.

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Latent size handed to the Autoencoder constructor further down.
z_dim = 128

In [None]:
# Definition of the (variational) autoencoder model
class Autoencoder(nn.Module):
  """Convolutional variational autoencoder for 1 x 28 x 28 images.

  The encoder maps an image to the mean and log-variance of a diagonal
  Gaussian over a ``z_dim``-dimensional latent vector; the decoder maps a
  latent vector back to an image with values in [0, 1] (final Sigmoid).

  Args:
    z_dim: Number of latent dimensions.
  """

  def __init__(self, z_dim=128):
    super(Autoencoder, self).__init__()
    # Encoder (shapes are channels x height x width for a 1 x 28 x 28 input)
    self.encoder = nn.Sequential(
        nn.Conv2d(1, 16, 3, stride=1, padding=1),  # 16 x 28 x 28
        nn.ReLU(True),
        nn.Conv2d(16, 32, 3, stride=2, padding=1),  # 32 x 14 x 14
        nn.ReLU(True),
        nn.Conv2d(32, 64, 3, stride=2, padding=1),  # 64 x 7 x 7
        nn.ReLU(True)
    )
    # Latent space: a single linear layer emits mu and logvar side by side,
    # so it needs 2 * z_dim outputs for each half to be z_dim wide.
    self.latent_space = nn.Linear(64*7*7, 2*z_dim)
    # Decoder
    self.decoder_mlp = nn.Sequential(
        nn.Linear(z_dim, 64*7*7),
        nn.ReLU(True),
    )
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),  # 32 x 14 x 14
        nn.ReLU(True),
        nn.ConvTranspose2d(32, 16, 4, stride=2, padding=1),  # 16 x 28 x 28
        nn.ReLU(True),
        nn.ConvTranspose2d(16, 1, 3, stride=1, padding=1),  # 1 x 28 x 28
        nn.Sigmoid()
    )

  def reparameterize(self, mu, logvar):
    """Sample z = mu + eps * std with eps drawn from a standard normal.

    Args:
      mu: Mean tensor.
      logvar: Log-variance tensor with the same shape as ``mu``.

    Returns:
      A sampled tensor with the same shape as ``mu``.
    """
    std = torch.exp(0.5*logvar)
    # randn_like gives N(0, 1) noise; rand_like would give uniform [0, 1).
    eps = torch.randn_like(std)
    return mu + eps*std

  def encoding(self, x):
    """Encode a batch of images into ``(mu, logvar)``.

    Args:
      x: Image batch of shape (batch, 1, 28, 28).

    Returns:
      Tuple ``(mu, logvar)``, each of shape (batch, z_dim).
    """
    x = self.encoder(x)
    x = x.reshape(x.size(0), -1)  # flatten the tensor
    latent_params = self.latent_space(x)
    mu, logvar = torch.chunk(latent_params, 2, dim=1)
    return mu, logvar

  def decoding(self, x):
    """Decode latent vectors of shape (batch, z_dim) into images.

    Returns:
      Tensor of shape (batch, 1, 28, 28) with values in [0, 1].
    """
    # Reshape the MLP output into the 64 x 7 x 7 feature map the
    # transposed convolutions expect.
    x = self.decoder_mlp(x).view(x.size(0), 64, 7, 7)
    x = self.decoder(x)
    return x

  def forward(self, x):
    """Encode, sample a latent vector and decode.

    Args:
      x: Image batch of shape (batch, 1, 28, 28).

    Returns:
      Tuple ``(x_hat, mu, logvar)``: the reconstruction plus the latent
      distribution parameters, which the loss needs for its KL term.
    """
    mu, logvar = self.encoding(x)
    z = self.reparameterize(mu, logvar)
    return self.decoding(z), mu, logvar


In [None]:
def loss_function(x_hat, x, mean, log_var, k1, k2):
  """Return the weighted sum of the reconstruction and KL terms.

  Args:
    x_hat: Reconstructed values in [0, 1].
    x: Target values, same shape as ``x_hat``.
    mean: Latent means.
    log_var: Latent log-variances, same shape as ``mean``.
    k1: Weight applied to the reconstruction term.
    k2: Weight applied to the KL term.

  Returns:
    Scalar tensor ``k1 * BCE + k2 * KLD``, both terms summed over all elements.
  """
  bce_total = nn.functional.binary_cross_entropy(x_hat, x, reduction='sum')
  # Element-wise 1 + log(sigma^2) - mu^2 - sigma^2, then summed and scaled by -1/2.
  kl_terms = 1 + log_var - mean.pow(2) - log_var.exp()
  kl_total = torch.sum(kl_terms) * -0.5
  return k1 * bce_total + k2 * kl_total

In [None]:
# Preprocessing used when loading MNIST: the pipeline only converts each image
# to a tensor, since MNIST already comes with its basic preprocessing done.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# torchvision's MNIST takes the storage directory through the `root` keyword;
# `route` is not an accepted argument and raises a TypeError.
train_dataset = MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = MNIST(root='./data', train=False, download=True, transform=transform)

In [None]:
# One slot per digit class (0-9); None means no example has been stored yet.
samples = {digit: None for digit in range(10)}

cpt = 0

# Walk the training data, keep the first image seen for each class and stop
# as soon as ten slots have been filled.
for data, target in train_dataset:
  if samples[target] is not None:
    continue
  samples[target] = data
  cpt += 1
  if cpt == 10:
    break

# Draw the stored samples side by side
fig, axes = plt.subplots(1, 10, figsize=(12, 3))
for i, ax in enumerate(axes):
  ax.imshow(samples[i][0], cmap='gray')
  ax.set_title(f'Class {i}')
  ax.axis('off')

plt.show()

In [None]:
# Build the DataLoaders: shuffled batches of 128 for training,
# batches of 64 (no shuffle argument) for testing.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [None]:
from torch import optim

# Instantiate the Autoencoder and move it onto the selected device (CPU or GPU)
model = Autoencoder(z_dim)
model = model.to(device)

# Create the optimizer; Adam is the usual default choice
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Loss definition
criterion = nn.MSELoss()

In [None]:
# Check the shape of the tensor - this part is only used temporarily while writing the code
# I = torch.rand((1,1,28,28)).cuda()
# with torch.no_grad():
#  print(model(I).shape)

In [None]:
# Weights of the reconstruction term (k1) and the KL term (k2) in loss_function
k1, k2 = 1, 1
# Training
num_epochs = 100
train_losses = []
for epoch in range(num_epochs):
  running_loss = 0.0
  for images, _ in train_loader:
    # Move the batch once to the device chosen earlier instead of forcing
    # CUDA, so the loop also runs on CPU-only machines.
    images = images.float().to(device)
    optimizer.zero_grad()
    outputs, mu, logvar = model(images)
    loss = loss_function(outputs, images, mu, logvar, k1, k2)
    loss.backward()
    optimizer.step()
    # loss_function already sums over every sample in the batch, so it is
    # accumulated as is; scaling by the batch size again would inflate it.
    running_loss += loss.item()
  # Average loss per training sample for this epoch
  train_loss = running_loss / len(train_loader.dataset)
  train_losses.append(train_loss)
  if epoch % 10 == 0:
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')


In [None]:
# Pick random test images and reconstruct them.
# If the reconstruction quality is poor, try increasing the latent-space size.

num_images = 5

selected_indices = torch.randint(len(test_dataset), size=(num_images,))
reconstructed_images = []
original_images = []
with torch.no_grad():
  # tolist() turns the index tensor into plain ints for dataset indexing
  for idx in selected_indices.tolist():
    image, _ = test_dataset[idx]
    original_images.append(image[0])
    # Add a batch dimension and use the configured device (CPU or GPU)
    output = model(image.unsqueeze(0).to(device))
    # The training loop unpacks the model output as (reconstruction, mu,
    # logvar); keep only the reconstruction when a tuple comes back.
    reconstructed_image = output[0] if isinstance(output, tuple) else output
    reconstructed_images.append(reconstructed_image.squeeze().cpu().numpy().reshape(28, 28))

# Show each original image next to its reconstruction.
# squeeze=False keeps `axes` two-dimensional even when num_images == 1.
fig, axes = plt.subplots(num_images, 2, figsize=(8, 2+num_images), squeeze=False)
for i in range(num_images):
  axes[i, 0].imshow(original_images[i], cmap='gray')
  axes[i, 0].set_title('Original Image')
  axes[i, 0].axis('off')

  axes[i, 1].imshow(reconstructed_images[i], cmap='gray')
  axes[i, 1].set_title('Reconstructed Image')
  axes[i, 1].axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Collect the latent representation and the label of every test image
# (used below for the class-coloured scatter plot of the latent space)
latent_points = []
labels = []
with torch.no_grad():
  for images, targets in test_loader:
    # encoding() runs the conv encoder on the 4-D image batch, flattens the
    # features and applies the latent layer; the mean is used as the latent
    # point. Flattening the images first would break the conv layers.
    mu, _ = model.encoding(images.to(device))
    latent_points.extend(mu.cpu().numpy())
    labels.extend(targets.numpy())


In [None]:
# Turn the collected Python lists into NumPy arrays
latent_points, labels = (np.array(seq) for seq in (latent_points, labels))

In [None]:
# A 2-D latent space can be plotted directly; anything wider is first reduced
# to two dimensions with t-SNE.
if latent_points.shape[1] > 2:
  from sklearn.manifold import TSNE
  # Apply t-SNE to reduce the latent representation to two dimensions
  tsne = TSNE(n_components=2, random_state=0)
  latent_2d = tsne.fit_transform(latent_points)
else:
  latent_2d = latent_points
# Show the 2-D view of the latent space, coloured by digit class.
# `labels` was collected alongside latent_points, so the two stay aligned.
plt.figure(figsize=(10, 8))
plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=labels, cmap='tab10', alpha=0.5)
plt.colorbar(label='Digit class')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.title(f't-SNE Visualization of latent space with k1, k2=[{k1},{k2}]')
plt.savefig('1.png')
plt.show()

ImportError: cannot import name 'TSME' from 'sklearn.manifold' (/usr/local/lib/python3.10/dist-packages/sklearn/manifold/__init__.py)

In [None]:
# Generate images from a single class
cls_num = 0
cls_mu = np.mean(latent_points[labels==cls_num], 0)
cls_std = np.std(latent_points[labels==cls_num], 0)

# multivariate_normal takes a covariance matrix, so the diagonal must hold
# variances (std squared), not standard deviations.
# NOTE(review): unpacking into x, y assumes a 2-D latent space - confirm z_dim.
x, y = np.random.multivariate_normal(cls_mu, np.diag(cls_std**2), 10).T

# Overlay the sampled points (black '+') on the latent-space scatter plot
plt.figure(figsize=(10, 8))
plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=labels, cmap='tab10', alpha=0.5)
plt.scatter(x, y, c='black', marker='+')
plt.colorbar(label='Digit class')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
# f-string so that k1 and k2 are actually interpolated into the title
plt.title(f't-SNE Visualization of latent space with k1, k2=[{k1},{k2}]')
plt.savefig('2.png')


In [None]:
# Stack the sampled coordinates into a (num_samples, 2) batch and move it to
# the configured device (CPU or GPU) instead of forcing CUDA.
sampled_latent = torch.from_numpy(np.vstack((x, y)).T).float().to(device)
with torch.no_grad():
  reconstructed_images = model.decoding(sampled_latent).clip(0, 1).cpu().numpy()

# Show the ten decoded images on a 2 x 5 grid
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
axes = axes.flatten()

for i, ax in enumerate(axes):
  ax.imshow(reconstructed_images[i, 0], cmap='gray')
  ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Generate images at random from the overall latent distribution
mu = np.mean(latent_points, 0)
std = np.std(latent_points, 0)

# multivariate_normal takes a covariance matrix, so the diagonal must hold
# variances (std squared), not standard deviations.
# NOTE(review): unpacking into x, y assumes a 2-D latent space - confirm z_dim.
x, y = np.random.multivariate_normal(mu, np.diag(std**2), 10).T

# Overlay the sampled points (black '+') on the latent-space scatter plot
plt.figure(figsize=(10, 8))
plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=labels, cmap='tab10', alpha=0.5)
plt.scatter(x, y, c='black', marker='+')
plt.colorbar(label='Digit class')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
# f-string so that k1 and k2 are actually interpolated into the title
plt.title(f't-SNE Visualization of latent space with k1, k2=[{k1},{k2}]')
plt.savefig('3.png')