In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

In [None]:
# Load every image in the folder, resize it to 256x256 and scale it to [0, 1].
folder_path = 'Images'
images = []
# Sort the listing: os.listdir() returns names in arbitrary order, and later
# cells refer to the images by their index.
for filename in sorted(os.listdir(folder_path)):
    img_path = os.path.join(folder_path, filename)
    img = cv2.imread(img_path)  # OpenCV loads colour images as BGR
    if img is None:
        # cv2.imread returns None for anything it cannot decode (sub-folders,
        # hidden files, ...); skip those instead of crashing in cv2.resize.
        print(f'Skipping unreadable file: {img_path}')
        continue
    img_resized = cv2.resize(img, (256, 256))
    # Dividing by 255.0 keeps the values small and in [0, 1], which is
    # convenient for the computation.
    images.append(img_resized / 255.0)
    # Grayscale variant: convert with cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),
    # resize, and reshape to (256, 256, 1) before appending.

if not images:
    raise RuntimeError(f'No readable images found in {folder_path!r}')

# Images -> PyTorch tensor. Stack into a single array first (building a tensor
# from a list of arrays is very slow), then reorder (N, H, W, C) -> (N, C, H, W).
images_tensor = torch.tensor(np.stack(images), dtype=torch.float32).permute(0, 3, 1, 2)


In [None]:
# Show the file names found in the image folder as a quick sanity check.
os.listdir(folder_path)

In [None]:
# Dimensionality of the latent space; with 2 dimensions the latent codes can
# be scattered directly on a plane in the plotting cells.
z_dim = 2

In [None]:
class Autoencoder(nn.Module):
  """Convolutional variational autoencoder for 3-channel 256x256 images.

  The encoder maps an image to the mean and log-variance of a diagonal
  Gaussian over a ``z_dim``-dimensional latent space. The decoder maps a
  latent vector back to an image whose values lie in (0, 1).

  Args:
    z_dim: Dimensionality of the latent space.
  """

  def __init__(self, z_dim):
    super().__init__()
    # Encoder: four stride-2 convolutions, each halving the spatial size.
    self.encoder = nn.Sequential(
        nn.Conv2d(3, 16, 3, stride=2, padding=1),    # (N, 3, 256, 256) -> (N, 16, 128, 128)
        nn.ReLU(inplace=True),
        nn.Conv2d(16, 32, 3, stride=2, padding=1),   # (N, 16, 128, 128) -> (N, 32, 64, 64)
        nn.ReLU(inplace=True),
        nn.Conv2d(32, 64, 3, stride=2, padding=1),   # (N, 32, 64, 64) -> (N, 64, 32, 32)
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 128, 3, stride=2, padding=1),  # (N, 64, 32, 32) -> (N, 128, 16, 16)
        nn.ReLU(inplace=True),
    )
    # Latent space: one linear layer emits both mu and logvar (z_dim values each).
    self.latent_space = nn.Linear(128*16*16, z_dim*2)
    # Decoder: expand the latent vector back to a (128, 16, 16) feature map ...
    self.decoder_mlp = nn.Sequential(
        nn.Linear(z_dim, 128*16*16),
        nn.ReLU(inplace=True)
    )
    # ... then four stride-2 transposed convolutions, each doubling the spatial size.
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),  # (N, 128, 16, 16) -> (N, 64, 32, 32)
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),   # (N, 64, 32, 32) -> (N, 32, 64, 64)
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1),   # (N, 32, 64, 64) -> (N, 16, 128, 128)
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(16, 3, 3, stride=2, padding=1, output_padding=1),    # (N, 16, 128, 128) -> (N, 3, 256, 256)
        nn.Sigmoid(),
    )

  def reparameterize(self, mu, logvar):
    """Sample z = mu + eps * std with eps ~ N(0, I) (reparameterization trick).

    Args:
      mu: Latent means, shape (N, z_dim).
      logvar: Latent log-variances, shape (N, z_dim).

    Returns:
      Sampled latent vectors, shape (N, z_dim).
    """
    std = torch.exp(logvar/2)  # std = sqrt(exp(logvar))
    eps = torch.randn_like(std)
    return mu + eps*std

  def encoding(self, x):
    """Encode images into the parameters of the latent Gaussian.

    Args:
      x: Image batch of shape (N, 3, 256, 256).

    Returns:
      Tuple ``(mu, logvar)``, each of shape (N, z_dim). No sampling is done
      here; pass the result to ``reparameterize`` to draw a latent vector.
    """
    features = self.encoder(x)
    features = features.reshape(features.size(0), -1)
    latent_params = self.latent_space(features)
    # First half of the output is the mean, second half the log-variance.
    mu, logvar = torch.chunk(latent_params, 2, dim=1)
    return mu, logvar

  def decoding(self, x):
    """Decode latent vectors into images.

    Args:
      x: Latent vectors of shape (N, z_dim).

    Returns:
      Images of shape (N, 3, 256, 256) with values in (0, 1).
    """
    x = self.decoder_mlp(x).view(x.size(0), 128, 16, 16)
    x = self.decoder(x)
    return x

  def forward(self, x):
    """Encode, sample a latent vector, and decode.

    Args:
      x: Image batch of shape (N, 3, 256, 256).

    Returns:
      Tuple ``(x_hat, mu, logvar)``: the reconstruction and the latent
      Gaussian parameters used to sample it.
    """
    mu, logvar = self.encoding(x)
    z = self.reparameterize(mu, logvar)
    return self.decoding(z), mu, logvar



In [None]:
def loss_function(x_hat, x, mea, log_var):
  """VAE loss: summed reconstruction error plus KL divergence.

  Args:
    x_hat: Reconstructed images with values in [0, 1], same shape as ``x``.
    x: Target images with values in [0, 1].
    mea: Latent means, shape (N, z_dim).
    log_var: Latent log-variances, shape (N, z_dim).

  Returns:
    Scalar tensor: binary cross-entropy summed over every element, plus the
    KL divergence between N(mea, exp(log_var)) and N(0, I) summed over the
    batch and latent dimensions.
  """
  # One BCE over the whole tensor equals the sum of per-channel BCEs, and it
  # works for any number of channels rather than exactly three.
  reconstruction = nn.functional.binary_cross_entropy(x_hat, x, reduction='sum')
  KLD = -0.5 * torch.sum(1 + log_var - mea.pow(2) - log_var.exp())
  return reconstruction + KLD

In [None]:
# Put the model on the GPU when one is available, otherwise fall back to the CPU
# instead of failing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Autoencoder(z_dim).to(device)

In [None]:

# NOTE: `criterion` is not used by the training loop in this notebook, which
# optimises `loss_function` instead.
criterion = nn.MSELoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Smoke-test one forward pass over all images before training.
# Run the data on whatever device the model lives on, and skip building the
# autograd graph: these outputs are only inspected, never back-propagated.
device = next(model.parameters()).device
with torch.no_grad():
  outputs, mu, logvar = model(images_tensor.float().to(device))

In [None]:

# Full-batch training: each epoch is a single optimisation step on all images.
num_epochs = 1000
train_losses = []
# Move the data to the model's device once, instead of converting and copying
# the whole tensor twice in every epoch.
device = next(model.parameters()).device
train_batch = images_tensor.float().to(device)
model.train()
for epoch in range(num_epochs):
  optimizer.zero_grad()
  outputs, mu, logvar = model(train_batch)
  loss = loss_function(outputs, train_batch, mu, logvar)
  loss.backward()
  optimizer.step()
  # There is only one batch per epoch, so the epoch loss is just this loss.
  running_loss = loss.item()
  train_losses.append(running_loss)
  if epoch % 100 == 0:
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss:.4f}')

In [None]:
# Encode all images and keep the latent Gaussian parameters as NumPy arrays
# for the plotting cells below.
device = next(model.parameters()).device
with torch.no_grad():
  # Take the last two returned values so this works whether `encoding`
  # returns (mu, logvar) or (z, mu, logvar).
  *_, mu, logvar = model.encoding(images_tensor.to(device))
mu, logvar = mu.cpu().numpy(), logvar.cpu().numpy()

In [None]:
# Visualise each image's latent Gaussian by scattering 1000 samples drawn from it.
for i in range(len(mu)):
  # multivariate_normal expects a covariance matrix, i.e. variances on the
  # diagonal: var = exp(logvar). exp(0.5 * logvar) would be the standard
  # deviation and would draw a cloud of the wrong width.
  cov = np.diag(np.exp(logvar[i]))
  pts = np.random.multivariate_normal(mu[i], cov, size=1000)
  plt.plot(pts[:, 0], pts[:, 1], '.', alpha=0.5)

plt.xlabel('z[0]')
plt.ylabel('z[1]')
plt.title('Latent distributions per image')
plt.axis('equal')
plt.show()

In [None]:

# Compare the first (up to) two originals with their reconstructions.
selected_images = images_tensor[:2]
device = next(model.parameters()).device
with torch.no_grad():
  reconstructed_images, _, _ = model(selected_images.to(device))
  # (N, C, H, W) -> (N, H, W, C) for matplotlib
  reconstructed_images = reconstructed_images.permute(0,2,3,1).cpu().numpy()
original_images = selected_images.permute(0,2,3,1).numpy()

# plt.subplots (plural) creates the figure plus a grid of axes; plt.subplot
# returns a single Axes and cannot be unpacked into (fig, axes).
# squeeze=False keeps `axes` 2-D even if only one image is available.
num_shown = len(selected_images)
fig, axes = plt.subplots(2, num_shown, figsize=(8,8), squeeze=False)
for i in range(num_shown):
  # The images were read with OpenCV (BGR); flipping the last axis gives RGB.
  axes[0, i].imshow(np.flip(original_images[i], -1))
  axes[0, i].set_title('Original Image')
  axes[1, i].imshow(np.flip(reconstructed_images[i], -1))
  axes[1, i].set_title('Reconstructed Image')
plt.show()

특정 클래스에서 그림 만들기

In [None]:
cls_idx = 1
# Draw 10 latent vectors from image `cls_idx`'s latent Gaussian.
# The covariance diagonal holds variances, exp(logvar), not the standard
# deviation exp(0.5 * logvar).
latent_samples = np.random.multivariate_normal(
    mu[cls_idx], np.diag(np.exp(logvar[cls_idx])), 10)

# Use the (10, z_dim) sample array directly so this is not tied to a 2-D latent.
device = next(model.parameters()).device
sampled_latent = torch.from_numpy(latent_samples).float().to(device)

with torch.no_grad():
  reconstructed_images = model.decoding(sampled_latent).clip(0,1).permute(0,2,3,1).cpu().numpy()

# plt.subplots (plural) returns the figure and a 2x5 array of axes.
fig, axes = plt.subplots(2, 5, figsize=(15, 6))

axes = axes.flatten()

for i, ax in enumerate(axes):
  # Decoder output follows the BGR order of the training data; flip to RGB.
  ax.imshow(np.flip(reconstructed_images[i], -1))
  ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Legend labels for the latent-space plot, one per image.
# NOTE(review): assumed to be in the same order as the loaded images — confirm.
img_names = [
    'Monalisa',
    'Starry Night',
    'The Girl',
    'Van Gogh',
]

In [None]:
# Decoding a randomly chosen latent point yields something that overlaps the
# images the model has learned, i.e. a blend of what lies around that point.
device = next(model.parameters()).device
latent = torch.randn(1, z_dim).to(device)
with torch.no_grad():
  interpolated_images = model.decoding(latent).clip(0, 1).permute(0,2,3,1).cpu().numpy()
latent_np = latent.cpu().numpy()

# Left panel: where the random sample falls relative to each image's latent cloud.
plt.subplot(121)
for i in range(len(mu)):
  # Covariance diagonal = variances, exp(logvar), not the standard deviation.
  cov = np.diag(np.exp(logvar[i]))
  pts = np.random.multivariate_normal(mu[i], cov, size=1000)
  # Fall back to a generic label if there are more images than names.
  label = img_names[i] if i < len(img_names) else f'image {i}'
  # Draw points, not a line joining 1000 random samples (as in the earlier scatter cell).
  plt.plot(pts[:, 0], pts[:, 1], '.', alpha=0.5, label=label)

# zorder keeps the sample marker visible on top of the point clouds.
plt.scatter(latent_np[0,0], latent_np[0,1], color='black', zorder=3, label='sample')
plt.xlim([-10, 10])
plt.ylim([-10, 10])
plt.legend()
# Right panel: the decoded image (flipped from BGR to RGB for display).
plt.subplot(122)
plt.imshow(np.flip(interpolated_images[0], -1))