In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
import idx2numpy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

https://youtu.be/zp8clK9yCro


In [None]:
# Directory holding the raw MNIST idx files (must end with a path separator,
# because the file names below are appended by plain string concatenation).
MNIST_DIR = "mnist/"

# Load the images, flatten every image to one row and scale pixels to [0, 1].
# The row count is taken from the array itself instead of being hard-coded,
# so the cell also works for idx files with a different number of samples.
X_mnist = idx2numpy.convert_from_file(MNIST_DIR + "train-images-idx3-ubyte")
X_mnist = X_mnist.reshape(len(X_mnist), -1) / 255.0
y_mnist = idx2numpy.convert_from_file(MNIST_DIR + "train-labels-idx1-ubyte")

# Fixed random_state keeps the 80/20 split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, test_size=0.2, random_state=42)

# Tensor copies of the training split.
# NOTE(review): no later cell visible here reads these two names before
# reassigning them -- confirm whether they are still needed.
x = torch.from_numpy(X_train.astype(np.float32))
y = torch.from_numpy(y_train.astype(np.int64))

In [None]:
class MnistDataset(Dataset):
    """Map-style dataset over a pair of NumPy arrays.

    Args:
        X: array of samples; converted to a float32 tensor (copied).
        y: array of labels; converted to an int64 tensor (copied).

    Indexing returns a ``(sample, label)`` tuple; the length is the size of
    the first dimension of ``X``.
    """

    def __init__(self, X, y):
        features = X.astype(np.float32)
        targets = y.astype(np.int64)
        self.x = torch.from_numpy(features)
        self.y = torch.from_numpy(targets)

    def __len__(self):
        return self.x.size(0)

    def __getitem__(self, index):
        sample = self.x[index]
        target = self.y[index]
        return sample, target

In [None]:
# Wrap both NumPy splits as tensor datasets.
dataset_train = MnistDataset(X_train, y_train)
dataset_test = MnistDataset(X_test, y_test)

# Mini-batches of 64: the training loader shuffles, the test loader keeps
# the dataset order.
data_loader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=False)

In [None]:
# Take the first training sample (index 0) for a quick inspection.
image, label = dataset_train[0]

In [None]:
# Sanity check: value range and shape of the sample picked above.
print(image.min(), image.max())
print(image.shape)

In [None]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder.

    The encoder is a stack of ``Linear`` layers separated by ``ReLU`` that
    narrows the input down to ``latent_dim`` values. The decoder mirrors the
    encoder and ends with a ``Sigmoid``, so reconstructions lie in (0, 1).

    Args:
        input_dim: length of each flattened input vector (default 784).
        hidden_dims: widths of the encoder's hidden layers, in order; the
            decoder uses them in reverse (default ``(128, 64, 12)``).
        latent_dim: size of the bottleneck code (default 3).

    With the default arguments the layers, their order and the state_dict
    keys are the same as in the original hard-coded 784-128-64-12-3 network.
    """

    def __init__(self, input_dim=784, hidden_dims=(128, 64, 12), latent_dim=3):
        super().__init__()
        encoder_dims = [input_dim, *hidden_dims, latent_dim]
        decoder_dims = encoder_dims[::-1]

        # Encoder layers are created before decoder layers so the order of
        # parameter initialisation stays the same as before.
        self.encoder = nn.Sequential(*self._mlp_layers(encoder_dims))
        self.decoder = nn.Sequential(*self._mlp_layers(decoder_dims), nn.Sigmoid())

    @staticmethod
    def _mlp_layers(dims):
        """Return Linear layers for consecutive ``dims`` with ReLU in between."""
        layers = []
        last_index = len(dims) - 2
        for i, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:])):
            layers.append(nn.Linear(n_in, n_out))
            # No activation after the final Linear of the stack.
            if i < last_index:
                layers.append(nn.ReLU())
        return layers

    def forward(self, x):
        """Encode ``x`` to the latent code and decode it back.

        Returns:
            The reconstruction, with the same trailing size as the input.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [None]:
# Seed torch's global RNG so weight initialisation (and the shuffling done by
# the training DataLoader afterwards) is repeatable from run to run.
torch.manual_seed(42)

model = AutoEncoder()
# Reconstruction error: mean squared difference between output and input.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the autoencoder for 10 passes over the training loader. The target of
# each batch is the batch itself; the labels are ignored.
for epoch in range(10):
    running_loss = 0.0
    num_samples = 0

    for images, _ in data_loader_train:
        reconstructed_images = model(images)
        loss = criterion(reconstructed_images, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a sample-weighted sum so the reported value is the mean
        # over the whole epoch rather than the loss of the final mini-batch.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

    epoch_loss = running_loss / max(num_samples, 1)
    print(f"Epoch: {epoch + 1}, Loss: {epoch_loss:.4f}")

In [None]:
def plot_images(images, title):
    """Show up to the first nine images of a batch in a 3x3 grid.

    Args:
        images: indexable batch of tensors; each displayed item is reshaped
            to 28x28, so it must hold exactly 784 values.
        title: text shown as the title of the whole figure.

    Grid cells without a matching image (batch smaller than nine) are left
    blank instead of raising an IndexError.
    """
    fig, axes = plt.subplots(3, 3, figsize=(6, 6))
    # A figure-level title; the previous version put the title on an extra
    # full-figure axes that the subplots were then drawn over.
    fig.suptitle(title)

    for i, ax in enumerate(axes.flat):
        ax.set_xticks([])
        ax.set_yticks([])
        if i < len(images):
            # detach()/cpu() make the conversion work for tensors that track
            # gradients or live on another device.
            img = images[i].detach().cpu().numpy().reshape((28, 28))
            ax.imshow(img, cmap="gray")
        else:
            ax.axis("off")

    fig.tight_layout()
    plt.show()

In [None]:
# First batch of the (unshuffled) test loader; the labels are not needed.
images, _ = next(iter(data_loader_test))
plot_images(images, title="Source Images")

In [None]:
# Forward pass only: gradient tracking is switched off for inference.
with torch.no_grad():
    reconstructed_images = model(images)

print(reconstructed_images.shape)

In [None]:
# Reconstructions of the same test batch, for comparison with the sources.
plot_images(reconstructed_images, title="Reconstructed Images")

In [None]:
# Run the two halves of the model separately: compress the batch to its
# latent code, then expand the code back to image size.
with torch.no_grad():
    encoded_images = model.encoder(images)

print(encoded_images.shape)

with torch.no_grad():
    decoded_images = model.decoder(encoded_images)

print(decoded_images.shape)

In [None]:
# Images rebuilt from the latent codes by the decoder alone.
plot_images(decoded_images, title="Decoded Images")

In [None]:
# Reload MNIST for the convolutional model: images become
# (N, 1, 28, 28) arrays scaled to [0, 1] instead of flat rows.
y_mnist = idx2numpy.convert_from_file(MNIST_DIR + "train-labels-idx1-ubyte")
X_mnist = idx2numpy.convert_from_file(MNIST_DIR + "train-images-idx3-ubyte")
X_mnist = X_mnist.reshape((-1, 1, 28, 28)) / 255.0

# Same 80/20 split and seed as for the flat data.
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist,
    y_mnist,
    test_size=0.2,
    random_state=42,
)

# Tensor copies of the training split.
x = torch.from_numpy(X_train.astype(np.float32))
y = torch.from_numpy(y_train.astype(np.int64))

# Rebuild datasets and loaders on top of the 4-D arrays (the names from the
# flat pipeline are rebound).
dataset_train = MnistDataset(X_train, y_train)
dataset_test = MnistDataset(X_test, y_test)
data_loader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=False)

In [None]:
# Inspect the first training sample of the 4-D dataset.
image, label = dataset_train[0]
print(image.shape, image.min(), image.max())

# Convert to NumPy, then drop the size-1 dimensions so imshow gets a 2-D array.
image = image.numpy()
print(image.shape)
image = np.squeeze(image)
print(image.shape)

plt.imshow(image, cmap="gray")
plt.show()

In [None]:
# Push one training batch through freshly created (untrained) layers to see
# how each convolution / transposed convolution changes the tensor shape.
images, _ = next(iter(data_loader_train))
print("Original shape:", images.shape)

x = nn.Conv2d(1, 16, 3, stride=2, padding=1)(images)
# The label now matches the layer actually applied (1 input channel, not 64).
print("After Conv2d(1, 16, 3, stride=2, padding=1):", x.shape)
x = nn.Conv2d(16, 32, 3, stride=2, padding=1)(x)
print("After Conv2d(16, 32, 3, stride=2, padding=1):", x.shape)
x = nn.Conv2d(32, 64, 7)(x)
print("After Conv2d(32, 64, 7):", x.shape)

x = nn.ConvTranspose2d(64, 32, 7)(x)
print("After ConvTranspose2d(64, 32, 7):", x.shape)
x = nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1)(x)
print("After ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1):", x.shape)
x = nn.ConvTranspose2d(16, 1, 3, stride=2, padding=1, output_padding=1)(x)
print("After ConvTranspose2d(16, 1, 3, stride=2, padding=1, output_padding=1):", x.shape)

In [None]:
class AutoEncoderCNN(nn.Module):
    """Convolutional autoencoder.

    The encoder applies three ``Conv2d`` layers (ReLU between them) and ends
    with 64 channels. The decoder applies three ``ConvTranspose2d`` layers
    and finishes with a ``Sigmoid``, so outputs lie in (0, 1).
    """

    def __init__(self):
        super().__init__()

        # Encoder layers are instantiated before decoder layers, keeping the
        # parameter-initialisation order unchanged.
        downsample = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7),
        ]
        upsample = [
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=7),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=16, out_channels=1, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]

        self.encoder = nn.Sequential(*downsample)
        self.decoder = nn.Sequential(*upsample)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

In [None]:
# ~3.5 min (presumably the run time of this cell -- original note: "3.5 dk")

# Seed torch's global RNG so weight initialisation and batch shuffling are
# repeatable from run to run.
torch.manual_seed(42)

model_cnn = AutoEncoderCNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model_cnn.parameters(), lr=0.001)

# Train for 10 passes over the training loader; each batch is its own target.
for epoch in range(10):
    running_loss = 0.0
    num_samples = 0

    for images, _ in data_loader_train:
        reconstructed_images = model_cnn(images)
        loss = criterion(reconstructed_images, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Sample-weighted sum, so the printed value is the epoch mean rather
        # than the loss of the final mini-batch only.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

    epoch_loss = running_loss / max(num_samples, 1)
    print(f"Epoch: {epoch + 1}, Loss: {epoch_loss:.4f}")

In [None]:
# First batch of the (unshuffled) 4-D test loader; the labels are not needed.
images, _ = next(iter(data_loader_test))
plot_images(images, title="Source Images")

In [None]:
# Reconstruct the test batch with the convolutional model; no gradients are
# needed for inference.
with torch.no_grad():
    reconstructed_images = model_cnn(images)

print(reconstructed_images.shape)
plot_images(reconstructed_images, title="Reconstructed Images")