In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so weight initialization and DataLoader shuffling
# start from the same state on every "Restart & Run All".
# NOTE: some GPU kernels may still be nondeterministic, so results on CUDA can
# differ slightly between runs.
SEED = 42
torch.manual_seed(SEED)

In [None]:
# Fully connected autoencoder model
class Autoencoder(nn.Module):
    """Dense autoencoder that compresses a 784-pixel image to ``latent_dim`` values.

    The encoder flattens each sample and maps 784 -> 256 -> ``latent_dim``,
    applying ReLU after both linear layers (so the latent code is non-negative).
    The decoder maps ``latent_dim`` -> 256 -> 784 and ends with a sigmoid, so
    every reconstructed value lies in [0, 1].

    Args:
        latent_dim: Size of the bottleneck vector.
    """

    def __init__(self, latent_dim):
        super().__init__()
        n_pixels = 784   # 28 * 28 values per flattened image
        n_hidden = 256   # width of the single hidden layer on each side

        encoder_layers = [
            nn.Flatten(),                     # (N, 1, 28, 28) -> (N, 784)
            nn.Linear(n_pixels, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, latent_dim),  # bottleneck
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(latent_dim, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_pixels),
            nn.Sigmoid(),                     # keep outputs inside [0, 1]
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode then decode ``x`` and return the result viewed as (-1, 1, 28, 28)."""
        flat_recon = self.decoder(self.encoder(x))
        # Restore the image layout that the encoder's Flatten removed.
        return flat_recon.view(-1, 1, 28, 28)

In [None]:
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder for (N, 1, 28, 28) image batches.

    The encoder downsamples with two stride-2 convolutions (the second followed
    by batch normalization), flattens, and projects to ``latent_dim`` with a
    linear layer (no activation on the latent code). The decoder mirrors it with
    a linear layer and two stride-2 transposed convolutions, ending in a sigmoid
    so outputs lie in [0, 1].

    Args:
        latent_dim: Size of the bottleneck vector (default 32).
    """

    def __init__(self, latent_dim=32):
        super().__init__()
        feat_shape = (32, 7, 7)   # feature-map shape after the two stride-2 convolutions
        feat_size = 32 * 7 * 7    # the same feature map, flattened

        # Encoder: strided convolutions followed by a linear bottleneck.
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),   # (1, 28, 28) -> (16, 14, 14)
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # (16, 14, 14) -> (32, 7, 7)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Flatten(),                                           # (32, 7, 7) -> 1568
            nn.Linear(feat_size, latent_dim),                       # compress to latent_dim
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: linear expansion followed by transposed convolutions.
        decoder_layers = [
            nn.Linear(latent_dim, feat_size),
            nn.ReLU(),
            nn.Unflatten(1, feat_shape),                            # restore the spatial layout
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2,
                               padding=1, output_padding=1),        # (32, 7, 7) -> (16, 14, 14)
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2,
                               padding=1, output_padding=1),        # (16, 14, 14) -> (1, 28, 28)
            nn.Sigmoid(),                                           # keep outputs inside [0, 1]
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

In [None]:
# Training function
def train_autoencoder(model, train_loader, num_epochs=10, target_device=None):
    """Train ``model`` to reconstruct its inputs using MSE loss and Adam (lr=0.001).

    Prints the mean per-batch loss after every epoch.

    Args:
        model: ``nn.Module`` whose output has the same shape as its input batch.
        train_loader: Iterable yielding ``(images, labels)`` batches; the labels
            are ignored.
        num_epochs: Number of passes over ``train_loader``.
        target_device: Device to train on. When ``None`` (the default) the
            notebook-level ``device`` is used.

    Returns:
        list[float]: The mean per-batch training loss of each epoch, in order.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    if target_device is None:
        target_device = device
    model.to(target_device)
    # evaluate_autoencoder / show_reconstructed_images switch the model to eval
    # mode; make sure layers such as BatchNorm are back in training mode here.
    model.train()

    criterion = nn.MSELoss()  # reconstruction loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epoch_losses = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for images, _ in train_loader:
            images = images.to(target_device)
            outputs = model(images)
            loss = criterion(outputs, images)  # the target is the input itself

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches")

        epoch_loss = total_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    return epoch_losses


In [None]:
# Reconstruction error (MSE and L1)
def evaluate_autoencoder(model, data_loader, target_device=None):
    """Measure how well ``model`` reconstructs the data in ``data_loader``.

    The model is moved to the target device and switched to eval mode (it is
    left in eval mode afterwards). No gradients are computed.

    Args:
        model: ``nn.Module`` whose output has the same shape as its input batch.
        data_loader: Iterable yielding ``(images, labels)`` batches; the labels
            are ignored.
        target_device: Device to evaluate on. When ``None`` (the default) the
            notebook-level ``device`` is used.

    Returns:
        tuple[float, float]: ``(avg_mse, avg_l1)`` -- the mean squared error and
        mean absolute error taken over every element of every batch.

    Raises:
        ValueError: If ``data_loader`` yields no data.
    """
    if target_device is None:
        target_device = device
    model.to(target_device)
    model.eval()

    # Accumulate summed errors and divide once at the end. Averaging per-batch
    # means would over-weight a smaller final batch.
    mse_sum = nn.MSELoss(reduction="sum")
    l1_sum = nn.L1Loss(reduction="sum")

    total_sq_err = 0.0
    total_abs_err = 0.0
    total_elems = 0
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(target_device)
            outputs = model(images)

            total_sq_err += mse_sum(outputs, images).item()
            total_abs_err += l1_sum(outputs, images).item()
            total_elems += images.numel()

    if total_elems == 0:
        raise ValueError("data_loader yielded no data")

    avg_mse = total_sq_err / total_elems
    avg_l1 = total_abs_err / total_elems

    return avg_mse, avg_l1

In [None]:
# Convert an image batch tensor into a NumPy stack that imshow can draw.
def _as_image_stack(batch):
    """Return ``batch`` as a NumPy array with the batch axis first.

    For a 4-D (N, C, H, W) batch a singleton channel axis is dropped, giving
    (N, H, W); otherwise the channel axis is moved last, giving (N, H, W, C).
    Arrays of any other rank are returned unchanged.
    """
    arr = batch.cpu().numpy()
    if arr.ndim == 4:
        arr = arr[:, 0] if arr.shape[1] == 1 else np.moveaxis(arr, 1, -1)
    return arr


# Draw a stack of images on a near-square grid under a common title.
def _show_image_grid(stack, title):
    """Plot every image in ``stack`` on one figure titled ``title``."""
    count = len(stack)
    n_cols = int(np.ceil(np.sqrt(count)))
    n_rows = int(np.ceil(count / n_cols))
    # squeeze=False keeps ``axes`` 2-D even for a single row or column.
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(1.5 * n_cols, 1.5 * n_rows), squeeze=False)
    for idx, ax in enumerate(axes.flat):
        ax.axis('off')
        if idx < count:  # trailing cells stay blank when the grid is not full
            ax.imshow(stack[idx], cmap='gray')
    plt.suptitle(title)
    plt.show()


# Show original and reconstructed images (4x4 grid for the default 16 images)
def show_reconstructed_images(model, data_loader, num_images=16):
    """Plot the first images of one batch next to the model's reconstructions.

    Takes the first batch produced by ``data_loader``, keeps at most
    ``num_images`` samples, runs them through ``model`` in eval mode without
    gradients, and shows two figures: "Original Images" and
    "Reconstructed Images". The grid size adapts to the number of images
    actually available (16 images give the original 4x4 layout).

    Args:
        model: ``nn.Module`` mapping an image batch to a reconstruction.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        num_images: Maximum number of images to display.

    Raises:
        ValueError: If there are no images to display.
    """
    model.to(device)
    model.eval()

    images, _ = next(iter(data_loader))  # take a single batch
    images = images[:num_images].to(device)
    if images.size(0) == 0:
        raise ValueError("no images to display")

    with torch.no_grad():
        outputs = model(images)

    _show_image_grid(_as_image_stack(images), "Original Images")
    _show_image_grid(_as_image_stack(outputs), "Reconstructed Images")

In [None]:
# Download (if not already present under ./data) and load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor()])

train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Shuffle only the training split; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
)

In [None]:
# Train and evaluate one autoencoder per latent_dim (bottleneck-size sweep).
# Uses the current train_loader / test_loader (MNIST when run in notebook order).
latent_dims = [64, 32, 4]

for latent_dim in latent_dims:
    print(f"\nTraining Autoencoder with latent dimension = {latent_dim}")
    # Convolutional variant; use Autoencoder(latent_dim) here for the fully connected model.
    autoencoder = ConvAutoencoder(latent_dim)
    train_autoencoder(autoencoder, train_loader, num_epochs=20)

    train_mse, train_l1 = evaluate_autoencoder(autoencoder, train_loader)
    test_mse, test_l1 = evaluate_autoencoder(autoencoder, test_loader)

    print(f"Training MSE: {train_mse:.4f}, L1 Loss: {train_l1:.4f}")
    print(f"Testing  MSE: {test_mse:.4f}, L1 Loss: {test_l1:.4f}")

    # Show original vs. reconstructed images for one batch of each split
    print("Training Images Reconstruction:")
    show_reconstructed_images(autoencoder, train_loader)

    print("Testing Images Reconstruction:")
    show_reconstructed_images(autoencoder, test_loader)

In [None]:
# Download (if not already present under ./data) and load the FashionMNIST dataset.
# NOTE: this rebinds transform, train_dataset, test_dataset, train_loader and
# test_loader, replacing the MNIST objects defined earlier.
transform = transforms.Compose([transforms.ToTensor()])

train_dataset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Shuffle only the training split; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
)

In [None]:
# Train and evaluate one autoencoder per latent_dim (bottleneck-size sweep).
# Uses the current train_loader / test_loader (FashionMNIST when run in notebook order).
latent_dims = [64, 32, 4]

for latent_dim in latent_dims:
    print(f"\nTraining Autoencoder with latent dimension = {latent_dim}")
    # Convolutional variant; use Autoencoder(latent_dim) here for the fully connected model.
    autoencoder = ConvAutoencoder(latent_dim)
    train_autoencoder(autoencoder, train_loader, num_epochs=20)

    train_mse, train_l1 = evaluate_autoencoder(autoencoder, train_loader)
    test_mse, test_l1 = evaluate_autoencoder(autoencoder, test_loader)

    print(f"Training MSE: {train_mse:.4f}, L1 Loss: {train_l1:.4f}")
    print(f"Testing  MSE: {test_mse:.4f}, L1 Loss: {test_l1:.4f}")

    # Show original vs. reconstructed images for one batch of each split
    print("Training Images Reconstruction:")
    show_reconstructed_images(autoencoder, train_loader)

    print("Testing Images Reconstruction:")
    show_reconstructed_images(autoencoder, test_loader)