In [None]:
# Empty scratch cell for experiments
# Try the experiments suggested above in this cell!

# Example: retrain with a different learning rate
# learning_rate = 1e-2  # 10x larger than the original learning rate
# model = MLP().to(device)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#
# Re-run the training loop above and compare the results!

print("실험을 시작해보세요! 💪")

실험을 시작해보세요! 💪


In [None]:
# 필요한 패키지 설치
!pip install datasets

# 필요한 라이브러리 임포트 및 설정
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# Device selection: use the CUDA GPU when PyTorch can see one, else the CPU.
# The status lines below are derived from the chosen device, so the
# availability query is made only once.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {device.type == 'cuda'}")
# Only the selected branch is evaluated, so the GPU name is never queried on CPU.
print(f"GPU: {torch.cuda.get_device_name(0)}" if device.type == "cuda" else "Using CPU")

# Baseline MLP: one ReLU hidden layer
class MLP(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in the last dimension of the input.
        hidden_size: Width of the single hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size=784, hidden_size=100, num_classes=10):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # draws from the RNG in the same sequence as the stack is applied.
        hidden_layer = nn.Linear(input_size, hidden_size)
        output_layer = nn.Linear(hidden_size, num_classes)
        self.layers = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        scores = self.layers(x)
        return scores

# MLP variant that uses a Sigmoid activation in the hidden layer
class SigmoidMLP(nn.Module):
    """Fully connected classifier: Linear -> Sigmoid -> Linear.

    Same layout as ``Linear -> ReLU -> Linear`` except that the hidden
    activation is a Sigmoid.

    Args:
        input_size: Number of features in the last dimension of the input.
        hidden_size: Width of the single hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size=784, hidden_size=100, num_classes=10):
        super().__init__()
        stack = [
            nn.Linear(input_size, hidden_size),
            nn.Sigmoid(),  # Sigmoid instead of ReLU
            nn.Linear(hidden_size, num_classes),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        scores = self.layers(x)
        return scores

# DeepMLP: an MLP with three hidden layers
class DeepMLP(nn.Module):
    """Fully connected classifier with three ReLU hidden layers.

    Args:
        input_size: Number of features in the last dimension of the input.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        hidden_size3: Width of the third hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size=784, hidden_size1=256, hidden_size2=128, hidden_size3=64, num_classes=10):
        super().__init__()
        widths = [input_size, hidden_size1, hidden_size2, hidden_size3]
        modules = []
        # Each consecutive width pair becomes a Linear followed by a ReLU.
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # Output projection; no activation follows it.
        modules.append(nn.Linear(hidden_size3, num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        scores = self.layers(x)
        return scores

# Dataset loading and preprocessing
batch_size = 128  # batch size used for the training DataLoader
test_batch_size = 1000  # batch size used for the test DataLoader
nb_epochs = 3  # default epoch count; the experiment cells reassign it
mnist = load_dataset("mnist")
# Estimate the normalisation statistics from the first 1000 training images only.
sample_data = torch.stack([transforms.ToTensor()(mnist['train'][i]['image']) for i in range(1000)])
mean = sample_data.mean().item()
std = sample_data.std().item()
# Convert each image to a tensor, then normalise with the single-channel mean/std above.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((mean,), (std,))])
def transform_dataset(dataset):
    """Attach the flatten-and-normalise batch transform to ``dataset``.

    The callback applies the module-level ``transform`` to every entry of
    ``batch["image"]``, flattens each result to 1-D, and stacks them into a
    single tensor; ``batch["label"]`` is converted to a tensor as well.

    Args:
        dataset: Object exposing ``with_transform`` whose batches carry
            ``"image"`` and ``"label"`` entries.

    Returns:
        Whatever ``dataset.with_transform`` returns for the callback.
    """
    def _to_flat_tensors(batch):
        flat_images = []
        for picture in batch["image"]:
            tensor_image = transform(picture)
            flat_images.append(tensor_image.view(-1))
        image_tensor = torch.stack(flat_images)
        label_tensor = torch.tensor(batch["label"])
        return {"image": image_tensor, "label": label_tensor}

    return dataset.with_transform(_to_flat_tensors)

# Attach the flatten/normalise transform to both splits.
train_dataset = transform_dataset(mnist["train"])
test_dataset = transform_dataset(mnist["test"])
# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)
# Loss function shared by all experiment cells below.
criterion = nn.CrossEntropyLoss()

print("\n공통 코드 준비 완료. 이제 아래의 실험 코드를 실행하세요.")

PyTorch version: 2.8.0+cu126
CUDA available: True
GPU: Tesla T4

공통 코드 준비 완료. 이제 아래의 실험 코드를 실행하세요.


In [None]:
# Experiment 1: learning-rate tuning
# For every learning rate, train a freshly initialised MLP with Adam for
# nb_epochs epochs and print the train/test accuracy after each epoch.
print("=== Experiment: Learning Rate Tunning ===")
learning_rates = [1e-2, 5e-3, 5e-4, 1e-4]
nb_epochs = 3

for lr in learning_rates:
    print(f"\n--- Learning Rate = {lr} ---")
    # New model and optimizer per setting so runs do not share weights.
    model = MLP().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_accuracies = []
    test_accuracies = []

    for epoch in range(nb_epochs):
        # Training pass. Accuracy is accumulated from the same forward
        # outputs that drive the weight update.
        model.train()
        correct_train = 0
        total_train = 0
        for batch in train_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # argmax yields integer indices directly, so the legacy
            # `.data` access is not needed to keep this out of autograd.
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_train_acc = 100 * correct_train / total_train
        train_accuracies.append(epoch_train_acc)

        # Evaluation pass over the full test loader, without gradients.
        model.eval()
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for batch in test_loader:
                imgs = batch["image"].to(device)
                labels = batch["label"].to(device)
                outputs = model(imgs)
                predicted = outputs.argmax(dim=1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()
        test_acc = 100 * correct_test / total_test
        test_accuracies.append(test_acc)

        print(f"Epoch [{epoch+1}/{nb_epochs}] - Train Acc: {epoch_train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    print(f"\nFinal Results for LR={lr}")
    print(f"Final Train Accuracy: {train_accuracies[-1]:.2f}%")
    print(f"Final Test Accuracy: {test_accuracies[-1]:.2f}%")

=== Experiment: Learning Rate Tunning ===

--- Learning Rate = 0.01 ---
Epoch [1/3] - Train Acc: 92.11%, Test Acc: 94.75%
Epoch [2/3] - Train Acc: 95.33%, Test Acc: 95.07%
Epoch [3/3] - Train Acc: 95.75%, Test Acc: 95.51%

Final Results for LR=0.01
Final Train Accuracy: 95.75%
Final Test Accuracy: 95.51%

--- Learning Rate = 0.005 ---
Epoch [1/3] - Train Acc: 92.85%, Test Acc: 95.19%
Epoch [2/3] - Train Acc: 96.33%, Test Acc: 95.42%
Epoch [3/3] - Train Acc: 97.12%, Test Acc: 96.92%

Final Results for LR=0.005
Final Train Accuracy: 97.12%
Final Test Accuracy: 96.92%

--- Learning Rate = 0.0005 ---
Epoch [1/3] - Train Acc: 88.97%, Test Acc: 93.47%
Epoch [2/3] - Train Acc: 94.43%, Test Acc: 95.54%
Epoch [3/3] - Train Acc: 95.99%, Test Acc: 96.29%

Final Results for LR=0.0005
Final Train Accuracy: 95.99%
Final Test Accuracy: 96.29%

--- Learning Rate = 0.0001 ---
Epoch [1/3] - Train Acc: 81.00%, Test Acc: 90.01%
Epoch [2/3] - Train Acc: 90.55%, Test Acc: 91.59%
Epoch [3/3] - Train Acc: 92.

In [None]:
# Experiment 2: hidden-size tuning
# For every hidden width, train a freshly initialised MLP with Adam at a
# fixed learning rate and print the train/test accuracy after each epoch.
print("=== Experiment: Hidden Size Tunning ===")
hidden_sizes = [50, 200, 500, 1000]
learning_rate = 1e-3
nb_epochs = 3

for hs in hidden_sizes:
    print(f"\n--- Hidden Size = {hs} ---")
    # New model and optimizer per setting so runs do not share weights.
    model = MLP(hidden_size=hs).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_accuracies = []
    test_accuracies = []

    for epoch in range(nb_epochs):
        # Training pass. Accuracy is accumulated from the same forward
        # outputs that drive the weight update.
        model.train()
        correct_train = 0
        total_train = 0
        for batch in train_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # argmax yields integer indices directly, so the legacy
            # `.data` access is not needed to keep this out of autograd.
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_train_acc = 100 * correct_train / total_train
        train_accuracies.append(epoch_train_acc)

        # Evaluation pass over the full test loader, without gradients.
        model.eval()
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for batch in test_loader:
                imgs = batch["image"].to(device)
                labels = batch["label"].to(device)
                outputs = model(imgs)
                predicted = outputs.argmax(dim=1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()
        test_acc = 100 * correct_test / total_test
        test_accuracies.append(test_acc)

        print(f"Epoch [{epoch+1}/{nb_epochs}] - Train Acc: {epoch_train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    print(f"\nFinal Results for Hidden Size={hs}")
    print(f"Final Train Accuracy: {train_accuracies[-1]:.2f}%")
    print(f"Final Test Accuracy: {test_accuracies[-1]:.2f}%")

=== Experiment: Hidden Size Tunning ===

--- Hidden Size = 50 ---
Epoch [1/3] - Train Acc: 89.51%, Test Acc: 93.71%
Epoch [2/3] - Train Acc: 94.27%, Test Acc: 95.27%
Epoch [3/3] - Train Acc: 95.62%, Test Acc: 96.00%

Final Results for Hidden Size=50
Final Train Accuracy: 95.62%
Final Test Accuracy: 96.00%

--- Hidden Size = 200 ---
Epoch [1/3] - Train Acc: 91.78%, Test Acc: 95.68%
Epoch [2/3] - Train Acc: 96.58%, Test Acc: 97.04%
Epoch [3/3] - Train Acc: 97.61%, Test Acc: 96.95%

Final Results for Hidden Size=200
Final Train Accuracy: 97.61%
Final Test Accuracy: 96.95%

--- Hidden Size = 500 ---
Epoch [1/3] - Train Acc: 93.02%, Test Acc: 96.24%
Epoch [2/3] - Train Acc: 97.21%, Test Acc: 97.59%
Epoch [3/3] - Train Acc: 98.19%, Test Acc: 97.61%

Final Results for Hidden Size=500
Final Train Accuracy: 98.19%
Final Test Accuracy: 97.61%

--- Hidden Size = 1000 ---
Epoch [1/3] - Train Acc: 93.72%, Test Acc: 96.99%
Epoch [2/3] - Train Acc: 97.42%, Test Acc: 97.02%
Epoch [3/3] - Train Acc: 98

In [None]:
# Experiment 3: epoch-count tuning
# For every epoch budget, train a freshly initialised MLP from scratch and
# print the train/test accuracy after each epoch.
print("=== Experiment: Epochs Tunning ===")
epochs_list = [5, 10, 20, 30]
learning_rate = 1e-3
hidden_size = 100

# NOTE: the loop variable deliberately rebinds the notebook-level nb_epochs.
for nb_epochs in epochs_list:
    print(f"\n--- Number of Epochs = {nb_epochs} ---")
    # New model and optimizer per setting so runs do not share weights.
    model = MLP(hidden_size=hidden_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_accuracies = []
    test_accuracies = []

    for epoch in range(nb_epochs):
        # Training pass. Accuracy is accumulated from the same forward
        # outputs that drive the weight update.
        model.train()
        correct_train = 0
        total_train = 0
        for batch in train_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # argmax yields integer indices directly, so the legacy
            # `.data` access is not needed to keep this out of autograd.
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_train_acc = 100 * correct_train / total_train
        train_accuracies.append(epoch_train_acc)

        # Evaluation pass over the full test loader, without gradients.
        model.eval()
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for batch in test_loader:
                imgs = batch["image"].to(device)
                labels = batch["label"].to(device)
                outputs = model(imgs)
                predicted = outputs.argmax(dim=1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()
        test_acc = 100 * correct_test / total_test
        test_accuracies.append(test_acc)

        print(f"Epoch [{epoch+1}/{nb_epochs}] - Train Acc: {epoch_train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    print(f"\nFinal Results for Epochs={nb_epochs}")
    print(f"Final Train Accuracy: {train_accuracies[-1]:.2f}%")
    print(f"Final Test Accuracy: {test_accuracies[-1]:.2f}%")

=== Experiment: Epochs Tunning ===

--- Number of Epochs = 5 ---
Epoch [1/5] - Train Acc: 90.84%, Test Acc: 94.77%
Epoch [2/5] - Train Acc: 95.76%, Test Acc: 96.16%
Epoch [3/5] - Train Acc: 96.91%, Test Acc: 96.73%
Epoch [4/5] - Train Acc: 97.63%, Test Acc: 97.22%
Epoch [5/5] - Train Acc: 98.06%, Test Acc: 97.16%

Final Results for Epochs=5
Final Train Accuracy: 98.06%
Final Test Accuracy: 97.16%

--- Number of Epochs = 10 ---
Epoch [1/10] - Train Acc: 90.75%, Test Acc: 94.61%
Epoch [2/10] - Train Acc: 95.67%, Test Acc: 95.91%
Epoch [3/10] - Train Acc: 96.91%, Test Acc: 96.92%
Epoch [4/10] - Train Acc: 97.68%, Test Acc: 96.89%
Epoch [5/10] - Train Acc: 98.06%, Test Acc: 97.50%
Epoch [6/10] - Train Acc: 98.34%, Test Acc: 97.81%
Epoch [7/10] - Train Acc: 98.56%, Test Acc: 97.59%
Epoch [8/10] - Train Acc: 98.90%, Test Acc: 97.56%
Epoch [9/10] - Train Acc: 99.03%, Test Acc: 97.87%
Epoch [10/10] - Train Acc: 99.13%, Test Acc: 97.79%

Final Results for Epochs=10
Final Train Accuracy: 99.13%


In [None]:
# Experiment 4: Sigmoid activation function
# Train a single SigmoidMLP with Adam and print the train/test accuracy
# after each epoch.
print("=== Experiment: Sigmoid Activation Function ===")
learning_rate = 1e-3
nb_epochs = 3

model_sigmoid = SigmoidMLP().to(device)
optimizer_sigmoid = optim.Adam(model_sigmoid.parameters(), lr=learning_rate)

train_accuracies_sigmoid = []
test_accuracies_sigmoid = []

for epoch in range(nb_epochs):
    # Training pass. Accuracy is accumulated from the same forward outputs
    # that drive the weight update.
    model_sigmoid.train()
    correct_train = 0
    total_train = 0
    for batch in train_loader:
        imgs = batch["image"].to(device)
        labels = batch["label"].to(device)
        optimizer_sigmoid.zero_grad()
        outputs = model_sigmoid(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_sigmoid.step()
        # argmax yields integer indices directly, so the legacy `.data`
        # access is not needed to keep this out of autograd.
        predicted = outputs.argmax(dim=1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_train_acc = 100 * correct_train / total_train
    train_accuracies_sigmoid.append(epoch_train_acc)

    # Evaluation pass over the full test loader, without gradients.
    model_sigmoid.eval()
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for batch in test_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            outputs = model_sigmoid(imgs)
            predicted = outputs.argmax(dim=1)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()
    test_acc = 100 * correct_test / total_test
    test_accuracies_sigmoid.append(test_acc)

    print(f"Epoch [{epoch+1}/{nb_epochs}] - Train Acc: {epoch_train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

print("\nFinal Results for Sigmoid MLP")
print(f"Final Train Accuracy: {train_accuracies_sigmoid[-1]:.2f}%")
print(f"Final Test Accuracy: {test_accuracies_sigmoid[-1]:.2f}%")

=== Experiment: Sigmoid Activation Function ===
Epoch [1/3] - Train Acc: 87.91%, Test Acc: 92.62%
Epoch [2/3] - Train Acc: 93.58%, Test Acc: 94.45%
Epoch [3/3] - Train Acc: 95.06%, Test Acc: 95.13%

Final Results for Sigmoid MLP
Final Train Accuracy: 95.06%
Final Test Accuracy: 95.13%
