# (3) Python Code:

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader, random_split
from torch.optim import SGD, Adam, RMSprop, Adagrad

# Dataset setup.
# Images are converted to tensors, then Normalize subtracts a mean of 0.5 and
# divides by a standard deviation of 0.5 on the single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Build the training split first and the test split second; both are
# downloaded into ./data when they are not already present.
train_set, test_set = (
    torchvision.datasets.FashionMNIST(
        root='./data',
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# Hold out 10% of the training data as a validation set.
num_val = int(len(train_set) * 0.1)
num_train = len(train_set) - num_val
train_ds, val_ds = random_split(train_set, [num_train, num_val])

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 17402961.41it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 303616.03it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5472195.53it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 15036404.59it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



In [None]:

# Model definition
class MLP(nn.Module):
    """Multi-layer perceptron with two hidden layers and a 10-way output.

    The input is flattened and passed through Linear(28*28 -> 256),
    Linear(256 -> 128) and Linear(128 -> 10). The same activation module is
    applied after each of the two hidden layers; the output layer returns raw
    scores with no activation.

    Args:
        activation_func: Activation module applied after each hidden layer.
            When ``None`` (the default) a new ``nn.ReLU()`` is created for
            this instance.
    """

    def __init__(self, activation_func=None):
        super().__init__()
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(28 * 28, 256)
        # Build the default per instance: a module object used as a default
        # argument is created once at definition time and would be shared by
        # every model constructed without an explicit activation.
        self.activation = nn.ReLU() if activation_func is None else activation_func
        self.layer2 = nn.Linear(256, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        x = self.flatten(x)
        x = self.activation(self.layer1(x))
        x = self.activation(self.layer2(x))
        x = self.output(x)
        return x

# Hyperparameter grid
activation_functions = [
    nn.ReLU(),
    nn.Tanh(),
    nn.Softmax(dim=1),
]
learning_rates = [0.001, 0.01, 0.1]
epochs = 30    # maximum number of training epochs per configuration
patience = 5   # epochs without validation improvement before stopping early


def _optimizer_factory(optimizer_cls, **fixed_kwargs):
    """Return a ``(params, lr)`` callable that builds ``optimizer_cls``.

    ``fixed_kwargs`` are forwarded unchanged on every call, next to ``lr``.
    """
    def build(params, lr):
        return optimizer_cls(params, lr=lr, **fixed_kwargs)
    return build


# Optimizer builders, keyed by the name used in logs and result keys.
# 'Momentum' is SGD with momentum=0.9; the others use only the learning rate.
optimizer_functions = {
    'SGD': _optimizer_factory(SGD),
    'Momentum': _optimizer_factory(SGD, momentum=0.9),
    'Adagrad': _optimizer_factory(Adagrad),
    'RMSprop': _optimizer_factory(RMSprop),
    'Adam': _optimizer_factory(Adam),
}


In [None]:

# Grid search over activation function, learning rate and optimizer.
# results maps (str(activation), learning rate, optimizer name) -> test accuracy.
results = {}

for activation_func in activation_functions:
    for lr in learning_rates:
        for opt_name, opt_func in optimizer_functions.items():
            # Fresh model, optimizer and loss for every configuration.
            model = MLP(activation_func=activation_func)
            optimizer = opt_func(model.parameters(), lr=lr)
            loss_fn = nn.CrossEntropyLoss()

            best_val_loss = float('inf')
            # Copy of the weights that produced best_val_loss; stays None if
            # the validation loss never drops below infinity (e.g. NaN loss).
            best_state = None
            counter = 0  # consecutive epochs without validation improvement

            for epoch in range(epochs):
                # Training pass
                model.train()
                for X, y in train_loader:
                    pred = model(X)
                    loss = loss_fn(pred, y)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Validation pass: average of the per-batch losses
                val_loss = 0.0
                model.eval()
                with torch.no_grad():
                    for X, y in val_loader:
                        pred = model(X)
                        loss = loss_fn(pred, y)
                        val_loss += loss.item()
                val_loss /= len(val_loader)

                # Early-stopping check
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    counter = 0
                    # Clone the tensors: state_dict() values keep tracking the
                    # live parameters, which later optimizer steps overwrite.
                    best_state = {
                        name: tensor.detach().clone()
                        for name, tensor in model.state_dict().items()
                    }
                else:
                    counter += 1
                    if counter >= patience:
                        print(f"Early stopping triggered at epoch {epoch} for activation {activation_func}, learning rate {lr}, optimizer {opt_name}")
                        break

            # Evaluate the weights with the best validation loss rather than
            # the ones left after up to `patience` non-improving epochs.
            if best_state is not None:
                model.load_state_dict(best_state)

            # Final evaluation on the test set
            model.eval()  # explicit, so it does not depend on the epoch loop having run
            correct = 0
            total = 0
            with torch.no_grad():
                for X, y in test_loader:
                    pred = model(X)
                    correct += (pred.argmax(1) == y).type(torch.float).sum().item()
                    total += y.size(0)
            accuracy = correct / total
            print(f"Activation: {activation_func}, LR: {lr}, Optimizer: {opt_name}, Accuracy: {accuracy}")
            results[(str(activation_func), lr, opt_name)] = accuracy


Activation: ReLU(), LR: 0.001, Optimizer: SGD, Accuracy: 0.8222
Activation: ReLU(), LR: 0.001, Optimizer: Momentum, Accuracy: 0.874
Activation: ReLU(), LR: 0.001, Optimizer: Adagrad, Accuracy: 0.8559
Early stopping triggered at epoch 10 for activation ReLU(), learning rate 0.001, optimizer RMSprop
Activation: ReLU(), LR: 0.001, Optimizer: RMSprop, Accuracy: 0.8765
Early stopping triggered at epoch 15 for activation ReLU(), learning rate 0.001, optimizer Adam
Activation: ReLU(), LR: 0.001, Optimizer: Adam, Accuracy: 0.8849
Activation: ReLU(), LR: 0.01, Optimizer: SGD, Accuracy: 0.8711
Early stopping triggered at epoch 10 for activation ReLU(), learning rate 0.01, optimizer Momentum
Activation: ReLU(), LR: 0.01, Optimizer: Momentum, Accuracy: 0.8747
Early stopping triggered at epoch 20 for activation ReLU(), learning rate 0.01, optimizer Adagrad
Activation: ReLU(), LR: 0.01, Optimizer: Adagrad, Accuracy: 0.8895
Early stopping triggered at epoch 15 for activation ReLU(), learning rate 0.0

## 최종결론: 가장 좋은 정확성은 0.8895이며 실험을 통해 획득한 최적 Architecture는 아래와 같다.

## 1) Epochs = 20 (Early stopping 로그에 출력된, 0부터 시작하는 epoch 인덱스 기준 — 즉 21번째 epoch에서 학습이 중단됨)
## 2) Activation: ReLU()
## 3) LR: 0.01
## 4) Optimizer: Adagrad
