In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T

from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchmetrics.aggregation import MeanMetric
from torchmetrics.functional.classification import accuracy

In [2]:
# Build config
# Everything a reader might want to tune lives in this cell.
title = 'cifar10_mlp'          # prefix used for checkpoint file names
data_root = 'data'             # where the CIFAR-10 files are stored / downloaded
batch_size = 32
num_workers = 8
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_lr = 0.001
epochs = 40
seed = 42                      # fixed seed so repeated runs start from the same state

log_dir = 'log'
checkpoint_dir = 'checkpoint'

# Seed torch's global RNG before the model and data loaders are built, so that
# weight initialisation and shuffling are repeatable across kernel restarts.
# NOTE(review): this does not by itself guarantee bit-identical GPU results.
torch.manual_seed(seed)

os.makedirs(log_dir, exist_ok=True)  # original note: this call can be omitted
os.makedirs(checkpoint_dir, exist_ok=True)

In [3]:
# Build dataset
# Images are converted to tensors and then normalised per RGB channel
# with mean 0.5 and std 0.25; the same transform is used for both splits.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.25, 0.25, 0.25)),
])

# Training split: shuffled, and the incomplete last batch is dropped.
train_data = CIFAR10(root=data_root, train=True, transform=transform, download=True)
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    drop_last=True,
)

# Held-out split: loader defaults are kept (no shuffle argument, last batch kept).
val_data = CIFAR10(root=data_root, train=False, transform=transform, download=True)
val_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    num_workers=num_workers,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Define model
class MLP(nn.Module):
    """Three-layer fully connected classifier.

    The input is flattened per sample, passed through two hidden linear
    layers with ReLU activations, and projected to ``num_classes`` logits.

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to ``32 * 32 * 3``.
        hidden_features: Width of both hidden layers. Defaults to 128.
        num_classes: Number of output logits. Defaults to 10.

    The attribute names (``fc1``, ``fc2``, ``fc3``, ``act``) are kept as-is so
    that ``state_dict`` keys of previously saved checkpoints still match.
    """

    def __init__(self, in_features=32 * 32 * 3, hidden_features=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, num_classes)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)`` for input ``x``.

        Every dimension after the first is flattened into one feature axis.
        """
        # flatten(start_dim=1) also works for an empty batch, whereas
        # reshape((0, -1)) raises because the -1 dimension is ambiguous.
        x = x.flatten(start_dim=1)
        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        # No activation on the last layer: the outputs are logits.
        x = self.fc3(x)
        return x

# Instantiate the network and move its parameters to the configured device.
model = MLP().to(device)

In [5]:
# Build optimizer
optimizer = optim.Adam(params=model.parameters(), lr=base_lr)

# Build scheduler
# The horizon is expressed in optimisation steps (epochs x batches per epoch),
# so the scheduler is meant to be stepped once per batch, not once per epoch.
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=epochs * len(train_loader),
)

# Build loss function
loss_fn = nn.CrossEntropyLoss()

# Build metric function
# NOTE(review): torchmetrics >= 0.11 requires a `task=` argument for `accuracy`;
# confirm the installed version before upgrading.
metric_fn = accuracy

# Build logger
# Separate writers for the two splits, each in its own sub-directory of log_dir.
train_logger = SummaryWriter(log_dir=f'{log_dir}/train')
val_logger = SummaryWriter(log_dir=f'{log_dir}/val')

In [6]:
# Define training loop 
def train(loader, model, optimizer, scheduler, loss_fn, metric_fn, device):
    """Train ``model`` for one pass over ``loader`` and return epoch averages.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to optimise; it is switched to training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler, also stepped once per batch.
        loss_fn: Callable ``(outputs, targets) -> scalar loss``.
        metric_fn: Callable ``(outputs, targets) -> scalar metric``.
        device: Device the batches and the running means are moved to.

    Returns:
        dict with keys ``'loss'`` and ``'metric'`` holding the sample-weighted
        averages over the epoch.
    """
    model.train()
    loss_mean = MeanMetric().to(device)
    metric_mean = MeanMetric().to(device)

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        num_samples = inputs.shape[0]

        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        metric = metric_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight each per-batch mean by its batch size so the epoch summary is
        # a true per-sample average even when batches differ in size.
        # The loss is detached so the running mean never holds the autograd graph.
        loss_mean.update(loss.detach(), weight=num_samples)
        metric_mean.update(metric, weight=num_samples)

        # The schedule advances per optimisation step, not per epoch.
        scheduler.step()

    summary = {'loss': loss_mean.compute(), 'metric': metric_mean.compute()}

    return summary

In [7]:
# Define evaluation loop 
def evaluate(loader, model, loss_fn, metric_fn, device):
    """Evaluate ``model`` over ``loader`` without gradient tracking.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to evaluate; it is switched to eval mode.
        loss_fn: Callable ``(outputs, targets) -> scalar loss``.
        metric_fn: Callable ``(outputs, targets) -> scalar metric``.
        device: Device the batches and the running means are moved to.

    Returns:
        dict with keys ``'loss'`` and ``'metric'`` holding the sample-weighted
        averages over all batches.
    """
    model.eval()
    loss_mean = MeanMetric().to(device)
    metric_mean = MeanMetric().to(device)

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            num_samples = inputs.shape[0]

            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            metric = metric_fn(outputs, targets)

            # The last batch may be shorter than the others; weighting by batch
            # size keeps the result a per-sample average rather than a mean of
            # batch means.
            loss_mean.update(loss, weight=num_samples)
            metric_mean.update(metric, weight=num_samples)

    summary = {'loss': loss_mean.compute(), 'metric': metric_mean.compute()}

    return summary

In [8]:
# Main loop
# Each epoch: train, evaluate, log both summaries to TensorBoard, write the
# "last" checkpoint, and additionally keep a "best" checkpoint whenever the
# validation accuracy improves.
best_acc = 0.0
try:
    for epoch in range(epochs):
        train_summary = train(train_loader, model, optimizer, scheduler, loss_fn, metric_fn, device)
        val_summary = evaluate(val_loader, model, loss_fn, metric_fn, device)

        # Write log
        train_logger.add_scalar('Loss', train_summary['loss'], epoch + 1)
        train_logger.add_scalar('Accuracy', train_summary['metric'], epoch + 1)
        val_logger.add_scalar('Loss', val_summary['loss'], epoch + 1)
        val_logger.add_scalar('Accuracy', val_summary['metric'], epoch + 1)

        # Save model
        # The scheduler is stepped per batch, so its state is stored as well;
        # without it a resumed run would restart the cosine schedule.
        state_dict = {
            'epoch': epoch + 1,
            'model': model.state_dict(),  # learnable parameters and buffers of the network
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        checkpoint_path = f'{checkpoint_dir}/{title}_last.pth'
        torch.save(state_dict, checkpoint_path)

        # Keep a separate copy of the best-performing epoch on the validation set.
        val_acc = float(val_summary['metric'])
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(state_dict, f'{checkpoint_dir}/{title}_best.pth')

        # print log
        print((f'Epoch {epoch+1}: '
               + f'Train Loss {train_summary["loss"]:.04f}, '
               + f'Train Accuracy {train_summary["metric"]:.04f}, '
               + f'Test Loss {val_summary["loss"]:.04f}, '
               + f'Test Accuracy {val_summary["metric"]:.04f}'))
finally:
    # Close the writers even if training is interrupted or raises, so that
    # pending events are written out.
    train_logger.close()
    val_logger.close()

Epoch 1: Train Loss 1.6580, Train Accuracy 0.4121, Test Loss 1.5403, Test Accuracy 0.4536
Epoch 2: Train Loss 1.4745, Train Accuracy 0.4763, Test Loss 1.4668, Test Accuracy 0.4870
Epoch 3: Train Loss 1.3848, Train Accuracy 0.5103, Test Loss 1.4456, Test Accuracy 0.4913
Epoch 4: Train Loss 1.3139, Train Accuracy 0.5339, Test Loss 1.4160, Test Accuracy 0.5081
Epoch 5: Train Loss 1.2526, Train Accuracy 0.5556, Test Loss 1.4177, Test Accuracy 0.5105
Epoch 6: Train Loss 1.2034, Train Accuracy 0.5737, Test Loss 1.4280, Test Accuracy 0.5102
Epoch 7: Train Loss 1.1532, Train Accuracy 0.5926, Test Loss 1.4382, Test Accuracy 0.5064
Epoch 8: Train Loss 1.1014, Train Accuracy 0.6051, Test Loss 1.4191, Test Accuracy 0.5205
Epoch 9: Train Loss 1.0562, Train Accuracy 0.6238, Test Loss 1.4740, Test Accuracy 0.5124
Epoch 10: Train Loss 1.0123, Train Accuracy 0.6379, Test Loss 1.4876, Test Accuracy 0.5133
Epoch 11: Train Loss 0.9717, Train Accuracy 0.6521, Test Loss 1.4898, Test Accuracy 0.5113
Epoch 12

In [8]:
# # Load model and optimizer states
# checkpoint_path = f'{checkpoint_dir}/{title}_last.pth'
# state_dict = torch.load(checkpoint_path)
                        
# start_epoch = state_dict['epoch']
# model.load_state_dict(state_dict['model'])
# optimizer.load_state_dict(state_dict['optimizer'])

# # Main loop
# best_acc = 0.0
# for epoch in range(start_epoch, epochs):
#     train_summary = train(train_loader, model, optimizer, scheduler, loss_fn, metric_fn, device)
#     val_summary = evaluate(val_loader, model, loss_fn, metric_fn, device) 
    
#     # Write log
#     train_logger.add_scalar('Loss', train_summary['loss'], epoch + 1)
#     train_logger.add_scalar('Accuracy', train_summary['metric'], epoch + 1)
#     val_logger.add_scalar('Loss', val_summary['loss'], epoch + 1)
#     val_logger.add_scalar('Accuracy', val_summary['metric'], epoch + 1)
    
#     # Save model
#     state_dict = {
#         'epoch': epoch + 1,
#         'model': model.state_dict(), # 얘는 뭐지???
#         'optimizer': optimizer.state_dict(), 
#     }
#     checkpoint_path = f'{checkpoint_dir}/{title}_last.pth'
#     torch.save(state_dict, checkpoint_path)
    
#     # print log
#     print((f'Epoch {epoch+1}: '
#            + f'Train Loss {train_summary["loss"]:.04f}, ' 
#            + f'Train Accuracy {train_summary["metric"]:.04f}, '
#            + f'Test Loss {val_summary["loss"]:.04f}, '
#            + f'Test Accuracy {val_summary["metric"]:.04f}'))
    
# train_logger.close()
# val_logger.close()

Epoch 27: Train Loss 0.0737, Train Accuracy 0.9744, Test Loss 3.6086, Test Accuracy 0.5452
Epoch 28: Train Loss 0.0787, Train Accuracy 0.9730, Test Loss 3.7344, Test Accuracy 0.5481
Epoch 29: Train Loss 0.0737, Train Accuracy 0.9745, Test Loss 3.8177, Test Accuracy 0.5471
Epoch 30: Train Loss 0.0727, Train Accuracy 0.9756, Test Loss 3.8009, Test Accuracy 0.5491
Epoch 31: Train Loss 0.0724, Train Accuracy 0.9762, Test Loss 4.1292, Test Accuracy 0.5448
Epoch 32: Train Loss 0.0620, Train Accuracy 0.9787, Test Loss 4.0428, Test Accuracy 0.5501
Epoch 33: Train Loss 0.0539, Train Accuracy 0.9821, Test Loss 4.0934, Test Accuracy 0.5499
Epoch 34: Train Loss 0.0520, Train Accuracy 0.9832, Test Loss 4.1741, Test Accuracy 0.5475
Epoch 35: Train Loss 0.0521, Train Accuracy 0.9825, Test Loss 4.2767, Test Accuracy 0.5458
Epoch 36: Train Loss 0.0485, Train Accuracy 0.9829, Test Loss 4.3529, Test Accuracy 0.5519
Epoch 37: Train Loss 0.0395, Train Accuracy 0.9874, Test Loss 4.4514, Test Accuracy 0.5462