In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch import nn, optim
from torch.nn import functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import transforms
from models.classifier import ClassifierForMNIST

In [2]:
# Number of samples per mini-batch for both the train and test DataLoaders.
BATCH_SIZE = 128
# Learning rate (eta); presumably meant for the optimizer -- verify that it is
# actually passed to the optimizer constructor in train().
ETA = 1e-3
# Number of full passes over the training set performed by train().
EPOCHS = 10

In [3]:
def compute_accuracy(ps, labels):
    """Return the top-1 accuracy of a batch of predictions.

    Args:
        ps: 2-D tensor of per-class scores (e.g. probabilities), one row per
            sample. The predicted class is the arg-max along ``dim=1``.
        labels: Class labels, exactly one per row of ``ps``. Both ``(N,)`` and
            ``(N, 1)`` layouts are accepted.

    Returns:
        A 0-d float tensor holding the fraction of rows whose predicted class
        equals the corresponding label.
    """
    # argmax along the class axis already yields shape (N,), so no squeeze is
    # required and a batch of one sample keeps its batch dimension.
    predictions = ps.argmax(dim=1)
    # Flatten the labels so that an (N, 1) tensor is compared element-wise
    # instead of broadcasting against (N,) into an (N, N) matrix.
    targets = labels.reshape(-1)
    return (predictions == targets).float().mean()

In [4]:
def train_step(context, x, y):
    """Run one optimisation step on a single mini-batch.

    Args:
        context: Dict holding the network under ``"model"`` and its optimizer
            under ``"optimizer"``.
        x: Input batch. Everything after the first (batch) dimension is
            flattened before the forward pass.
        y: Labels for the batch; cast to ``long`` before use.

    Returns:
        Tuple ``(loss, acc)`` of Python floats: the batch NLL loss and the
        batch top-1 accuracy.
    """
    model = context["model"]
    optimizer = context["optimizer"]
    targets = y.long()

    optimizer.zero_grad()

    # reshape (rather than view) also accepts non-contiguous inputs.
    x = x.reshape(x.size(0), -1)

    # The model output is fed to nll_loss, so it is presumably a tensor of
    # log-probabilities (e.g. produced by log_softmax) -- confirm in the model.
    logps = model(x)
    loss = F.nll_loss(logps, targets)  # same as nn.NLLLoss()(logps, targets)

    loss.backward()
    optimizer.step()

    # The accuracy is a metric only: keep it out of the autograd graph.
    with torch.no_grad():
        ps = torch.exp(logps)
        acc = compute_accuracy(ps, targets)

    return loss.item(), acc.item()

In [5]:
def eval_step(context, x, y):
    """Evaluate the model on a single mini-batch without updating it.

    Gradient tracking is disabled inside this function, so it is safe to call
    whether or not the caller has already entered ``torch.no_grad()``.

    Args:
        context: Dict holding the network under ``"model"``.
        x: Input batch. Everything after the first (batch) dimension is
            flattened before the forward pass.
        y: Labels for the batch; cast to ``long`` before use.

    Returns:
        Tuple ``(loss, acc)`` of Python floats: the batch NLL loss and the
        batch top-1 accuracy.
    """
    model = context["model"]
    targets = y.long()

    with torch.no_grad():
        # reshape (rather than view) also accepts non-contiguous inputs.
        flat = x.reshape(x.size(0), -1)

        logps = model(flat)
        loss = F.nll_loss(logps, targets)  # same as nn.NLLLoss()(logps, targets)

        ps = torch.exp(logps)
        acc = compute_accuracy(ps, targets)

    return loss.item(), acc.item()

In [6]:
def _run_epoch(context, loader, step_fn, device):
    """Apply ``step_fn`` to every batch of ``loader`` and average the metrics.

    Each batch's loss and accuracy are weighted by the number of samples in
    the batch, so a smaller final batch does not skew the epoch averages.

    Returns:
        Tuple ``(mean_loss, mean_acc)`` over all samples seen.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)

        loss, acc = step_fn(context, x, y)

        n = x.size(0)
        loss_sum += loss * n
        acc_sum += acc * n
        seen += n

    # Guard against an empty loader instead of dividing by zero.
    seen = max(seen, 1)
    return loss_sum / seen, acc_sum / seen


def train():
    """Train ``ClassifierForMNIST`` on MNIST and report metrics every epoch.

    Loads the MNIST train/test splits from ``../data`` (they must already be
    on disk because ``download=False``), trains with Adam for ``EPOCHS``
    epochs and, after each epoch, prints the sample-weighted mean loss and
    top-1 accuracy on both splits. Runs on the GPU when one is available and
    on the CPU otherwise.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # ToTensor scales pixels to [0, 1]; Normalize then computes (x - 0.5) / 1.0.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (1.0,))
    ])

    trainset = MNIST("../data", transform=transform, download=False, train=True)
    train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

    testset = MNIST("../data", transform=transform, download=False, train=False)
    test_loader = DataLoader(testset, batch_size=BATCH_SIZE)

    model = ClassifierForMNIST().to(device)
    # Pass the configured learning rate explicitly so that ETA takes effect.
    optimizer = optim.Adam(model.parameters(), lr=ETA)

    context = {
        "model": model,
        "optimizer": optimizer
    }

    for e in range(EPOCHS):
        model.train()
        train_loss, train_acc = _run_epoch(context, train_loader, train_step, device)

        model.eval()
        # No gradients are needed while measuring test performance.
        with torch.no_grad():
            test_loss, test_acc = _run_epoch(context, test_loader, eval_step, device)

        print(f"Epochs {e+1}/{EPOCHS}")
        print(f"Train loss: {train_loss:.8f}, train acc: {train_acc:.4f}")
        print(f"Test loss: {test_loss:.8f}, test acc: {test_acc:.4f}")

In [7]:
# Run the full training loop; train/test loss and accuracy are printed after every epoch.
train()

Epochs 1/10
Train loss: 0.47414244, train acc: 0.8632
Test loss: 0.27440646, test acc: 0.9179
Epochs 2/10
Train loss: 0.23155058, train acc: 0.9311
Test loss: 0.17523321, test acc: 0.9478
Epochs 3/10
Train loss: 0.16979344, train acc: 0.9493
Test loss: 0.14936868, test acc: 0.9532
Epochs 4/10
Train loss: 0.13020549, train acc: 0.9602
Test loss: 0.11907259, test acc: 0.9633
Epochs 5/10
Train loss: 0.10879804, train acc: 0.9669
Test loss: 0.10857096, test acc: 0.9663
Epochs 6/10
Train loss: 0.09262586, train acc: 0.9711
Test loss: 0.09888093, test acc: 0.9698
Epochs 7/10
Train loss: 0.08060006, train acc: 0.9747
Test loss: 0.09167872, test acc: 0.9721
Epochs 8/10
Train loss: 0.07117876, train acc: 0.9783
Test loss: 0.08615670, test acc: 0.9722
Epochs 9/10
Train loss: 0.06306767, train acc: 0.9801
Test loss: 0.09740640, test acc: 0.9699
Epochs 10/10
Train loss: 0.06005674, train acc: 0.9807
Test loss: 0.08176941, test acc: 0.9744
