In [1]:
import os.path
from collections import OrderedDict

import numpy as np
import torch
from torch import nn, optim

%matplotlib inline
import matplotlib.pyplot as plt

from fashionmnist_cnn import *
from utils import *

In [2]:
# Flip to True to force CPU execution even when a CUDA device is present.
DISABLE_CUDA = False

# `and` short-circuits, so torch.cuda is not queried when CUDA is disabled.
device = torch.device(
    'cuda' if not DISABLE_CUDA and torch.cuda.is_available() else 'cpu'
)

In [5]:
# Dataset wrapper rooted at data/fashionMNIST (joined with the OS separator).
# Later cells read train/val/test loaders and sizes from this object.
# NOTE(review): val_size=.2 presumably reserves 20% of the training data for
# validation -- confirm against the FashionMNIST helper's definition.
dataset = FashionMNIST(
    os.path.join('data', 'fashionMNIST'),
    val_size=.2,
    batch_size=32,
)

In [6]:
def fashionmnist_model():
    """Assemble the FashionMNIST classifier as nested ``nn.Sequential`` stages.

    The returned model has three named children:

    * ``features``   -- four 3x3 convolutions, each followed by batch norm
      and ReLU,
    * ``flatten``    -- the ``Flatten`` helper,
    * ``classifier`` -- a 128-unit dense layer with batch norm, ReLU and
      dropout, followed by a 10-way linear output.

    Shape comments below assume a 1 x 28 x 28 input.

    ``OrderedDict`` and ``Flatten`` are not imported explicitly by this
    notebook; presumably they arrive through the star imports -- TODO confirm.

    Returns:
        nn.Sequential: a freshly initialised model. Its last layer is a
        plain ``nn.Linear``, so the outputs are unnormalised class scores.
    """
    # (layer name, in channels, out channels, stride, padding) per 3x3 conv.
    conv_specs = (
        ('conv1', 1, 32, 1, 1),     # -> 32, 28, 28
        ('conv2', 32, 64, 3, 1),    # -> 64, 10, 10
        ('conv3', 64, 128, 1, 0),   # -> 128, 8, 8
        ('conv4', 128, 128, 1, 0),  # -> 128, 6, 6
    )

    feature_layers = OrderedDict()
    for name, in_ch, out_ch, stride, padding in conv_specs:
        # Layers are created in conv -> bn -> relu order for every block.
        feature_layers[name] = nn.Conv2d(
            in_ch, out_ch, kernel_size=3, stride=stride, padding=padding, bias=False
        )
        feature_layers[name + '_bn'] = nn.BatchNorm2d(out_ch)
        feature_layers[name + '_relu'] = nn.ReLU()

    # Dense head; 128 * 6 * 6 matches the flattened conv4 output.
    classifier_layers = OrderedDict()
    classifier_layers['dense1'] = nn.Linear(128 * 6 * 6, 128, bias=False)
    classifier_layers['dense1_bn'] = nn.BatchNorm1d(128)
    classifier_layers['dense1_relu'] = nn.ReLU()
    classifier_layers['dense1_dropout'] = nn.Dropout()  # library-default drop rate
    classifier_layers['output'] = nn.Linear(128, 10)

    stages = OrderedDict()
    stages['features'] = nn.Sequential(feature_layers)
    stages['flatten'] = Flatten()
    stages['classifier'] = nn.Sequential(classifier_layers)

    return nn.Sequential(stages)

In [7]:
# Number of full passes over the training loader.
N_EPOCHS = 50

model = fashionmnist_model()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1, N_EPOCHS + 1):
    # ---- Training pass ----
    model.train()
    # NOTE: the *_loss values are sums of the per-batch losses, not averages,
    # so they grow with the number of batches and the train and validation
    # figures are only comparable when both loaders yield equally many batches.
    train_loss = 0
    # Running count of correct predictions, kept as a plain Python int so the
    # division below also works when the loader yields no batches.
    train_accuracy = 0

    for X, y, _ in dataset.train_loader:  # third item (label) is unused here
        X = X.to(device)
        y = y.to(device)

        # The optimizer was built from model.parameters(), so this clears
        # every gradient; a separate model.zero_grad() is redundant.
        optimizer.zero_grad()

        pred = model(X)
        loss = criterion(pred, y)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_accuracy += (torch.argmax(pred, 1) == y).sum().item()

    # Measured in train mode while the weights were still being updated.
    train_accuracy = train_accuracy / dataset.train_size

    # ---- Validation pass ----
    model.eval()
    val_loss = 0
    val_accuracy = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for X, y, _ in dataset.val_loader:
            X = X.to(device)
            y = y.to(device)

            pred = model(X)
            loss = criterion(pred, y)

            val_loss += loss.item()
            val_accuracy += (torch.argmax(pred, 1) == y).sum().item()

    val_accuracy = val_accuracy / dataset.val_size

    print('Epoch %.2d: train_loss = %.3f, train_accuracy = %.3f, val_loss = %.3f, val_accuracy = %.3f' % (
        epoch, train_loss, train_accuracy, val_loss, val_accuracy
    ))

Epoch 01: train_loss = 694.541, train_accuracy = 0.848, val_loss = 106.329, val_accuracy = 0.896
Epoch 02: train_loss = 438.701, train_accuracy = 0.896, val_loss = 103.186, val_accuracy = 0.894
Epoch 03: train_loss = 367.884, train_accuracy = 0.912, val_loss = 85.404, val_accuracy = 0.914
Epoch 04: train_loss = 316.274, train_accuracy = 0.925, val_loss = 82.363, val_accuracy = 0.918
Epoch 05: train_loss = 273.123, train_accuracy = 0.934, val_loss = 82.148, val_accuracy = 0.920
Epoch 06: train_loss = 240.746, train_accuracy = 0.942, val_loss = 80.536, val_accuracy = 0.924
Epoch 07: train_loss = 204.797, train_accuracy = 0.951, val_loss = 80.881, val_accuracy = 0.925
Epoch 08: train_loss = 177.646, train_accuracy = 0.957, val_loss = 90.932, val_accuracy = 0.919
Epoch 09: train_loss = 153.927, train_accuracy = 0.962, val_loss = 88.620, val_accuracy = 0.926
Epoch 10: train_loss = 129.687, train_accuracy = 0.968, val_loss = 95.456, val_accuracy = 0.925
Epoch 11: train_loss = 117.862, train_

In [10]:
model_filename = os.path.join('models', 'fashionmnist.pt')

# torch.save does not create missing parent directories. Create the folder
# first so the trained weights are not lost to an I/O error.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

# Persist only the state_dict (parameters and buffers), not the module object.
torch.save(model.state_dict(), model_filename)

In [11]:
# Final evaluation on the held-out test loader.
model.eval()
# Running count of correct predictions, kept as a plain Python int so the
# division below also works when the loader yields no batches.
test_accuracy = 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for X, y, _ in dataset.test_loader:  # third item (label) is unused here
        X = X.to(device)
        y = y.to(device)

        pred = model(X)
        test_accuracy += (torch.argmax(pred, 1) == y).sum().item()

test_accuracy = test_accuracy / dataset.test_size

print('Test accuracy: %.3f' % test_accuracy)
print('Test error rate: %.3f' % (1 - test_accuracy))

Test accuracy: 0.920
Test error rate: 0.080
