In [1]:
import torch
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F

In [2]:
# Convert every image to a tensor when it is read from the dataset.
trans = torchvision.transforms.ToTensor()
# download=True only fetches the files when they are missing under `root`,
# so the notebook also runs from a fresh checkout (Restart & Run All).
train_data = datasets.FashionMNIST(root='../data', train=True, download=True, transform=trans)
val_data = datasets.FashionMNIST(root='../data', train=False, download=True, transform=trans)
print('The number of training data:', len(train_data))
print('The number of validation data:', len(val_data))

The number of training data: 60000
The number of validation data: 10000


In [3]:
batch_size = 64
# Reshuffle the training samples every epoch so SGD does not see the same
# batches in the same order each time; validation order does not matter.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size)
# Peek at a single validation batch to confirm the tensor shapes.
X, y = next(iter(val_dataloader))
print('Shape of X:', X.shape)
print('Shape of y:', y.shape)

Shape of X: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64])


In [4]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier producing 10 class scores.

    Two conv + ReLU + average-pool stages are followed by three fully
    connected layers. For a 1x28x28 input the spatial size goes
    28 -> 28 (conv1, padding 2) -> 14 (pool1) -> 10 (conv2) -> 5 (pool2),
    which is why ``fc1`` expects ``16*5*5`` input features.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Fully connected classifier head.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, X):
        """Return the raw (un-normalised) class scores for the batch ``X``."""
        stage1 = self.pool1(F.relu(self.conv1(X)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))
        flat = self.flatten(stage2)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [5]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Build the model and move its parameters to the chosen device.
net = LeNet()
net.to(device)

# Cross-entropy on the raw class scores, optimised with momentum SGD.
loss = nn.CrossEntropyLoss()
optim = torch.optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)

In [6]:
def train(dataloader, net, loss, optim, device=None):
    """Run one optimisation pass (one epoch) over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches; ``dataloader.dataset``
            must support ``len()`` (used only for the progress message).
        net: the model to optimise; it is switched to training mode.
        loss: callable ``loss(y_hat, y)`` returning a scalar tensor.
        optim: optimiser bound to ``net``'s parameters.
        device: device the batches are moved to. When ``None`` (default) it
            is taken from ``net``'s first parameter, falling back to the CPU
            for a parameter-free model.

    Prints the loss of the current batch every 100 batches.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure layers such as Dropout/BatchNorm use their training behaviour.
    net.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        optim.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        optim.step()
        if batch % 100 == 0:
            running_loss = l.item()
            # Samples seen before this batch (batch index times batch size).
            current = batch * len(X)
            print('Train Loss: %.4f, [%d//%d]' % (running_loss, current, len(dataloader.dataset)))

In [7]:
def test(dataloader, net, loss):
    num_batches = len(dataloader)
    val_loss = 0.0
    acc = 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            val_loss += loss(y_hat, y).item()
            acc += (y_hat.argmax(1)==y).type(torch.float32).sum().item()
    val_loss /= num_batches
    acc /= len(dataloader.dataset)
    print('Val Acc: %.4f, Val Avg Loss: %.4f' % (acc, val_loss))
    

In [8]:
# Number of full passes over the training set.
EPOCHS = 5

print('training on', device)
for epoch in range(EPOCHS):
    # Epochs are reported 1-based.
    epoch_label = 'epoch: %d' % (epoch + 1)
    print(epoch_label)
    # One optimisation pass, then a validation pass on the held-out split.
    train(dataloader=train_dataloader, net=net, loss=loss, optim=optim)
    test(dataloader=val_dataloader, net=net, loss=loss)
print('Done!')

training on cpu
epoch: 1
Train Loss: 2.3062, [0//60000]
Train Loss: 2.2977, [6400//60000]
Train Loss: 0.8749, [12800//60000]
Train Loss: 0.9043, [19200//60000]
Train Loss: 0.7966, [25600//60000]
Train Loss: 0.7048, [32000//60000]
Train Loss: 0.6319, [38400//60000]
Train Loss: 0.6037, [44800//60000]
Train Loss: 0.5233, [51200//60000]
Train Loss: 0.6208, [57600//60000]
Val Acc: 0.7861, Val Avg Loss: 0.5683
epoch: 2
Train Loss: 0.4795, [0//60000]
Train Loss: 0.4725, [6400//60000]
Train Loss: 0.3745, [12800//60000]
Train Loss: 0.5468, [19200//60000]
Train Loss: 0.5409, [25600//60000]
Train Loss: 0.4852, [32000//60000]
Train Loss: 0.4862, [38400//60000]
Train Loss: 0.5730, [44800//60000]
Train Loss: 0.4819, [51200//60000]
Train Loss: 0.5447, [57600//60000]
Val Acc: 0.8138, Val Avg Loss: 0.4837
epoch: 3
Train Loss: 0.4170, [0//60000]
Train Loss: 0.4129, [6400//60000]
Train Loss: 0.3218, [12800//60000]
Train Loss: 0.4893, [19200//60000]
Train Loss: 0.5016, [25600//60000]
Train Loss: 0.4403, [