In [40]:
from utils.torch_util import get_device
import torch
from torch import nn

model = nn.Sequential(
    # 1@28*28 -> 6@28*28
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
    # 6@28*28 -> 6@14*14
    nn.MaxPool2d(kernel_size=2, stride=2),
    # 6@14*14 -> 16@10*10
    nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
    # 16@10*10 -> 16@5*5
    nn.MaxPool2d(kernel_size=2, stride=2),
    # 16@5*5 -> 400
    nn.Flatten(),
    # 400 -> 120
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Dropout(),
    # 120 -> 84
    nn.Linear(120, 84), nn.ReLU(),
    # 84 -> 10
    nn.Linear(84, 10),
    # nn.Softmax(dim=1)
)

train_device = get_device()

model = model.to(device=train_device)

In [41]:
from torchvision.transforms.v2 import ToImage, Compose, Resize, Normalize, ToDtype
from torchvision.datasets import MNIST
from datasets import root_path
from torch.utils.data import DataLoader

batch_size = 256
epoch = 10
lr = 0.003

transform = Compose([
    ToImage(),
    ToDtype(torch.float32),
    Normalize([0], [0.5])
])

train_data_loader = DataLoader(MNIST(root_path + "/torchvision/cache", train=True, download=True, transform=transform),
                               batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(MNIST(root_path + "/torchvision/cache", train=False, download=True, transform=transform),
                              batch_size=batch_size, shuffle=True)


def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        nn.init.xavier_uniform_(m.weight)


model.apply(init_weights)
print(f'training on {train_device}')


training on mps


In [42]:

import torch
from torchmetrics import Accuracy

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
loss = nn.CrossEntropyLoss()
accuracy = Accuracy(task="multiclass", num_classes=10)
accuracy = accuracy.to(train_device)

for i in range(epoch):
    model.train()
    for batch_idx, (X, y) in enumerate(train_data_loader):
        optimizer.zero_grad()
        X, y = X.to(train_device), y.to(train_device) # type: torch.Tensor, torch.Tensor
        y_pred = model(X)
        l = loss(y_pred, y)
        l.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        acc = accuracy(y_pred.argmax(dim=1), y)
        print(acc)


        pr = 0
        for test_X, test_y in test_data_loader:
            test_prec = model(test_X.to(train_device))
            pr += (test_prec.argmax(dim=1) == test_y.to(train_device)).sum().item()
        print(f'Test Precision: {pr / len(test_data_loader.dataset)}')



tensor(0.3021, device='mps:0')
Test Precision: 0.4602
tensor(0.3750, device='mps:0')
Test Precision: 0.5169
tensor(0.5000, device='mps:0')
Test Precision: 0.6214
tensor(0.5833, device='mps:0')
Test Precision: 0.6987
tensor(0.5833, device='mps:0')
Test Precision: 0.7624
tensor(0.6458, device='mps:0')
Test Precision: 0.8074
tensor(0.6875, device='mps:0')
Test Precision: 0.8499
tensor(0.7083, device='mps:0')
Test Precision: 0.8721
tensor(0.6458, device='mps:0')
Test Precision: 0.8865
tensor(0.7917, device='mps:0')
Test Precision: 0.9023
