In [1]:
import torch
import torchvision


train_mnist = torchvision.datasets.MNIST(
    "./data",
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,))
    ])
)

test_mnist = torchvision.datasets.MNIST(
    "./data",
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,))
    ])
)


In [5]:
model = torch.nn.Sequential(
    torch.nn.Linear(28 * 28, 300),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(300, 300),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(300, 10),
    torch.nn.Softmax(dim=1)
)

model.to("cuda")

for digit, cls in train_mnist:
    digit = digit.to("cuda")
    digit = digit.view(digit.shape[0], 28 * 28)
    print(model(digit).shape)
    break

torch.Size([1, 10])


In [8]:
from tqdm import tqdm

dl = torch.utils.data.DataLoader(train_mnist, batch_size=32, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(3):
    bar = tqdm(dl)
    for digit, cls in bar:
        digit = digit.to("cuda")
        cls = cls.to("cuda")

        digit = digit.view(digit.shape[0], 28 * 28)
        pred = model(digit)

        loss = loss_fn(pred, cls)
        accuracy = (pred.argmax(dim=1) == cls).float().mean()
        bar.set_description(f"Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}")

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Loss: 1.5271, Accuracy: 0.9375: 100%|██████████| 1875/1875 [00:08<00:00, 225.28it/s]
Loss: 1.4921, Accuracy: 0.9688: 100%|██████████| 1875/1875 [00:08<00:00, 230.53it/s]
Loss: 1.4612, Accuracy: 1.0000: 100%|██████████| 1875/1875 [00:08<00:00, 222.05it/s]
