In [15]:
import torch
import torchvision.datasets as datasets
from torchvision.transforms import v2 as transforms

# Define transformations
transform = transforms.Compose([transforms.ToImage(),
                                transforms.ToDtype(torch.float32, scale=True),
                                transforms.Normalize((0.1307,), (0.3081,))])

# Load training data
trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# Load test data
testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = x
        return output

net = Net()

In [17]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [18]:
from tqdm import tqdm

def train(model, train_loader, optimizer, criterion, device):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = output.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

    return train_loss / len(train_loader.dataset), correct / total

device = 'cpu' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
net.to(device)

epochs = 5
for epoch in range(epochs):
    train_loss, train_acc = train(net, trainloader, optimizer, criterion, device)
    print(f'Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}')

100%|██████████| 938/938 [00:47<00:00, 19.94it/s]

Epoch 1, Loss: 0.0032, Accuracy: 0.9386



100%|██████████| 938/938 [00:46<00:00, 20.17it/s]

Epoch 2, Loss: 0.0014, Accuracy: 0.9748



100%|██████████| 938/938 [00:43<00:00, 21.40it/s]

Epoch 3, Loss: 0.0010, Accuracy: 0.9798



100%|██████████| 938/938 [00:43<00:00, 21.56it/s]

Epoch 4, Loss: 0.0008, Accuracy: 0.9835



100%|██████████| 938/938 [00:43<00:00, 21.59it/s]

Epoch 5, Loss: 0.0007, Accuracy: 0.9850





In [7]:
# net = torch.load('model1.pkl', weights_only=False)
# net.eval()

In [None]:
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
    test_loss /= len(test_loader.dataset)
    accuracy = correct / total
    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {accuracy:.4f}')

test(net, testloader, criterion, device="cpu")

torch.Size([64, 1, 28, 28])


0

In [20]:
# torch.save(net, 'model1.pkl')

In [6]:
from torch.utils.tensorboard import SummaryWriter

# Create a SummaryWriter to log to TensorBoard
writer = SummaryWriter('runs/mnist_experiment_1')

for epoch in range(epochs):
    train_loss, train_acc = train(net, trainloader, optimizer, criterion, device)
    test(net, testloader, criterion, device)
    
    # Log to TensorBoard
    writer.add_scalar('Loss/train', train_loss, epoch)
    writer.add_scalar('Accuracy/train', train_acc, epoch)

writer.close()

100%|██████████| 938/938 [00:44<00:00, 20.91it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9921


100%|██████████| 938/938 [00:43<00:00, 21.44it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9918


100%|██████████| 938/938 [00:48<00:00, 19.22it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9920


100%|██████████| 938/938 [00:46<00:00, 20.34it/s]


Test set: Average loss: 0.0006, Accuracy: 0.9917


100%|██████████| 938/938 [00:46<00:00, 20.30it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9921


100%|██████████| 938/938 [00:45<00:00, 20.49it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9923


100%|██████████| 938/938 [00:45<00:00, 20.43it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9911


100%|██████████| 938/938 [00:46<00:00, 20.29it/s]


Test set: Average loss: 0.0006, Accuracy: 0.9924


100%|██████████| 938/938 [00:45<00:00, 20.44it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9921


100%|██████████| 938/938 [00:45<00:00, 20.49it/s]


Test set: Average loss: 0.0006, Accuracy: 0.9922


In [8]:
import os

best_accuracy = 0.0
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(epochs):
    train_loss, train_acc = train(net, trainloader, optimizer, criterion, device)
    test_accuracy = test(net, testloader, criterion, device)
    
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        torch.save({
            'epoch': epoch,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            'accuracy': test_accuracy
        }, os.path.join(checkpoint_dir, 'best_model.pth'))

100%|██████████| 938/938 [00:43<00:00, 21.47it/s]


Test set: Average loss: 0.0005, Accuracy: 0.9928


TypeError: '>' not supported between instances of 'NoneType' and 'float'