In [1]:
import datetime
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms, models

In [2]:
# Experiment configuration shared by the cells below.
DATA_PATH = '../../datasets/'  # root directory passed to the torchvision dataset constructors
BATCH_SIZE = 100               # mini-batch size given to every DataLoader
MOMENTUM = 0.9                 # momentum coefficient used by the Nesterov (NAG) runs
EPOCHS = 20                    # number of epochs for each optimizer run
SEED = 0                       # seed for PyTorch's global random number generator

# Seed the global RNG so weight initialisation and batch shuffling repeat
# after Restart Kernel -> Run All (the trailing ';' hides the cell output).
torch.manual_seed(SEED);

In [3]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` epochs and print the mean batch loss per epoch.

    Args:
        n_epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per batch.
        model: Module mapping a batch of images to class scores.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        train_loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        None. One line per epoch is printed with the current timestamp, the
        epoch number and the loss averaged over that epoch's batches
        (``nan`` when the loader yielded no batches).
    """
    # Send every batch to the device the model's parameters live on rather
    # than relying on a notebook-level ``device`` global. A model without
    # parameters leaves the batch wherever the loader produced it.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    # Make sure training-mode behaviour (dropout, batch-norm batch statistics)
    # is active even if an earlier evaluation left the model in eval mode.
    model.train()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        n_batches = 0  # counted by hand so loaders without __len__ also work
        for imgs, labels in train_loader:
            if target is not None:
                imgs = imgs.to(device=target)
                labels = labels.to(device=target)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()
            n_batches += 1

        mean_loss = loss_train / n_batches if n_batches else float('nan')
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss))


def calculate_accuracy(model, train_loader, test_loader):
    """Print and return the classification accuracy of ``model`` on two loaders.

    The model is switched to evaluation mode for the measurement, so dropout
    is disabled and batch-norm uses its running statistics. The mode it had
    on entry is restored before returning.

    Args:
        model: Module mapping a batch of images to per-class scores.
        train_loader: Iterable of ``(imgs, labels)`` batches, reported as "train".
        test_loader: Iterable of ``(imgs, labels)`` batches, reported as "test".

    Returns:
        dict with keys ``"train"`` and ``"test"`` holding the fraction of
        correctly classified samples (``nan`` for a loader with no samples).
    """
    # Send every batch to the device the model's parameters live on rather
    # than relying on a notebook-level ``device`` global. A model without
    # parameters leaves the batch wherever the loader produced it.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    accdict = {}
    try:
        with torch.no_grad():
            for name, loader in [("train", train_loader), ("test", test_loader)]:
                correct = 0
                total = 0
                for imgs, labels in loader:
                    if target is not None:
                        imgs = imgs.to(device=target)
                        labels = labels.to(device=target)
                    outputs = model(imgs)
                    # The predicted class is the index of the largest score.
                    predicted = outputs.argmax(dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

                accuracy = correct / total if total else float('nan')
                print("Accuracy {}: {:.3f}".format(name, accuracy))
                accdict[name] = accuracy
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)
    return accdict

In [4]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device}')

# Loss function passed to every training run in this notebook.
loss_fn = nn.CrossEntropyLoss()

Using cuda


In [5]:
# One preprocessing pipeline for both MNIST splits. The test split must be
# normalised with the same statistics as the training split (as the CIFAR-10
# cell does); otherwise the model is evaluated on inputs scaled differently
# from the ones it was trained on.
mnist_transform = transforms.Compose([
    # 32x32 inputs give LeNet5 its 16 * 5 * 5 = 400 flattened features.
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))])

mnist_train = datasets.MNIST(
    DATA_PATH, train=True, download=True, transform=mnist_transform)
mnist_test = datasets.MNIST(
    DATA_PATH, train=False, download=True, transform=mnist_transform)

In [6]:
# CIFAR-10 train and test splits, built from one expression so both are
# guaranteed to use the same tensor conversion and per-channel normalisation.
cifar10_train, cifar10_test = (
    datasets.CIFAR10(
        DATA_PATH,
        train=is_train,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4915, 0.4823, 0.4468),
                                 (0.2470, 0.2435, 0.2616)),
        ]),
    )
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


### LeNet, MNIST

In [7]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv/batch-norm/ReLU/max-pool stages, then three linear layers.

    The first linear layer takes 400 features, i.e. the 16 channels of 5x5
    that the two stages produce for a single-channel 32x32 input.

    Args:
        num_classes: Width of the final linear layer (number of class scores).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 5x5 conv -> batch norm -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

    def __init__(self, num_classes):
        super().__init__()
        # Feature extractor.
        self.layer1 = self._conv_stage(1, 6)
        self.layer2 = self._conv_stage(6, 16)
        # Classifier head.
        self.fc = nn.Linear(400, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        features = self.layer2(self.layer1(x))
        # Collapse everything except the batch dimension.
        flat = features.reshape(features.shape[0], -1)
        hidden = self.relu(self.fc(flat))
        hidden = self.relu1(self.fc1(hidden))
        return self.fc2(hidden)

In [8]:
# LeNet5 and the MNIST loaders used by the optimizer runs in this section.
model = LeNet5(num_classes=10).to(device=device)
train_loader = torch.utils.data.DataLoader(
    mnist_train, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps the batches,
# and therefore the reported accuracy, repeatable between runs.
test_loader = torch.utils.data.DataLoader(
    mnist_test, batch_size=BATCH_SIZE, shuffle=False)

**SGD**

In [None]:
# Baseline run: plain SGD (no momentum) on the current `model`.
optimizer = optim.SGD(model.parameters(), lr=1e-2)
train(EPOCHS, optimizer, model, loss_fn, train_loader)
calculate_accuracy(model, train_loader, test_loader)

2024-12-27 03:43:18.701674 Epoch 1, Training loss 0.9385420573751132
2024-12-27 03:43:30.978202 Epoch 2, Training loss 0.17531632191812
2024-12-27 03:43:43.432280 Epoch 3, Training loss 0.11634739998107155
2024-12-27 03:43:54.694734 Epoch 4, Training loss 0.09242940478647749
2024-12-27 03:44:06.093473 Epoch 5, Training loss 0.07849007670767605
2024-12-27 03:44:17.048328 Epoch 6, Training loss 0.06920792249497026
2024-12-27 03:44:28.009229 Epoch 7, Training loss 0.06252937906887382
2024-12-27 03:44:39.060079 Epoch 8, Training loss 0.057398481499403714
2024-12-27 03:44:50.261755 Epoch 9, Training loss 0.052718457348334295
2024-12-27 03:45:01.717590 Epoch 10, Training loss 0.04955120619619265


**Adadelta**

In [None]:
# Start from a fresh LeNet5 so this run measures Adadelta alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = LeNet5(num_classes=10).to(device=device)
optimizer = optim.Adadelta(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**NAG**

In [None]:
# Start from a fresh LeNet5 so this run measures Nesterov momentum alone;
# reusing `model` would continue from the weights an earlier optimizer cell trained.
model = LeNet5(num_classes=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2,
                      momentum=MOMENTUM, nesterov=True)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**Adam**

In [None]:
# Start from a fresh LeNet5 so this run measures Adam alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = LeNet5(num_classes=10).to(device=device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

### VGG16, CIFAR10

In [None]:
# VGG16 and the CIFAR-10 loaders used by the optimizer runs in this section.
model = models.vgg16(num_classes=10, dropout=0.5).to(device=device)
train_loader = torch.utils.data.DataLoader(
    cifar10_train, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps the batches,
# and therefore the reported accuracy, repeatable between runs.
test_loader = torch.utils.data.DataLoader(
    cifar10_test, batch_size=BATCH_SIZE, shuffle=False)

**SGD**

In [None]:
# Baseline run: plain SGD (no momentum) on the current `model`.
optimizer = optim.SGD(model.parameters(), lr=1e-2)
train(EPOCHS, optimizer, model, loss_fn, train_loader)
calculate_accuracy(model, train_loader, test_loader)

**Adadelta**

In [None]:
# Start from a fresh VGG16 so this run measures Adadelta alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = models.vgg16(num_classes=10, dropout=0.5).to(device=device)
optimizer = optim.Adadelta(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**NAG**

In [None]:
# Start from a fresh VGG16 so this run measures Nesterov momentum alone;
# reusing `model` would continue from the weights an earlier optimizer cell trained.
model = models.vgg16(num_classes=10, dropout=0.5).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2,
                      momentum=MOMENTUM, nesterov=True)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**Adam**

In [None]:
# Start from a fresh VGG16 so this run measures Adam alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = models.vgg16(num_classes=10, dropout=0.5).to(device=device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

### ResNet34, CIFAR10

In [None]:
# ResNet34 for the CIFAR-10 optimizer runs in this section.
model = models.resnet34(num_classes=10).to(device)
# Build the CIFAR-10 loaders here as well, so this section does not depend on
# whichever `train_loader` / `test_loader` an earlier section left behind.
train_loader = torch.utils.data.DataLoader(
    cifar10_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    cifar10_test, batch_size=BATCH_SIZE, shuffle=False)

**SGD**

In [None]:
# Baseline run: plain SGD (no momentum) on the current `model`.
optimizer = optim.SGD(model.parameters(), lr=1e-2)
train(EPOCHS, optimizer, model, loss_fn, train_loader)
calculate_accuracy(model, train_loader, test_loader)

**Adadelta**

In [None]:
# Start from a fresh ResNet34 so this run measures Adadelta alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = models.resnet34(num_classes=10).to(device)
optimizer = optim.Adadelta(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**NAG**

In [None]:
# Start from a fresh ResNet34 so this run measures Nesterov momentum alone;
# reusing `model` would continue from the weights an earlier optimizer cell trained.
model = models.resnet34(num_classes=10).to(device)
optimizer = optim.SGD(model.parameters(), lr=1e-2,
                      momentum=MOMENTUM, nesterov=True)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)

**Adam**

In [None]:
# Start from a fresh ResNet34 so this run measures Adam alone; reusing
# `model` would continue from the weights an earlier optimizer cell trained.
model = models.resnet34(num_classes=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
train(n_epochs=EPOCHS, optimizer=optimizer, model=model,
      loss_fn=loss_fn, train_loader=train_loader)
calculate_accuracy(model, train_loader, test_loader)