In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 26723610.40it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.leaky_relu(self.conv1(x)))
        x = self.pool(F.leaky_relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.fc3(x)
        return x


model_leaky_relu = Net()

In [4]:
def train_model(optimizer, criterion, epochs, model):
  for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        writer.add_scalar("Loss/train", loss, epoch)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

  print('Finished Training')
  writer.flush()

In [5]:
def predict_test(model):
  correct = 0
  total = 0
  # since we're not training, we don't need to calculate the gradients for our outputs
  with torch.no_grad():
      for data in testloader:
          images, labels = data
          # calculate outputs by running images through the network
          outputs = model(images)
          # the class with the highest energy is what we choose as prediction
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

### Use SGD as Optimizer



In [6]:
criterion = nn.CrossEntropyLoss()
optimizer_sgd = optim.SGD(model_leaky_relu.parameters(), lr=0.0001)
train_model(optimizer_sgd, criterion, 5, model_leaky_relu)
predict_test(model_leaky_relu)

[1,  2000] loss: 2.304
[1,  4000] loss: 2.303
[1,  6000] loss: 2.304
[1,  8000] loss: 2.304
[1, 10000] loss: 2.304
[1, 12000] loss: 2.303
[2,  2000] loss: 2.302
[2,  4000] loss: 2.303
[2,  6000] loss: 2.303
[2,  8000] loss: 2.303
[2, 10000] loss: 2.302
[2, 12000] loss: 2.301
[3,  2000] loss: 2.302
[3,  4000] loss: 2.301
[3,  6000] loss: 2.301
[3,  8000] loss: 2.301
[3, 10000] loss: 2.300
[3, 12000] loss: 2.300
[4,  2000] loss: 2.300
[4,  4000] loss: 2.299
[4,  6000] loss: 2.299
[4,  8000] loss: 2.299
[4, 10000] loss: 2.298
[4, 12000] loss: 2.298
[5,  2000] loss: 2.298
[5,  4000] loss: 2.296
[5,  6000] loss: 2.297
[5,  8000] loss: 2.296
[5, 10000] loss: 2.296
[5, 12000] loss: 2.295
Finished Training
Accuracy of the network on the 10000 test images: 12 %


### Use ADAM as Optimizer


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer_adam = optim.Adam(model_leaky_relu.parameters(), lr=0.0001)
train_model(optimizer_adam, criterion, 5, model_leaky_relu)
predict_test(model_leaky_relu)

[1,  2000] loss: 2.025
[1,  4000] loss: 1.818
[1,  6000] loss: 1.716
[1,  8000] loss: 1.670
[1, 10000] loss: 1.612
[1, 12000] loss: 1.566
[2,  2000] loss: 1.535
[2,  4000] loss: 1.532
[2,  6000] loss: 1.514
[2,  8000] loss: 1.479
[2, 10000] loss: 1.459
[2, 12000] loss: 1.447
[3,  2000] loss: 1.450
[3,  4000] loss: 1.415
[3,  6000] loss: 1.393
[3,  8000] loss: 1.394
[3, 10000] loss: 1.386
[3, 12000] loss: 1.365
[4,  2000] loss: 1.345
[4,  4000] loss: 1.338
[4,  6000] loss: 1.317
[4,  8000] loss: 1.312
[4, 10000] loss: 1.319
[4, 12000] loss: 1.314
[5,  2000] loss: 1.277
[5,  4000] loss: 1.281
[5,  6000] loss: 1.258
[5,  8000] loss: 1.266
[5, 10000] loss: 1.248
[5, 12000] loss: 1.253
Finished Training
Accuracy of the network on the 10000 test images: 54 %


## Use Tanh as Activation Function

In [9]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.tanh(self.conv1(x)))
        x = self.pool(torch.tanh(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x


model_tanh = Net()

### Use SGD as Optimizer


In [10]:
criterion = nn.CrossEntropyLoss()
optimizer_sgd = optim.SGD(model_tanh.parameters(), lr=0.0001)
train_model(optimizer_sgd, criterion, 5, model_tanh)
predict_test(model_tanh)

[1,  2000] loss: 2.304
[1,  4000] loss: 2.303
[1,  6000] loss: 2.303
[1,  8000] loss: 2.300
[1, 10000] loss: 2.299
[1, 12000] loss: 2.297
[2,  2000] loss: 2.294
[2,  4000] loss: 2.292
[2,  6000] loss: 2.288
[2,  8000] loss: 2.283
[2, 10000] loss: 2.279
[2, 12000] loss: 2.273
[3,  2000] loss: 2.263
[3,  4000] loss: 2.253
[3,  6000] loss: 2.242
[3,  8000] loss: 2.226
[3, 10000] loss: 2.211
[3, 12000] loss: 2.198
[4,  2000] loss: 2.175
[4,  4000] loss: 2.158
[4,  6000] loss: 2.142
[4,  8000] loss: 2.132
[4, 10000] loss: 2.109
[4, 12000] loss: 2.097
[5,  2000] loss: 2.080
[5,  4000] loss: 2.069
[5,  6000] loss: 2.058
[5,  8000] loss: 2.036
[5, 10000] loss: 2.030
[5, 12000] loss: 2.025
Finished Training
Accuracy of the network on the 10000 test images: 26 %


### Use ADAM as Optimizer


In [11]:
criterion = nn.CrossEntropyLoss()
optimizer_adam = optim.Adam(model_tanh.parameters(), lr=0.0001)
train_model(optimizer_adam, criterion, 5, model_tanh)
predict_test(model_tanh)

[1,  2000] loss: 1.945
[1,  4000] loss: 1.761
[1,  6000] loss: 1.661
[1,  8000] loss: 1.602
[1, 10000] loss: 1.560
[1, 12000] loss: 1.505
[2,  2000] loss: 1.452
[2,  4000] loss: 1.440
[2,  6000] loss: 1.425
[2,  8000] loss: 1.378
[2, 10000] loss: 1.373
[2, 12000] loss: 1.381
[3,  2000] loss: 1.336
[3,  4000] loss: 1.315
[3,  6000] loss: 1.332
[3,  8000] loss: 1.297
[3, 10000] loss: 1.285
[3, 12000] loss: 1.276
[4,  2000] loss: 1.253
[4,  4000] loss: 1.244
[4,  6000] loss: 1.228
[4,  8000] loss: 1.223
[4, 10000] loss: 1.240
[4, 12000] loss: 1.200
[5,  2000] loss: 1.196
[5,  4000] loss: 1.179
[5,  6000] loss: 1.161
[5,  8000] loss: 1.187
[5, 10000] loss: 1.158
[5, 12000] loss: 1.162
Finished Training
Accuracy of the network on the 10000 test images: 57 %


In [12]:
writer.close()