In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

%matplotlib inline

In [2]:
def get_mean_and_std(dataset, num_channels):
    """
    Calculate the mean and std of a dataset
    """
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    for inputs, labels in dataloader:
      for channel in range(num_channels):
        mean[channel] += inputs[:,channel,:,:].mean()
        std[channel] += inputs[:,channel,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std

#Dummy dataset to get mean and std
temp_dataset = torchvision.datasets.CIFAR10(root="/content/data", train=True, download=True, transform=transforms.ToTensor())
mean, std = get_mean_and_std(temp_dataset, 3)
mean = tuple(mean.numpy())
std = tuple(std.numpy())


transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
train_dataset = torchvision.datasets.CIFAR10(root="/content/data/train", train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root="/content/data/test", train=False, download=True, transform=transform_test)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43157810.95it/s]


Extracting /content/data/train/cifar-10-python.tar.gz to /content/data/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 42090831.34it/s]


Extracting /content/data/test/cifar-10-python.tar.gz to /content/data/test


In [3]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
batch_size = 64
learning_rates = [0.05, 0.01, 0.005, 0.001]
activations = ["relu", "tanh"]
pools = ["max", "average"]
optimizers = ["Adam", "SGD"]
epochs = 10


trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer','dog', 'frog', 'horse', 'ship', 'truck')

In [4]:
class LeNet(nn.Module):
  def __init__(self, act_func="relu", pool_type="max"):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6*3, 5)
    self.conv2 = nn.Conv2d(6*3, 16*3, 5)
    self.pool1 = self.pool(pool_type)
    self.pool2 = self.pool(pool_type)
    self.fc1   = nn.Linear(1200, 120)
    self.fc2   = nn.Linear(120, 84)
    self.fc3   = nn.Linear(84, 10)
    self.act_func = act_func
    self.pool_type = pool_type

  def pool(self, pool_type="max"):
    if pool_type == "average":
      return nn.AvgPool2d(2)
    else:
      return nn.MaxPool2d(2)

  def act(self, x, act_func="relu"):
    if act_func == "tanh":
      return F.tanh(x)
    else:
      return F.relu(x)

  def forward(self, x):
    x = self.act(self.conv1(x), self.act_func)
    x = self.pool1(x)
    x = self.act(self.conv2(x), self.act_func)
    x = self.pool2(x)
    x = x.view(x.size(0), -1)
    x = self.act(self.fc1(x), self.act_func)
    x = self.act(self.fc2(x), self.act_func)
    x = self.fc3(x)
    return x

In [5]:
def compute_accuracy_test(model, dataloader, device):
    """
    Compute accuracy on test set
    """
    correct, total = 0, 0
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        _, predict = outputs.max(1)

        total += targets.size(0)
        correct += predict.eq(targets).sum().item()

    return correct/total * 100

In [6]:
def train(model, trainloader, testloader, criterion, optimizer, epochs, device, compute_accuracy_test):
  loss_history, acc_history = [], []
  for epoch in range(epochs):
    print(f'\nEpoch {epoch+1}:')
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predict = outputs.max(1)
        total += targets.size(0)
        correct += predict.eq(targets).sum().item()
        if (not batch_idx % 150) and batch_idx != 0:
              print ('Batch %03d | Cost: %.6f | Accuracy: %.4f'
                    %(batch_idx, train_loss/(batch_idx+1), 100*correct/total))

    loss_history.append(train_loss/(batch_idx+1))
    acc_history.append(100*correct/total)

    model.eval()
    with torch.set_grad_enabled(False):
        test_acc = compute_accuracy_test(net, testloader, device)

  return loss_history, acc_history, test_acc

In [None]:
def subplot_act_pool(axs, idx):
  for key, value in history.items():
    xs = [x+1 for x in range(len(value[idx]))]
    axs.plot(xs, value[idx], label=key+" pool")
  axs.legend()
  axs.set_xlabel("Epoch")
  if idx == 0:
    axs.set_ylabel("Training loss")
  if idx == 1:
    axs.set_ylabel("Accuracy")

def plot_act_pool(history):
  """
  Plot combinations of activation functions and pooling methods
  """
  fig, axs = plt.subplots(1,2, figsize=(12, 5))
  for i in range(2):
    subplot_act_pool(axs[i], i)
  plt.annotate('Train with Adam optimizer using constant learning rate=0.001 and weight decay=5e-4',
              xy = (1.0, -0.2),
              xycoords='axes fraction',
              ha='right',
              va="center",
              fontsize=10)
  fig.show()

def subplot_opt_lr(axs, key, value):
  xs = [x+1 for x in range(10)]
  axs.plot(xs, value["Adam"][0], label="Adam")
  axs.plot(xs, value["SGD"][0], label="SGD with momentum")
  axs.text(xs[-1], value["Adam"][0][-1], '{:.3f}'.format(value["Adam"][0][-1]))
  axs.text(xs[-1], value["SGD"][0][-1], '{:.3f}'.format(value["SGD"][0][-1]))
  axs.legend()
  axs.set_xlabel("Epoch")
  axs.set_ylabel("Training loss")
  axs.set_title(f"Learning rate {key}")

def plot_opt_lr(history):
  """
  Plot combinations of different optimizers and learning rates
  """
  fig, axs = plt.subplots(2,2, figsize=(12, 10))
  for i, (key, value) in enumerate(history.items()):
    subplot_opt_lr(axs[i//2, i%2], key, value)
  fig.show()



In [None]:
#Compare between different learning rates and optimizers
history = {}
for lr in learning_rates:
  opt_dict = {}
  for opt in optimizers:

    net = LeNet()
    net.to(device)
    if device == "cuda:0":
      net = nn.DataParallel(net)

    if opt == "Adam":
      optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=5e-4)
    if opt == "SGD":
      optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    criterion = nn.CrossEntropyLoss()
    loss_history, acc_history, test_acc = train(net, trainloader, testloader, criterion, optimizer, epochs, device, compute_accuracy_test)

    opt_dict[opt] = (loss_history, acc_history, test_acc)
  history[str(lr)] = opt_dict

plot_opt_lr(history)

In [None]:
#Compare between different pooling methods and activation functions
history = {}
for activation in activations:
  for pool in pools:
    net = LeNet(act_func=activation, pool_type=pool)
    net.to(device)
    if device == "cuda:0":
      net = nn.DataParallel(net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rates[2], weight_decay=5e-4)

    loss_history, acc_history, test_acc = train(net, trainloader, testloader, criterion, optimizer, epochs, device, compute_accuracy_test)

    history[f"{activation} + {pool}"] = (loss_history, acc_history, test_acc)

plot_act_pool(history)