In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7f72f82f4cb0>

In [2]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [3]:
from torchvision import datasets, transforms
data_path = '../data-unversioned/p1ch6/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))
cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified
Files already downloaded and verified


In [4]:
label_map = {0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9}
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
cifar10 = [(img, label_map[label])
          for img, label in cifar10 
          if label in [0,1,2,3,4,5,6,7,8,9]]
cifar10_val = [(img, label_map[label])
              for img, label in cifar10_val
              if label in [0,1,2,3,4,5,6,7,8,9]]

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8) # <1>
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

In [6]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [7]:
import time
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)

model = Net().to(device=device)  # <1>
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

time1 = time.time()
training_loop(
    n_epochs = 200,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
time2 = time.time()
time3 = time2-time1
print(time3)

Training on device cuda.
2021-04-19 21:27:42.227406 Epoch 1, Training loss 2.028366512197363
2021-04-19 21:28:01.843502 Epoch 10, Training loss 1.154138752947683
2021-04-19 21:28:22.519436 Epoch 20, Training loss 0.9853798715049958
2021-04-19 21:28:43.255942 Epoch 30, Training loss 0.9043115894965199
2021-04-19 21:29:03.804381 Epoch 40, Training loss 0.850585126282309
2021-04-19 21:29:24.046540 Epoch 50, Training loss 0.8108680477685026
2021-04-19 21:29:44.406119 Epoch 60, Training loss 0.7796842048463919
2021-04-19 21:30:05.670228 Epoch 70, Training loss 0.7528639595069544
2021-04-19 21:30:29.450660 Epoch 80, Training loss 0.7283742182013934
2021-04-19 21:30:52.698926 Epoch 90, Training loss 0.7110933865732549
2021-04-19 21:31:15.200056 Epoch 100, Training loss 0.689933990159303
2021-04-19 21:31:37.887068 Epoch 110, Training loss 0.6752988641600475
2021-04-19 21:32:01.119611 Epoch 120, Training loss 0.6596245721859091
2021-04-19 21:32:22.227179 Epoch 130, Training loss 0.6461609196098

In [11]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)
all_acc_dict = collections.OrderedDict()

def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

all_acc_dict["baseline"] = validate(model, train_loader, val_loader)

Accuracy train: 0.75
Accuracy val: 0.62


In [17]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(8, 4, kernel_size=3, padding=1)
        self.act3 = nn.Tanh()
        self.pool3 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8) # <1>
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

In [18]:
import time
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)

model = Net().to(device=device)  # <1>
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

time1 = time.time()
training_loop(
    n_epochs = 200,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
time2 = time.time()
time3 = time2-time1
print(time3)

Training on device cuda.
2021-04-19 21:47:39.648283 Epoch 1, Training loss 2.0409646256805383
2021-04-19 21:47:54.789939 Epoch 10, Training loss 1.2012045902516835
2021-04-19 21:48:11.485192 Epoch 20, Training loss 1.0187745111830093
2021-04-19 21:48:28.284106 Epoch 30, Training loss 0.9258129337559575
2021-04-19 21:48:45.047059 Epoch 40, Training loss 0.8698664039678281
2021-04-19 21:49:01.861198 Epoch 50, Training loss 0.824110525724528
2021-04-19 21:49:18.620275 Epoch 60, Training loss 0.7890334278345108
2021-04-19 21:49:35.446955 Epoch 70, Training loss 0.7610324769449965
2021-04-19 21:49:52.305443 Epoch 80, Training loss 0.7380371911598899
2021-04-19 21:50:09.107795 Epoch 90, Training loss 0.7156534763934362
2021-04-19 21:50:25.861680 Epoch 100, Training loss 0.6969152129519626
2021-04-19 21:50:42.539987 Epoch 110, Training loss 0.6814602776180447
2021-04-19 21:50:59.327060 Epoch 120, Training loss 0.6656537890967811
2021-04-19 21:51:16.088758 Epoch 130, Training loss 0.6503787103

In [16]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)
all_acc_dict = collections.OrderedDict()

def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

all_acc_dict["baseline"] = validate(model, train_loader, val_loader)

Accuracy train: 0.79
Accuracy val: 0.64
