## Section I. Preparation

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

In [2]:
NUM_TRAIN = 49000

# Preprocessing pipeline built from torchvision.transforms: turn each image
# into a tensor, then standardize every RGB channel by subtracting a mean and
# dividing by a standard deviation. Both sets of statistics are hardcoded.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# One Dataset object per split (train / val / test). The train and val
# Datasets both wrap the CIFAR-10 training data; the split between them is
# made further down by the samplers handed to the DataLoaders.
cifar10_train = dset.CIFAR10(
    root='./datasets', train=True, download=True, transform=transform)
cifar10_val = dset.CIFAR10(
    root='./datasets', train=True, download=True, transform=transform)
cifar10_test = dset.CIFAR10(
    root='./datasets', train=False, download=True, transform=transform)

# A Dataset yields one example at a time; each DataLoader iterates over its
# Dataset and groups the examples into minibatches of 64. The Sampler tells a
# DataLoader which indices of the underlying Dataset it may draw from:
# indices [0, NUM_TRAIN) are used for training and [NUM_TRAIN, 50000) for
# validation.
loader_train = DataLoader(
    cifar10_train,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(indices=range(NUM_TRAIN)),
)
loader_val = DataLoader(
    cifar10_val,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(indices=range(NUM_TRAIN, 50000)),
)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [3]:
USE_GPU = True

# float32 is the floating-point type used throughout this tutorial
dtype = torch.float32

# Select CUDA only when it is both requested and actually available;
# otherwise fall back to the CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


In [4]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.

    Relies on notebook-level names defined in other cells: `device`, `dtype`,
    `loader_train`, `loader_val`, `print_every` and `check_accuracy_part34`.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for

    Returns: Nothing, but prints the training loss and the validation accuracy
    every `print_every` iterations. The iteration counter restarts at 0 at the
    beginning of every epoch.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU

    # The loss module does not depend on the minibatch, so build it once here
    # instead of re-creating it on every iteration.
    loss_function = nn.CrossEntropyLoss()

    for _ in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            # check_accuracy_part34 switches the model to evaluation mode, so
            # training mode has to be re-enabled before every step.
            model.train()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = loss_function(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [5]:
def check_accuracy_part34(loader, model):
    """
    Print the classification accuracy of a model over all batches of a loader.

    Relies on the notebook-level names `device` and `dtype` defined in another
    cell.

    Inputs:
    - loader: A DataLoader yielding (x, y) minibatches. Its `dataset.train`
      attribute only selects which header line is printed ("validation set"
      when truthy, "test set" otherwise).
    - model: A PyTorch Module producing one row of class scores per example.

    Returns: Nothing, but prints the number of correct predictions, the number
    of samples seen and the accuracy in percent. The model is left in
    evaluation mode. An empty loader is reported as 0 / 0 correct (0.00)
    instead of raising ZeroDivisionError.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            # predicted class = index of the highest score along dimension 1
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

    # The running count is a tensor once at least one batch has been seen;
    # convert it to a plain Python int a single time, after the loop.
    num_correct = int(num_correct)
    # Guard the division so an empty loader does not crash the report.
    acc = num_correct / num_samples if num_samples > 0 else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

## PyTorch Sequential API
## Sequential API: Two-Layer Network

In [6]:
# nn.Flatten is already a module, so it can be stacked inside nn.Sequential
# directly in front of the fully-connected layers.

hidden_layer_size = 4000
learning_rate = 1e-2

# Two-layer network: flatten -> linear -> ReLU -> linear (10 output scores).
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=3 * 32 * 32, out_features=hidden_layer_size),
    nn.ReLU(),
    nn.Linear(in_features=hidden_layer_size, out_features=10),
)

# SGD with Nesterov momentum
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    nesterov=True,
)

train_part34(model, optimizer, epochs=3)

Iteration 0, loss = 2.3080
Checking accuracy on validation set
Got 150 / 1000 correct (15.00)

Iteration 100, loss = 1.9033
Checking accuracy on validation set
Got 384 / 1000 correct (38.40)

Iteration 200, loss = 1.5011
Checking accuracy on validation set
Got 397 / 1000 correct (39.70)

Iteration 300, loss = 1.5719
Checking accuracy on validation set
Got 383 / 1000 correct (38.30)

Iteration 400, loss = 1.7038
Checking accuracy on validation set
Got 443 / 1000 correct (44.30)

Iteration 500, loss = 1.7793
Checking accuracy on validation set
Got 433 / 1000 correct (43.30)

Iteration 600, loss = 1.6491
Checking accuracy on validation set
Got 419 / 1000 correct (41.90)

Iteration 700, loss = 1.9536
Checking accuracy on validation set
Got 434 / 1000 correct (43.40)

Iteration 0, loss = 1.7938
Checking accuracy on validation set
Got 420 / 1000 correct (42.00)

Iteration 100, loss = 1.8429
Checking accuracy on validation set
Got 438 / 1000 correct (43.80)

Iteration 200, loss = 1.5600
Check

## Sequential API: Three-Layer ConvNet

In [7]:
# Clear the names left over from the previous experiment before rebuilding.
model = optimizer = None

# Two conv -> batch-norm -> ReLU -> max-pool stages followed by a linear
# classifier. The shape comments give the activation shape entering each stage.
model = nn.Sequential(
    # N x 3 x 32 x 32
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(num_features=32),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    # N x 32 x 16 x 16
    nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(num_features=16),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    # N x 16 x 8 x 8
    nn.Flatten(),
    nn.Linear(in_features=16 * 8 * 8, out_features=10),
)

# Adam with its default hyperparameters
optimizer = optim.Adam(model.parameters())

train_part34(model, optimizer, epochs=3)

Iteration 0, loss = 2.3929
Checking accuracy on validation set
Got 131 / 1000 correct (13.10)

Iteration 100, loss = 1.4644
Checking accuracy on validation set
Got 476 / 1000 correct (47.60)

Iteration 200, loss = 1.7257
Checking accuracy on validation set
Got 514 / 1000 correct (51.40)

Iteration 300, loss = 1.1933
Checking accuracy on validation set
Got 530 / 1000 correct (53.00)

Iteration 400, loss = 1.1519
Checking accuracy on validation set
Got 552 / 1000 correct (55.20)

Iteration 500, loss = 1.2115
Checking accuracy on validation set
Got 588 / 1000 correct (58.80)

Iteration 600, loss = 1.0462
Checking accuracy on validation set
Got 612 / 1000 correct (61.20)

Iteration 700, loss = 1.2258
Checking accuracy on validation set
Got 611 / 1000 correct (61.10)

Iteration 0, loss = 1.0449
Checking accuracy on validation set
Got 624 / 1000 correct (62.40)

Iteration 100, loss = 1.1443
Checking accuracy on validation set
Got 631 / 1000 correct (63.10)

Iteration 200, loss = 0.9295
Check

In [8]:
# Print a per-layer table (output shape and parameter count) for the current
# `model`, given a single input of size 3 x 32 x 32.
from torchsummary import summary

summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
       BatchNorm2d-2           [-1, 32, 32, 32]              64
              ReLU-3           [-1, 32, 32, 32]               0
         MaxPool2d-4           [-1, 32, 16, 16]               0
            Conv2d-5           [-1, 16, 16, 16]           4,624
       BatchNorm2d-6           [-1, 16, 16, 16]              32
              ReLU-7           [-1, 16, 16, 16]               0
         MaxPool2d-8             [-1, 16, 8, 8]               0
           Flatten-9                 [-1, 1024]               0
           Linear-10                   [-1, 10]          10,250
Total params: 15,866
Trainable params: 15,866
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.92
Params size (MB): 0.06
Estimated Tot