Refs:

https://github.com/deep-learning-with-pytorch/dlwpt-code

In [1]:
import numpy as np
import torch
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn

torch.set_printoptions(edgeitems=2)
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
## fix PyTorch's global random seed so runs of this notebook are repeatable
torch.manual_seed(1)

<torch._C.Generator at 0x7ffd381e13b0>

### Data

In [3]:
data_path = '../../data/'

## CIFAR-10 training split: every image is converted to a tensor and then
## standardized channel by channel with the given mean / std
cifar10 = datasets.CIFAR10(
    root=data_path,
    train=True,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.4915, 0.4823, 0.4468),
                             std=(0.2470, 0.2435, 0.2616)),
    ]),
)

In [4]:
## CIFAR-10 held-out split (train=False), preprocessed with the same
## tensor conversion and per-channel normalization as the training split
cifar10_val = datasets.CIFAR10(
    root=data_path,
    train=False,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.4915, 0.4823, 0.4468),
                             std=(0.2470, 0.2435, 0.2616)),
    ]),
)

In [5]:
## category names exposed by the dataset object; a label is an index into this list
class_names = cifar10.classes
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [6]:
## build a two-class dataset: keep only airplanes (label 0) and birds (label 2)
## and remap their labels to 0 and 1 respectively
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']
cifar2 = [
    (img, label_map[label])
    for img, label in cifar10
    if label in label_map
]
cifar2_val = [
    (img, label_map[label])
    for img, label in cifar10_val
    if label in label_map
]

In [7]:
## take the first image (its label is discarded) and show its shape once a
## leading batch dimension is added -- the form in which it is fed to the model later
img, _ = cifar2[0]
img.unsqueeze(0).shape

torch.Size([1, 3, 32, 32])

### CNN

In [8]:
## Sequential CNN: two (conv -> tanh -> max-pool) stages followed by a small
## fully connected classifier that produces 2 class scores.
## nn.Flatten() is required between the last pooling layer and the first
## nn.Linear: without it the linear layer receives an (N, 8, 8, 8) tensor
## instead of (N, 512) and the forward pass fails.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),  ## 3C x32x32  --> 16C x32x32
    nn.Tanh(),                                   ## Tanh activation function
    nn.MaxPool2d(2),                             ## 16C x32x32 --> 16C x16x16
    nn.Conv2d(16, 8, kernel_size=3, padding=1),  ## 16C x16x16 --> 8C x16x16
    nn.Tanh(),
    nn.MaxPool2d(2),                             ## 8C x16x16  --> 8C x8x8
    nn.Flatten(),                                ## 8C x8x8    --> 512 (batch dim kept)
    nn.Linear(8*8*8, 32),                        ## 512 --> 32
    nn.Tanh(),
    nn.Linear(32, 2),                            ## 32 --> 2
)

### CNN with nn.Module

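The `Net` class below mirrors the `nn.Sequential` model above, but writing the `forward` method ourselves lets us manipulate the intermediate outputs between layers — for example, reshaping the pooled feature maps into a flat vector before the first linear layer.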

In [9]:
class Net(nn.Module):
    """Small CNN that maps 3-channel 32x32 images to two class scores.

    Layout: (conv -> tanh -> 2x2 max-pool) twice, then linear -> tanh -> linear.
    Every stage, including the parameter-free activations and poolings, is
    registered as a submodule.
    """

    def __init__(self):
        super().__init__()
        ## stage 1: 3C x32x32 --> 16C x32x32 --> (pool) 16C x16x16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        ## stage 2: 16C x16x16 --> 8C x16x16 --> (pool) 8C x8x8
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        ## classifier head: 512 --> 32 --> 2
        self.fc1 = nn.Linear(8*8*8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch `x`."""
        features = self.conv1(x)
        features = self.act1(features)
        features = self.pool1(features)

        features = self.conv2(features)
        features = self.act2(features)
        features = self.pool2(features)

        ## -1 lets view() infer the batch size, which is not known in advance
        flat = features.view(-1, 8*8*8)
        hidden = self.act3(self.fc1(flat))
        return self.fc2(hidden)

In [10]:
## instantiate the module-based network (replaces the Sequential `model` above)
model = Net()

In [11]:
## element count of every parameter tensor, shown together with their total
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(18090, [432, 16, 1152, 8, 16384, 32, 64, 2])

### The functional API

Keep using the `nn.Module` versions of `nn.Linear` and `nn.Conv2d` so that `Net` can register and manage their `Parameter`s during training. Pooling and activation functions, however, have no parameters, so we can safely switch to their functional counterparts.

In [12]:
import torch.nn.functional as F

class Net(nn.Module):
    """Same CNN as before, written with the functional API.

    Only the layers that own parameters (two convolutions, two linear layers)
    are stored as submodules; tanh and max-pooling are applied as plain
    function calls inside `forward`.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8*8*8, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the raw class scores for a batch `x`."""
        stage1 = F.max_pool2d(self.conv1(x).tanh(), kernel_size=2)
        stage2 = F.max_pool2d(self.conv2(stage1).tanh(), kernel_size=2)
        ## flatten to (batch, 512); -1 lets view() infer the batch size
        flat = stage2.view(-1, 8*8*8)
        hidden = self.fc1(flat).tanh()
        return self.fc2(hidden)

In [13]:
## smoke test: push a single image (with a batch dimension added) through an
## untrained Net; the result is one row holding the two raw class scores
model = Net()
model(img.unsqueeze(0))

tensor([[ 0.0402, -0.1418]], grad_fn=<AddmmBackward>)

### Training the model

In [14]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Run mini-batch training and periodically report the average loss.

    Args:
        n_epochs: number of full passes over `train_loader`.
        optimizer: optimizer whose `zero_grad()` / `step()` are called per batch.
        model: callable applied to each batch of images.
        loss_fn: callable taking (outputs, labels) and returning a scalar loss.
        train_loader: sized iterable yielding (imgs, labels) batches.

    Returns:
        None. The mean per-batch loss is printed, with a timestamp, after
        epoch 1 and after every fifth epoch.
    """
    ## epochs are numbered 1..n_epochs rather than starting at 0
    for epoch in range(1, n_epochs + 1):
        batch_losses = []

        for imgs, labels in train_loader:
            ## forward pass and loss for this batch
            predictions = model(imgs)
            batch_loss = loss_fn(predictions, labels)

            ## clear stale gradients, back-propagate, then update the weights
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            ## .item() extracts a plain Python number from the loss tensor
            batch_losses.append(batch_loss.item())

        is_report_epoch = epoch == 1 or epoch % 5 == 0
        if not is_report_epoch:
            continue

        mean_loss = sum(batch_losses, 0.0) / len(train_loader)
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss))

In [15]:
## serve the two-class training set in shuffled mini-batches of 64
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)

## fresh network, plain SGD and cross-entropy over the two class scores
model = Net()
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(n_epochs=50,
              optimizer=optimizer,
              model=model,
              loss_fn=loss_fn,
              train_loader=train_loader)

2020-11-20 19:16:45.663559 Epoch 1, Training loss 0.5629173946228756
2020-11-20 19:16:53.996157 Epoch 5, Training loss 0.40446035782243034
2020-11-20 19:17:04.522009 Epoch 10, Training loss 0.32829160352421416
2020-11-20 19:17:15.088859 Epoch 15, Training loss 0.31165621804583604
2020-11-20 19:17:25.653615 Epoch 20, Training loss 0.297269969609133
2020-11-20 19:17:36.210832 Epoch 25, Training loss 0.28322002196767526
2020-11-20 19:17:46.789316 Epoch 30, Training loss 0.2685920842893564
2020-11-20 19:17:57.388806 Epoch 35, Training loss 0.2566953186111845
2020-11-20 19:18:07.998929 Epoch 40, Training loss 0.24859189076028812
2020-11-20 19:18:18.602011 Epoch 45, Training loss 0.23925565866528042
2020-11-20 19:18:29.202617 Epoch 50, Training loss 0.22952704383119657


In [16]:
### measure accuracy
## evaluation loaders iterate in a fixed order (shuffle=False): only aggregate
## accuracy is computed, so shuffling would add nothing
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)

def validate(model, train_loader, val_loader):
    """Print the classification accuracy of `model` on both data splits.

    Args:
        model: callable mapping a batch of images to per-class scores.
        train_loader: iterable of (imgs, labels) batches, reported as "train".
        val_loader: iterable of (imgs, labels) batches, reported as "val".

    Returns:
        None. One "Accuracy <split>: <fraction>" line is printed per split.
    """
    splits = {"train": train_loader, "val": val_loader}

    for name, loader in splits.items():
        hits = 0
        seen = 0

        ## evaluation needs no gradients, so autograd tracking is switched off
        with torch.no_grad():
            for imgs, labels in loader:
                ## predicted class = index of the highest score in each row
                predicted = model(imgs).max(dim=1).indices
                seen += labels.size(0)
                hits += (predicted == labels).sum().item()

        print("Accuracy {}: {:.2f}".format(name, hits / seen))

## report the accuracy of the model trained above on both splits
validate(model, train_loader, val_loader)

Accuracy train: 0.90
Accuracy val: 0.89


### Saving the model

In [17]:
## persist only the learned weights (the state dict), not the model object itself
torch.save(model.state_dict(), data_path + 'birds_vs_airplanes.pt')

### Loading the model

In [18]:
## loading network weights
## the file holds a state dict only, so the architecture is re-created first
## and the saved weights are then copied into it
loaded_model = Net()
loaded_model.load_state_dict(torch.load(data_path + 'birds_vs_airplanes.pt'))

<All keys matched successfully>

## Training on the GPU

In [19]:
## pick the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on: {device}.")

Training on: cpu.


In [20]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, device=None):
    """Device-aware mini-batch training loop.

    Each batch is moved to `device` before the forward pass, so the model can
    live on a GPU while the data loader produces CPU tensors.

    Args:
        n_epochs: number of full passes over `train_loader`.
        optimizer: optimizer whose `zero_grad()` / `step()` are called per batch.
        model: the `nn.Module` being trained.
        loss_fn: callable taking (outputs, labels) and returning a scalar loss.
        train_loader: sized iterable yielding (imgs, labels) tensor batches.
        device: where to move each batch. When omitted, the device of the
            model's first parameter is used (CPU if the model has none), so
            the function no longer depends on a notebook-global `device` that
            may disagree with where the model actually lives.

    Returns:
        None. The mean per-batch loss is printed, with a timestamp, after
        epoch 1 and after every fifth epoch.
    """
    if device is None:
        ## follow the model: inputs must be on the same device as the weights
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            ## move imgs and labels to the device:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)

            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            ## clear stale gradients, back-propagate, then update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            ## .item() extracts a plain Python number from the loss tensor
            loss_train += loss.item()

        if epoch == 1 or epoch % 5 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [21]:
## serve the two-class training set in shuffled mini-batches of 64
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)

## the network's parameters are moved to the selected device *before* the
## optimizer is built from them
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(n_epochs=50,
              optimizer=optimizer,
              model=model,
              loss_fn=loss_fn,
              train_loader=train_loader)

2020-11-20 19:18:32.757868 Epoch 1, Training loss 0.5597875814908629
2020-11-20 19:18:41.385512 Epoch 5, Training loss 0.4074325700094745
2020-11-20 19:18:52.066931 Epoch 10, Training loss 0.34432858390033627
2020-11-20 19:19:02.752808 Epoch 15, Training loss 0.31697659631063985
2020-11-20 19:19:13.478662 Epoch 20, Training loss 0.29688761929038227
2020-11-20 19:19:24.140288 Epoch 25, Training loss 0.2818479760057607
2020-11-20 19:19:34.775784 Epoch 30, Training loss 0.26767586916685104
2020-11-20 19:19:45.421237 Epoch 35, Training loss 0.25634436584581993
2020-11-20 19:19:56.061959 Epoch 40, Training loss 0.24430854613803754
2020-11-20 19:20:06.812985 Epoch 45, Training loss 0.23500440953073987
2020-11-20 19:20:17.678177 Epoch 50, Training loss 0.22465722737418617


In [22]:
## loading network weights
## map_location remaps the stored tensors onto `device` while loading, so the
## checkpoint can be restored whatever device it was saved from
loaded_model = Net().to(device=device)
loaded_model.load_state_dict(torch.load(data_path
                                        + 'birds_vs_airplanes.pt',
                                        map_location=device))

<All keys matched successfully>