# EE-559: Practical Session 4

## Introduction

The objective of this session is to implement a convolutional network and test the influence of the architecture on the performance.

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

## Training function

Re-organize the code to define and use a function

`train_model(model, train_input, train_target, mini_batch_size)`

**Hint**: My version is 605 characters long

In [2]:
# Load the train/test tensors through the course prologue helper, asking (per
# the keyword arguments) for one-hot labels, normalized inputs and no flattening.
train_input, train_target, test_input, test_target = prologue.load_data(
    one_hot_labels=True,
    normalize=True,
    flatten=False,
)

* Using MNIST
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [3]:
class Net(nn.Module):
    """Convolutional classifier with two conv layers and two linear layers.

    Each convolution is followed by max-pooling and a ReLU. The pooled
    feature maps are flattened into rows of ``fc1.in_features`` (256) values,
    which is what a 1x28x28 image yields (64 channels x 2 x 2).

    Args:
        nb_hidden: number of units in the hidden fully connected layer.
            Defaults to 200, the size of the default network.
    """

    def __init__(self, nb_hidden=200):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        """Return one row of 10 class scores per flattened sample of ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        # Take the flattened width from fc1 itself so the size is declared
        # in exactly one place (the constructor).
        x = F.relu(self.fc1(x.view(-1, self.fc1.in_features)))
        x = self.fc2(x)
        return x

class Net2(nn.Module):
    """Convolutional classifier with three conv layers and two linear layers.

    The first two convolutions are followed by 2x2 max-pooling and a ReLU,
    the third by a ReLU only. The feature maps are flattened into rows of
    ``fc1.in_features`` (9 * 64) values, which is what a 1x28x28 image
    yields (64 channels x 3 x 3).

    Args:
        nb_hidden: number of units in the hidden fully connected layer.
            Defaults to 200, the value that was previously hard-coded.
    """

    def __init__(self, nb_hidden=200):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(9 * 64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        """Return one row of 10 class scores per flattened sample of ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.conv3(x))
        # Take the flattened width from fc1 itself so the size is declared
        # in exactly one place (the constructor).
        x = F.relu(self.fc1(x.view(-1, self.fc1.in_features)))
        x = self.fc2(x)
        return x

In [4]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs=25, eta=1e-1):
    """Train ``model`` in place with mini-batch gradient descent on an MSE loss.

    After each epoch, prints the epoch index and the sum of the per-batch
    losses over that epoch.

    Args:
        model: module whose parameters are updated in place.
        train_input: training samples, indexed along dimension 0.
        train_target: targets aligned with ``train_input`` along dimension 0.
        mini_batch_size: maximum number of samples per gradient step. The
            last batch is smaller when the sample count is not a multiple.
        nb_epochs: number of passes over the training set (default 25).
        eta: learning rate of the plain gradient step (default 0.1).
    """
    criterion = nn.MSELoss()
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, nb_samples, mini_batch_size):
            # Slicing clamps at the end of the tensor, so a trailing partial
            # batch is processed instead of raising as narrow() would.
            output = model(train_input[b:b + mini_batch_size])
            loss = criterion(output, train_target[b:b + mini_batch_size])
            model.zero_grad()
            loss.backward()
            sum_loss = sum_loss + loss.item()
            with torch.no_grad():
                for p in model.parameters():
                    # Parameters that took no part in the forward pass have
                    # no gradient; leave them untouched.
                    if p.grad is not None:
                        p -= eta * p.grad
        print(e, sum_loss)

def compute_nb_errors(model, input, target, mini_batch_size):
    """Count the samples that ``model`` misclassifies under a winner-take-all rule.

    The predicted class of a sample is the index of its largest output. A
    sample counts as an error when ``target`` holds a value <= 0 at that
    index (i.e. ``target`` is read as one row of per-class indicators per
    sample).

    Args:
        model: callable mapping a batch of inputs to per-class scores.
        input: samples, indexed along dimension 0.
        target: 2-D tensor aligned with ``input`` along dimension 0.
        mini_batch_size: maximum number of samples per forward pass. The
            last batch is smaller when the sample count is not a multiple.

    Returns:
        The number of misclassified samples, as a Python int.
    """
    nb_errors = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for b in range(0, input.size(0), mini_batch_size):
            # Slicing clamps at the end of the tensor, so a trailing partial
            # batch is handled instead of raising as narrow() would.
            output = model(input[b:b + mini_batch_size])
            _, predicted_classes = output.max(1)
            target_batch = target[b:b + mini_batch_size]
            # Read, for every sample, the target entry at its predicted class.
            picked = target_batch.gather(1, predicted_classes.unsqueeze(1))
            nb_errors = nb_errors + int((picked <= 0).sum().item())

    return nb_errors

In [9]:
# Number of samples per mini-batch; passed to train_model and compute_nb_errors
# by the experiment cells below.
mini_batch_size = 100

## Test error

Write and test a function `compute_nb_errors(model, input, target, mini_batch_size)` to compute the number of prediction mistakes using a "winner-take-all" rule, that is, the class with the largest output is the predicted one.

Run the training and test ten times, record the test error rates.

With 25 epochs for training, the test error should be around 10% with the small sets, and around 0.7% with the full ones.

**Hint**: My version is 424 characters long

In [6]:
# Train and evaluate ten independently constructed 200-hidden-unit networks,
# printing the test error rate of each run.
for k in range(10):
    model = Net(200)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    nb_test_samples = test_input.size(0)
    error_rate = (100 * nb_test_errors) / nb_test_samples
    print('test error Net {:0.2f}% {:d}/{:d}'.format(error_rate, nb_test_errors, nb_test_samples))

0 5.112744241952896
1 3.7884375154972076
2 3.293623298406601
3 2.9757575690746307
4 2.6923083811998367
5 2.554612949490547
6 2.191692367196083
7 2.039821520447731
8 1.8264740705490112
9 1.7693022191524506
10 1.7337832748889923
11 1.524839848279953
12 1.41568061709404
13 1.3072502315044403
14 1.2673509791493416
15 1.180831916630268
16 1.0918994843959808
17 1.0624010860919952
18 0.985806442797184
19 0.9667170196771622
20 0.9843000769615173
21 0.8600574359297752
22 0.8859782591462135
23 0.8162537552416325
24 0.7888985760509968
test error Net 10.30% 103/1000
0 5.15368264913559
1 3.899659186601639
2 3.484398901462555
3 3.080592781305313
4 2.7276910692453384
5 2.556036412715912
6 2.260008215904236
7 2.1066131740808487
8 1.9061606377363205
9 1.5893488973379135
10 1.7481589615345001
11 1.566649541258812
12 1.3454816117882729
13 1.2616227343678474
14 1.2872117012739182
15 1.2269280031323433
16 1.1856093853712082
17 1.0022987723350525
18 1.0402314513921738
19 0.9310659766197205
20 0.952903836965

## Influence of the number of hidden units

In the default network, the number of hidden units is 200.

Modify the class constructor to take a parameter for that value, and run the training and compute the test error for 10, 50, 200, 500, and 1,000 hidden units.

In [7]:
# Train one network per hidden-layer size and report its test error rate.
# NOTE(review): the task statement for this section lists 1,000 as the largest
# size, while this sweep uses 2500 -- confirm which one is intended.
for nh in [ 10, 50, 200, 500, 2500 ]:
    model = Net(nh)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    # str.format() does not treat '%' specially, so a single '%' is needed;
    # '%%' would be printed as two percent signs.
    print('test error Net nh={:d} {:0.2f}% {:d}/{:d}'.format(nh,
                                                             (100 * nb_test_errors) / test_input.size(0),
                                                             nb_test_errors, test_input.size(0)))

0 5.4363861083984375
1 6.1824826300144196
2 3.9167848229408264
3 3.5962457060813904
4 3.4197466373443604
5 3.2805514335632324
6 3.284749537706375
7 3.1509396731853485
8 3.029834121465683
9 2.974440425634384
10 2.9795414209365845
11 2.8329491019248962
12 2.7390788793563843
13 2.660202071070671
14 2.6478630900382996
15 2.5123423784971237
16 2.4170942455530167
17 2.503698319196701
18 2.433146670460701
19 2.2901958376169205
20 2.2602210640907288
21 2.1708112955093384
22 2.109923794865608
23 2.2300566732883453
24 2.0837640166282654
test error Net nh=10 32.10%% 321/1000
0 5.245264142751694
1 3.884039491415024
2 3.4805814623832703
3 3.162073403596878
4 2.8438534438610077
5 2.673436015844345
6 2.5241805762052536
7 2.350790098309517
8 2.1533184200525284
9 1.8464730978012085
10 1.8945660591125488
11 1.6536803841590881
12 1.6085084676742554
13 1.5062538385391235
14 1.4224064126610756
15 1.5174818336963654
16 1.3327990174293518
17 1.2370204851031303
18 1.2130172923207283
19 1.1245168149471283
20 1

## Three convolutional layers

Write a new class Net2 with three convolutional layers. Pick the structure you want.

In [8]:
# Train the three-convolution network once and report its test error rate.
model = Net2()
train_model(model, train_input, train_target, mini_batch_size)
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
# str.format() does not treat '%' specially, so a single '%' is needed;
# '%%' would be printed as two percent signs.
print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))

0 6.395033687353134
1 4.229063153266907
2 3.7480200231075287
3 3.6354694962501526
4 3.4385128915309906
5 3.149120569229126
6 2.8957360237836838
7 2.6231420636177063
8 2.449516549706459
9 2.224852740764618
10 2.016029581427574
11 2.0044833421707153
12 1.8064875900745392
13 1.6726615279912949
14 1.6341219395399094
15 1.5118607133626938
16 1.4863622039556503
17 1.3760399147868156
18 1.2655799984931946
19 1.1814519092440605
20 1.1100690364837646
21 1.1050541400909424
22 1.0946116000413895
23 1.0108418762683868
24 0.9369009137153625
test error Net2 11.60%% 116/1000
