In [1]:
import torch
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

# Load the train/test split through the course helper. Targets are requested
# one-hot encoded because the error counter below indexes them as
# target[sample, class]; flatten=False presumably keeps the N x 1 x 28 x 28
# image layout listed in the shape notes -- TODO confirm against the helper.
train_input, train_target, test_input, test_target = prologue.load_data(
    one_hot_labels=True,
    normalize=True,
    flatten=False,
)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


Input: N x 1 x 28 x 28   
Conv1: N x 32 x 24 x 24   
pool1: N x 32 x 8 x 8   
Conv2: N x 64 x 4 x 4   
pool2: N x 64 x 2 x 2   
view: N x 256   
fc1: N x 200   
fc2: N x 10   

In [14]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/max-pool stages, then two linear layers.

    The flattening step hard-codes 256 features per sample, which matches
    64 channels x 2 x 2 for the N x 1 x 28 x 28 inputs described in the
    notebook's shape notes.

    Args:
        nb_hiddens: number of units in the hidden fully connected layer.
    """

    def __init__(self, nb_hiddens):
        super().__init__()
        # Layers are created in a fixed order (conv1, conv2, fc1, fc2) so that
        # seeded initialisation stays reproducible.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.fc1 = nn.Linear(in_features=256, out_features=nb_hiddens)
        self.fc2 = nn.Linear(in_features=nb_hiddens, out_features=10)

    def forward(self, x):
        """Return the 10 raw (un-normalised) class scores for each sample in `x`."""
        # Stage 1: 5x5 convolution, 3x3 max-pooling, ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        # Stage 2: 5x5 convolution, 2x2 max-pooling, ReLU.
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2, stride=2))
        # Flatten to 256 features per row before the fully connected head.
        flat = stage2.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    
class Net2(nn.Module):
    """Three-convolution variant of the classifier, followed by two linear layers.

    Per-sample shapes (channels x height x width) for a 1 x 28 x 28 input:
        conv1 (5x5) -> 24 x 24 x 24, max-pool 2 -> 24 x 12 x 12
        conv2 (4x4) -> 48 x 9 x 9,   max-pool 3 -> 48 x 3 x 3
        conv3 (2x2) -> 96 x 2 x 2
        flatten     -> 384 features -> nb_hiddens -> 10

    Args:
        nb_hiddens: number of units in the hidden fully connected layer.
    """

    def __init__(self, nb_hiddens):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=24, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=4)
        self.conv3 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=2)
        self.fc1 = nn.Linear(in_features=96*2*2, out_features=nb_hiddens)
        self.fc2 = nn.Linear(in_features=nb_hiddens, out_features=10)

    def forward(self, x):
        """Return the 10 raw (un-normalised) class scores for each sample in `x`."""
        # 1*28*28 -> 24*24*24 -> 24*12*12
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        # 24*12*12 -> 48*9*9 -> 48*3*3
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=3, stride=3))
        # 48*3*3 -> 96*2*2 (no pooling after the last convolution)
        stage3 = F.relu(self.conv3(stage2))
        # Flatten the 96*2*2 feature maps into one row per sample.
        flat = stage3.view(-1, 96*2*2)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [3]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs=25, eta=1e-1):
    """Train `model` in place with plain mini-batch gradient descent on an MSE loss.

    Args:
        model: an `nn.Module` mapping a batch of inputs to a batch of outputs.
        train_input: training samples, batched along dimension 0.
        train_target: targets aligned with `train_input` along dimension 0;
            must have the same shape as the model output (MSE loss).
        mini_batch_size: number of samples per gradient step. The final batch
            of an epoch is shorter when the sample count is not a multiple of it.
        nb_epochs: number of passes over the training set (default 25).
        eta: learning rate of the gradient step (default 1e-1).

    Returns:
        None. The parameters of `model` are updated in place and the summed
        batch loss of the last epoch is printed.
    """
    criterion = nn.MSELoss()
    nb_samples = train_input.size(0)
    # Defined up front so the final print also works when no epoch runs.
    sum_loss = 0

    for e in range(0, nb_epochs):
        sum_loss = 0
        # We do this with mini-batches
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the last batch: narrow() raises if it runs past the end.
            batch_size = min(mini_batch_size, nb_samples - b)
            # forward pass
            output = model(train_input.narrow(0, b, batch_size))
            # compute loss
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            # Detach so the running sum does not keep the autograd graph alive.
            sum_loss = sum_loss + loss.detach()

            # reset gradient variables
            model.zero_grad()
            # backward pass
            loss.backward()
            # Manual SGD step; no_grad keeps the update out of the autograd graph.
            with torch.no_grad():
                for p in model.parameters():
                    # Parameters that did not take part in the loss have no gradient.
                    if p.grad is not None:
                        p.sub_(eta * p.grad)
        # print(e, sum_loss)
    print('Final sum_loss: ', sum_loss)

*compute_nb_errors*
* Computes the number of prediction mistakes using a “winner-take-all” rule: the class with
the largest output is the predicted one.

In [8]:
def compute_nb_errors(model, input, target, mini_batch_size):
    """Count misclassified samples using a winner-take-all rule.

    The predicted class of a sample is the index of its largest model output;
    the true class is the index of the largest entry of its one-hot target row.
    Comparing indices works for both 0/1 and -1/+1 one-hot encodings.

    Args:
        model: callable mapping a batch of inputs to per-class scores
            of shape (batch, nb_classes).
        input: samples, batched along dimension 0.
        target: one-hot targets of shape (nb_samples, nb_classes).
        mini_batch_size: number of samples per forward pass. The final batch
            is shorter when the sample count is not a multiple of it.

    Returns:
        The number of samples whose predicted class differs from the target
        class, as a Python int.
    """
    nb_errors = 0
    nb_samples = input.size(0)

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for i in range(0, nb_samples, mini_batch_size):
            # Clamp the last batch: narrow() raises if it runs past the end.
            batch_size = min(mini_batch_size, nb_samples - i)
            output = model(input.narrow(0, i, batch_size))
            _, predicted_index = output.max(1)
            _, target_index = target.narrow(0, i, batch_size).max(1)
            # Vectorised comparison replaces the per-sample Python loop.
            nb_errors += (predicted_index != target_index).sum().item()

    return nb_errors

In [9]:
# Driver: shared set-up reused by the experiment cells that follow.
# Wrap the data tensors in autograd Variables, one assignment per tensor.
train_input = Variable(train_input)
train_target = Variable(train_target)
test_input = Variable(test_input)
test_target = Variable(test_target)
mini_batch_size = 100

## ex2 ##
# Train the default network (200 hidden units) ten times from scratch and
# report the test error of each run.
for i in range(10):
    model = Net(200)
    train_model(model, train_input, train_target, mini_batch_size)

    nb_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    nb_test_samples = test_input.size(0)
    print ('error: {:.02f}% {:d} / {:d}'.format(100*nb_errors / nb_test_samples, nb_errors, nb_test_samples))

Final sum_loss:  tensor(0.7994)
error: 9.40% 94 / 1000
Final sum_loss:  tensor(0.7642)
error: 8.80% 88 / 1000
Final sum_loss:  tensor(0.7445)
error: 11.20% 112 / 1000
Final sum_loss:  tensor(0.8316)
error: 11.10% 111 / 1000
Final sum_loss:  tensor(0.7700)
error: 10.10% 101 / 1000
Final sum_loss:  tensor(0.7821)
error: 11.60% 116 / 1000
Final sum_loss:  tensor(0.7222)
error: 10.00% 100 / 1000
Final sum_loss:  tensor(0.7828)
error: 10.60% 106 / 1000
Final sum_loss:  tensor(0.7677)
error: 9.70% 97 / 1000
Final sum_loss:  tensor(0.6800)
error: 8.60% 86 / 1000


In [10]:
## ex3 ##
# Sweep the width of the hidden fully connected layer: train one fresh model
# per width and report its test error.
for k in (10, 50, 200, 500, 1000):
    model = Net(k)
    train_model(model, train_input, train_target, mini_batch_size)

    nb_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    nb_test_samples = test_input.size(0)
    print ('{:d} nb_hiddens -> error: {:.02f}% {:d} / {:d}'.format(k, 100*nb_errors / nb_test_samples, nb_errors, nb_test_samples))

Final sum_loss:  tensor(3.5999)
10 nb_hiddens -> error: 84.40% 844 / 1000
Final sum_loss:  tensor(0.8077)
50 nb_hiddens -> error: 10.40% 104 / 1000
Final sum_loss:  tensor(0.7144)
200 nb_hiddens -> error: 9.30% 93 / 1000
Final sum_loss:  tensor(0.7058)
500 nb_hiddens -> error: 11.10% 111 / 1000
Final sum_loss:  tensor(0.6850)
1000 nb_hiddens -> error: 8.20% 82 / 1000


In [17]:
## ex4 ##
# Same protocol with the three-convolution network (200 hidden units).
model = Net2(200)
train_model(model, train_input, train_target, mini_batch_size)

nb_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
nb_test_samples = test_input.size(0)
print ('error: {:.02f}% {:d} / {:d}'.format(100*nb_errors / nb_test_samples, nb_errors, nb_test_samples))


Final sum_loss:  tensor(0.9636)
error: 9.30% 93 / 1000
