In [1]:
import torch
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

#### The objective of this session is to implement a convolutional network and test the influence of the architecture on the performance

In [3]:
train_input, train_target, test_input, test_target = \
    prologue.load_data(one_hot_labels = True, normalize = True, flatten = False)


* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [4]:
train_target

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [6]:
train_input, train_target = Variable(train_input), Variable(train_target)

model, criterion = Net(), nn.MSELoss()
eta, mini_batch_size = 1e-1, 100

for e in range(0, 25):
    sum_loss = 0
    # We do this with mini-batches
    for b in range(0, train_input.size(0), mini_batch_size):
        output = model(train_input.narrow(0, b, mini_batch_size))
        loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
        sum_loss = sum_loss + loss.item()
        model.zero_grad()
        loss.backward()
        for p in model.parameters():
            p.data.sub_(eta * p.grad.data)
    print(e, sum_loss)


0 0.9033293426036835
1 0.7755392417311668
2 0.6989829018712044
3 0.6369436867535114
4 0.5857527442276478
5 0.5477792881429195
6 0.5352986119687557
7 0.4928424321115017
8 0.46781037747859955
9 0.45788421109318733
10 0.41965536773204803
11 0.4122872091829777
12 0.3947731666266918
13 0.3983162231743336
14 0.369354709982872
15 0.34924130886793137
16 0.3518020734190941
17 0.33309604972600937
18 0.3338467739522457
19 0.32091527432203293
20 0.3038053549826145
21 0.3046022206544876
22 0.284149082377553
23 0.2884716186672449
24 0.27921703085303307


## Trainning function

In [7]:
def train_model(model,train_input, train_target, mini_batch_size):
    
    train_input, train_target = Variable(train_input), Variable(train_target)
    criterion =nn.MSELoss()
    eta= 1e-1
    
    for epoch in range(0, 25):
        sum_loss = 0
        # We do this with mini-batches
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            sum_loss = sum_loss + loss.item()
            model.zero_grad()
            loss.backward()
            for p in model.parameters():
                p.data.sub_(eta * p.grad.data)
    #print(epoch, sum_loss)
    

In [8]:
model = Net()

In [9]:

mini_batch_size =100
train_model(model,train_input, train_target, mini_batch_size)

## Test error

In [10]:
def compute_nb_errors(model, input_,target,mini_batch_size):
    model.eval()
    output = model(input_)
    correct_pred = torch.sum(torch.argmax(output,dim=1)==torch.argmax(target,dim=1)).item()/target.size(0)
    return (1 - float(correct_pred) )*100
    

In [11]:
compute_nb_errors(model=model, 
                  input_=train_input,
                  target=train_target,
                  mini_batch_size=mini_batch_size)

6.799999999999995

In [12]:
model2 = Net()

In [13]:
train_error = []
test_error = []
for i in range(10):
    train_model(model2,train_input, train_target, mini_batch_size)
    loss_train = compute_nb_errors(model=model, 
                  input_=train_input,
                  target=train_target,
                  mini_batch_size=mini_batch_size)
    loss_test = compute_nb_errors(model=model, 
                  input_=test_input,
                  target=test_target,
                  mini_batch_size=mini_batch_size)
    
    train_error.append(round(loss_train,2))
    test_error.append(round(loss_test,2))
print('train error',train_error)
print('test error', test_error)


train error [6.8, 6.8, 6.8, 6.8, 6.8, 6.8, 6.8, 6.8, 6.8, 6.8]
test error [13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0]


## Influence of the number of hidden units

In [14]:
class Net2(nn.Module):
    def __init__(self,hidden_unit_size):
        
        super(Net2, self).__init__()
        self.hidden_unit_size=hidden_unit_size
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, self.hidden_unit_size)
        self.fc2 = nn.Linear(self.hidden_unit_size, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [15]:
hidden_unit_size =[10,50,200,500,1000]

for h in hidden_unit_size:
    
    model3 = Net2(h)
    
    train_model(model3,train_input, train_target, mini_batch_size)
    
    loss = compute_nb_errors(model=model3, 
                  input_=test_input,
                  target=test_target,
                  mini_batch_size=mini_batch_size)
    
    print('{} hidden unit test error {} '.format(h,loss))

10 hidden unit test error 43.99999999999999 
50 hidden unit test error 14.600000000000001 
200 hidden unit test error 14.500000000000002 
500 hidden unit test error 14.500000000000002 
1000 hidden unit test error 13.600000000000001 


## Three convolutional layers

In [46]:
class Net3(nn.Module):
    def __init__(self, hidden_unit):
        
        super(Net3, self).__init__()
        self.hidden_unit = hidden_unit
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
        self.fc1 = nn.Linear(3*3*64, self.hidden_unit)
        self.fc2 = nn.Linear(self.hidden_unit, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2,stride=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2,stride=2))
        x = F.relu(self.conv3(x))
        x = F.relu(self.fc1(x.view(-1, 3*3*64)))
        x = self.fc2(x)
        return x

In [47]:
def get_shape(h,p=0,k=5,s=1):
    return ((h+(2*p)-k)/s)+1

def maxp(w1,f,s):
    return (w1-f)/s +1

In [48]:
get_shape(5,k=3)

3.0

In [49]:
hidden_unit=200
model4= Net3(hidden_unit)

In [50]:
train_model(model4,train_input, train_target, mini_batch_size)
    
loss = compute_nb_errors(model=model4, 
                  input_=test_input,
                  target=test_target,
                  mini_batch_size=mini_batch_size)
print(loss)

23.9
