In [21]:
import dlc_practical_prologue
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from tqdm import trange

# AUXILIARY: Siamese network trained with an auxiliary per-image classification loss

In [22]:
# Unpack the six tensors returned by generate_pair_sets(1000):
# inputs, targets and per-image classes, first for training then for testing.
(
    train_input, train_target, train_classes,
    test_input, test_target, test_classes,
) = dlc_practical_prologue.generate_pair_sets(1000)

In [23]:
# Weight-sharing "Siamese" LeNet
class Siamese(nn.Module):
    """Two-input LeNet-style network whose two branches share all weights.

    Each image goes through the same trunk (``LeNet1`` then ``LeNet2``) and
    becomes a 32-feature embedding. ``AuxLayer`` maps each embedding to a
    10-way output, and ``LeNet3`` maps the difference of the two embeddings
    to a 2-way output.
    """

    def __init__(self):
        super().__init__()

        # Convolutional trunk; the shapes below are for a 1x14x14 input.
        self.LeNet1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5),   # -> 16x10x10
            nn.MaxPool2d(kernel_size=2),                                # -> 16x5x5
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=2),  # -> 32x4x4
            nn.MaxPool2d(kernel_size=2),                                # -> 32x2x2 (128 values)
            nn.ReLU(),
        )
        # Fully connected part of the trunk: 128 -> 64 -> 32.
        self.LeNet2 = nn.Sequential(
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=32),
            nn.ReLU(),
        )
        # Heads are built in this order so parameter initialisation under a
        # fixed seed stays the same: comparison head first, auxiliary second.
        self.LeNet3 = self._make_head(2)
        self.AuxLayer = self._make_head(10)

    @staticmethod
    def _make_head(nb_outputs):
        """Build a 32 -> 16 -> ``nb_outputs`` head with a sigmoid in between."""
        return nn.Sequential(
            nn.Linear(32, 16),
            nn.Sigmoid(),
            nn.Linear(16, nb_outputs),
        )

    def forward_bro(self, x):
        """Run one image batch through the shared trunk.

        The convolutional output is reshaped to ``(-1, 1, 128)`` before the
        linear layers, so the result has shape ``(-1, 1, 32)``.
        """
        flat_features = self.LeNet1(x).view(-1, 1, 128)
        return self.LeNet2(flat_features)

    def forward(self, x1, x2):
        """Return ``(aux_out_1, aux_out_2, comparison_out)`` for an image pair.

        The comparison head receives ``embedding(x2) - embedding(x1)``.
        """
        first_embedding = self.forward_bro(x1)
        second_embedding = self.forward_bro(x2)
        comparison_out = self.LeNet3(second_embedding - first_embedding)
        return (
            self.AuxLayer(first_embedding),
            self.AuxLayer(second_embedding),
            comparison_out,
        )

In [24]:
# Check the convolution dimensions by replaying the convolutional stage with
# the functional API, using random weights on the first image of 100 pairs.
x1 = train_input.narrow(0, 0, 100)[:, 0].view(100, 1, 14, 14)

# Stage 1: 5x5 convolution (1 -> 16 channels), 2x2 max-pool, ReLU.
weight1, bias1 = torch.empty(16, 1, 5, 5).normal_(), torch.empty(16).normal_()
x1 = F.relu(F.max_pool2d(F.conv2d(x1, weight1, bias1), 2))

# Stage 2: 2x2 convolution (16 -> 32 channels), 2x2 max-pool, ReLU.
weight2, bias2 = torch.empty(32, 16, 2, 2).normal_(), torch.empty(32).normal_()
x1 = F.relu(F.max_pool2d(F.conv2d(x1, weight2, bias2), 2))

# Flatten to the layout fed to the linear layers and display the shape.
x1 = x1.view(-1, 1, 128)
x1.shape

torch.Size([100, 1, 128])

In [25]:
# Entry [0, 0] of the class tensor; column 0 is what train_model uses as the
# auxiliary target for the first image of each pair.
train_classes[0,0]

tensor(1)

In [26]:
# Column 0 of the first five rows of the class tensor.
train_classes.narrow(0, 0, 5)[:,0]

tensor([1, 9, 3, 3, 0])

In [27]:
# Display the class tensor itself (two columns per row, as the output shows).
train_classes

tensor([[1, 8],
        [9, 2],
        [3, 9],
        ...,
        [9, 9],
        [4, 0],
        [3, 0]])

In [28]:
def train_model(model, train_input, train_target,train_classes, batch_size, nb_epochs, lr=1e-1):
    """Train ``model`` with SGD on a main loss plus two auxiliary losses.

    Args:
        model: module called as ``model(imgs1, imgs2)`` and returning
            ``(pred_first, pred_second, output)``.
        train_input: tensor of image pairs, indexed ``[sample, 0 or 1, ...]``.
        train_target: per-pair target for ``output``.
        train_classes: per-pair classes; column 0 is the target for
            ``pred_first`` and column 1 the target for ``pred_second``.
        batch_size: number of pairs per mini-batch. The last batch may be
            smaller when the dataset size is not a multiple of it.
        nb_epochs: number of passes over the data.
        lr: SGD learning rate (defaults to the previously hard-coded 0.1).

    Prints the epoch index and the sum of the batch losses after each epoch.
    """
    criterion = nn.CrossEntropyLoss()
    aux_criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        acc_loss = 0
        for b in range(0, nb_samples, batch_size):
            # Slicing clamps at the end of the tensor, whereas narrow() raises
            # when the final batch is shorter than batch_size.
            imgs = train_input[b:b + batch_size]
            targets = train_target[b:b + batch_size]
            classes = train_classes[b:b + batch_size]
            n = imgs.size(0)

            # Keep the pair axis as a singleton channel axis; this works for
            # any image size instead of assuming 14x14.
            imgs1 = imgs[:, 0:1]
            imgs2 = imgs[:, 1:2]

            x1_pred, x2_pred, output = model(imgs1, imgs2)
            x1_pred = x1_pred.reshape(n, -1)
            x2_pred = x2_pred.reshape(n, -1)
            output = output.reshape(n, -1)

            loss = criterion(output, targets)
            loss_aux = aux_criterion(x1_pred, classes[:, 0])
            loss_aux = loss_aux + aux_criterion(x2_pred, classes[:, 1])
            loss = loss + loss_aux
            acc_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(e, acc_loss)

In [29]:
# Fresh network, trained for 25 epochs with mini-batches of 100 pairs.
model = Siamese()
train_model(
    model,
    train_input,
    train_target,
    train_classes,
    batch_size=100,
    nb_epochs=25,
)

0 52.199321269989014
1 48.6403546333313
2 44.026705741882324
3 43.46076774597168
4 37.433269739151
5 33.358933210372925
6 30.44285559654236
7 28.416818380355835
8 28.387378215789795
9 23.477592706680298
10 20.939395904541016
11 20.029269456863403
12 17.550909638404846
13 19.082154989242554
14 16.621403217315674
15 13.245406746864319
16 11.870068311691284
17 22.739344000816345
18 15.087915420532227
19 10.917676329612732
20 9.340040862560272
21 8.107764482498169
22 8.0348020195961
23 6.467557907104492
24 5.976473927497864


In [30]:
def compute_nb_errors(model, input_data, target_data, batch_size):
    """Count the pairs for which the model's main output is wrong.

    Args:
        model: callable used as ``model(imgs1, imgs2)``; only the third
            returned value (the main output) is used.
        input_data: tensor of image pairs, indexed ``[sample, 0 or 1, ...]``.
        target_data: expected class index for each pair.
        batch_size: number of pairs evaluated per forward pass. The last
            batch may be smaller.

    Returns:
        The number of misclassified pairs, as a Python int.
    """
    nb_errors = 0

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for b in range(0, input_data.size(0), batch_size):
            # Slicing tolerates a shorter final batch; narrow() would raise.
            imgs = input_data[b:b + batch_size]
            target = target_data[b:b + batch_size]
            n = imgs.size(0)

            _, _, output = model(imgs[:, 0:1], imgs[:, 1:2])
            pred = output.reshape(n, -1).argmax(dim=1)

            # Count mismatches. Summing |pred - target| would over-count
            # whenever class indices differ by more than one.
            nb_errors += (pred != target).sum().item()

    return nb_errors

In [31]:
# Count misclassified pairs on each split, 100 pairs per forward pass.
train_errors = compute_nb_errors(
    model, train_input, train_target, batch_size=100
)
test_errors = compute_nb_errors(
    model, test_input, test_target, batch_size=100
)

In [32]:
# Accuracy is the fraction of pairs that were not counted as errors.
n_train, n_test = train_input.size(0), test_input.size(0)
print("train acc:", (n_train - train_errors) / n_train)
print("test acc:", (n_test - test_errors) / n_test)

train acc: 0.984
test acc: 0.893
