In [1]:
import dlc_practical_prologue
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from tqdm import trange

In [2]:
train_input, train_target, train_classes, test_input, test_target, test_classes = dlc_practical_prologue.generate_pair_sets(1000)

In [3]:
# Weight-sharing "Siamese" LeNet
class Siamese(nn.Module):

    def __init__(self):
        super(Siamese, self).__init__()
        
        self.LeNet1 = nn.Sequential(
            nn.Conv2d(1,16,5),  # 16x10x10 (input is 1x14x14)
            nn.MaxPool2d(2),    # 16x5x5
            nn.ReLU(),
            nn.Conv2d(16,32,2), # 32x4x4
            nn.MaxPool2d(2),    # 32x2x2 (-> 1x128 before LeNet2)
            nn.ReLU()
        )
        self.LeNet2 = nn.Sequential(
            nn.Linear(128,64),  # 1x64
            nn.ReLU(),
            nn.Linear(64,32),   # 1x32
            nn.ReLU()
        )
        self.LeNet3 = nn.Sequential(
            nn.Linear(32,16),   # 1x16
            nn.Sigmoid(),
            nn.Linear(16,2)     # 1x2
        )
        
    def forward_bro(self, x):
        x = self.LeNet1(x)
        x = x.view(-1,1,128)
        x = self.LeNet2(x)
        return x
    
    def forward(self, x1, x2):
        x1 = self.forward_bro(x1)
        x2 = self.forward_bro(x2)
        x3 = x1 + x2
        x3 = self.LeNet3(x3)
        return x3

In [4]:
# control convolutions' dimensions
x1 = train_input.narrow(0,0,100)
x1 = x1[:,0].view(100,1,14,14)
weight1 = torch.empty(16,1,5,5).normal_()
bias1 = torch.empty(16).normal_()
x1 = F.conv2d(x1, weight1, bias1)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

weight2 = torch.empty(32,16,2,2).normal_()
bias2 = torch.empty(32).normal_()
x1 = F.conv2d(x1, weight2, bias2)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

x1 = x1.view(-1,1,128)
x1.shape

torch.Size([100, 1, 128])

In [5]:
def train_model(model, train_input, train_target, batch_size, nb_epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)

    for e in range(nb_epochs):
        acc_loss = 0
        for b in range(0, train_input.size(0), batch_size):
            imgs = train_input.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            output = model(imgs1, imgs2).view(batch_size, -1)
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            acc_loss += loss.item()
            model.zero_grad()
            loss.backward()
            optimizer.step()
        print(e, acc_loss)

In [6]:
model = Siamese()
train_model(model, train_input, train_target, 100, 250)

0 6.923598349094391
1 6.884099900722504
2 6.88298773765564
3 6.881442368030548
4 6.880037546157837
5 6.878948152065277
6 6.876945734024048
7 6.874556303024292
8 6.891939699649811
9 6.872559428215027
10 6.876327812671661
11 6.880187153816223
12 6.8601542711257935
13 6.860633730888367
14 6.8835413455963135
15 6.880693435668945
16 6.878857970237732
17 6.876905560493469
18 6.883996605873108
19 6.875350892543793
20 6.875253915786743
21 6.878506720066071
22 6.875382959842682
23 6.854404807090759
24 6.871258318424225
25 6.853077232837677
26 6.884506642818451
27 6.831049859523773
28 6.879334330558777
29 6.873808741569519
30 6.87993198633194
31 6.842313647270203
32 6.8692914843559265
33 6.851314187049866
34 6.822456777095795
35 6.812077760696411
36 6.836694061756134
37 6.828101217746735
38 6.801045179367065
39 6.777437090873718
40 6.782059013843536
41 6.80307549238205
42 6.863027632236481
43 6.840596854686737
44 6.803246557712555
45 6.7331782579422
46 6.68571013212204
47 6.77890008687973
48 6.8

In [7]:
def compute_nb_errors(model, input_data, target_data, batch_size):
    nb_errors = 0
    
    for b in range(0, input_data.size(0), batch_size):
            imgs = input_data.narrow(0, b, batch_size)
            target = target_data.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            output = model(imgs1, imgs2).view(batch_size, -1)
            pred = output.max(1)[1]
            nb_errors += (pred-target).abs().sum().item()
    
    return nb_errors

In [8]:
train_errors = compute_nb_errors(model, train_input, train_target, 100)
test_errors = compute_nb_errors(model, test_input, test_target, 100)

In [9]:
train_errors/train_input.size(0)

0.001

In [10]:
test_errors/test_input.size(0)
# crazy overfit

0.508