In [23]:
import dlc_practical_prologue
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from tqdm import trange

In [24]:
print(torch.__version__)

1.7.1


In [25]:
train_input, train_target, train_classes, test_input, test_target, test_classes = dlc_practical_prologue.generate_pair_sets(1000)

In [26]:
# Weight-sharing "Siamese" LeNet
class Siamese(nn.Module):

    def __init__(self):
        super(Siamese, self).__init__()
        
        self.LeNet1 = nn.Sequential(
            nn.Conv2d(1,16,5),  # 16x10x10 (input is 1x14x14)
            nn.MaxPool2d(2),    # 16x5x5
            nn.ReLU(),
            nn.Conv2d(16,32,2), # 32x4x4
            nn.MaxPool2d(2),    # 32x2x2 (-> 1x128 before LeNet2)
            nn.ReLU()
        )
        self.LeNet2 = nn.Sequential(
            nn.Linear(128,64),  # 1x64
            nn.ReLU(),
            nn.Linear(64,32),   # 1x32
            nn.ReLU()
        )
        self.LeNet3 = nn.Sequential(
            nn.Linear(32,16),   # 1x16
            nn.Sigmoid(),
            nn.Linear(16,2)     # 1x2
        )
        
    def forward_bro(self, x):
        x = self.LeNet1(x)
        x = x.view(-1,1,128)
        x = self.LeNet2(x)
        return x
    
    def forward(self, x1, x2):
        x1 = self.forward_bro(x1)
        x2 = self.forward_bro(x2)
        x3 = x2 - x1
        x3 = self.LeNet3(x3)
        return x3

In [31]:
# Weight-sharing "Siamese" LeNet
class Siamese_no_sharing(nn.Module):

    def __init__(self):
        super(Siamese_no_sharing, self).__init__()
        
        self.LeNet1_x1 = nn.Sequential(
            nn.Conv2d(1,16,5),  # 16x10x10 (input is 1x14x14)
            nn.MaxPool2d(2),    # 16x5x5
            nn.ReLU(),
            nn.Conv2d(16,32,2), # 32x4x4
            nn.MaxPool2d(2),    # 32x2x2 (-> 1x128 before LeNet2)
            nn.ReLU()
        )
        self.LeNet2_x1 = nn.Sequential(
            nn.Linear(128,64),  # 1x64
            nn.ReLU(),
            nn.Linear(64,32),   # 1x32
            nn.ReLU()
        )        
        
        self.LeNet1_x2 = nn.Sequential(
            nn.Conv2d(1,16,5),  # 16x10x10 (input is 1x14x14)
            nn.MaxPool2d(2),    # 16x5x5
            nn.ReLU(),
            nn.Conv2d(16,32,2), # 32x4x4
            nn.MaxPool2d(2),    # 32x2x2 (-> 1x128 before LeNet2)
            nn.ReLU()
        )
        self.LeNet2_x2 = nn.Sequential(
            nn.Linear(128,64),  # 1x64
            nn.ReLU(),
            nn.Linear(64,32),   # 1x32
            nn.ReLU()
        )
        
        self.LeNet3 = nn.Sequential(
            nn.Linear(32,16),   # 1x16
            nn.Sigmoid(),
            nn.Linear(16,2)     # 1x2
        )
        
    def forward_x2(self, x):
        x = self.LeNet1_x2(x)
        x = x.view(-1,1,128)
        x = self.LeNet2_x2(x)
        return x  
        
        
    def forward_x1(self, x):
        x = self.LeNet1_x1(x)
        x = x.view(-1,1,128)
        x = self.LeNet2_x1(x)
        return x
    
    def forward(self, x1, x2):
        x1 = self.forward_x1(x1)
        x2 = self.forward_x2(x2)
        x3 = x2 - x1
        x3 = self.LeNet3(x3)
        return x3

In [32]:
# control convolutions' dimensions
x1 = train_input.narrow(0,0,100)
x1 = x1[:,0].view(100,1,14,14)
weight1 = torch.empty(16,1,5,5).normal_()
bias1 = torch.empty(16).normal_()
x1 = F.conv2d(x1, weight1, bias1)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

weight2 = torch.empty(32,16,2,2).normal_()
bias2 = torch.empty(32).normal_()
x1 = F.conv2d(x1, weight2, bias2)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

x1 = x1.view(-1,1,128)
x1.shape

torch.Size([100, 1, 128])

In [33]:
def train_model(model, train_input, train_target, batch_size, nb_epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)

    for e in range(nb_epochs):
        acc_loss = 0
        for b in range(0, train_input.size(0), batch_size):
            imgs = train_input.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            output = model(imgs1, imgs2).view(batch_size, -1)
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            acc_loss += loss.item()
            model.zero_grad()
            loss.backward()
            optimizer.step()
        print(e, acc_loss)

In [34]:
#model = Siamese()
model = Siamese_no_sharing()
train_model(model, train_input, train_target, 100, 25)

0 6.959452927112579
1 6.8142993450164795
2 6.8592864871025085
3 6.8395684361457825
4 6.808608770370483
5 6.862435340881348
6 6.588079035282135
7 6.56357616186142
8 6.194024980068207
9 5.865689426660538
10 5.478522688150406
11 5.089710146188736
12 5.009523510932922
13 4.836585372686386
14 4.346704363822937
15 3.979830712080002
16 3.9764696061611176
17 4.40175786614418
18 3.3094355165958405
19 3.8473612517118454
20 3.449337124824524
21 2.778465613722801
22 2.942721650004387
23 2.968912735581398
24 3.4355210065841675


In [19]:
def compute_nb_errors(model, input_data, target_data, batch_size):
    nb_errors = 0
    
    for b in range(0, input_data.size(0), batch_size):
            imgs = input_data.narrow(0, b, batch_size)
            target = target_data.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            output = model(imgs1, imgs2).view(batch_size, -1)
            pred = output.max(1)[1]
            nb_errors += (pred-target).abs().sum().item()
    
    return nb_errors

In [36]:
train_errors = compute_nb_errors(model, train_input, train_target, 100)
test_errors = compute_nb_errors(model, test_input, test_target, 100)

In [21]:
(train_input.size(0) - train_errors)/train_input.size(0)

0.995

In [22]:
(test_input.size(0) - test_errors)/test_input.size(0)
# crazy overfit

0.851

In [37]:
(test_input.size(0) - test_errors)/test_input.size(0)
#For no sharing

0.83

In [38]:
(train_input.size(0) - train_errors)/train_input.size(0)

0.904

# AUXILLARY

In [56]:
train_input, train_target, train_classes, test_input, test_target, test_classes = dlc_practical_prologue.generate_pair_sets(1000)

In [57]:
# Weight-sharing "Siamese" LeNet
class Siamese(nn.Module):

    def __init__(self):
        super(Siamese, self).__init__()
        
        self.LeNet1 = nn.Sequential(
            nn.Conv2d(1,16,5),  # 16x10x10 (input is 1x14x14)
            nn.MaxPool2d(2),    # 16x5x5
            nn.ReLU(),
            nn.Conv2d(16,32,2), # 32x4x4
            nn.MaxPool2d(2),    # 32x2x2 (-> 1x128 before LeNet2)
            nn.ReLU()
        )
        self.LeNet2 = nn.Sequential(
            nn.Linear(128,64),  # 1x64
            nn.ReLU(),
            nn.Linear(64,32),   # 1x32
            nn.ReLU()
        )
        self.LeNet3 = nn.Sequential(
            nn.Linear(32,16),   # 1x16
            nn.Sigmoid(),
            nn.Linear(16,2)     # 1x2
        )
        self.AuxLayer = nn.Sequential(
            nn.Linear(32,16),   # 1x16
            nn.Sigmoid(),
            nn.Linear(16,10)     # 1x2
        )
        
    def forward_bro(self, x):
        x = self.LeNet1(x)
        x = x.view(-1,1,128)
        x = self.LeNet2(x)
        return x
    
    def forward(self, x1, x2):
        x1 = self.forward_bro(x1)
        x2 = self.forward_bro(x2)
        x3 = x2 - x1
        x2 = self.AuxLayer(x2)
        x2 = self.AuxLayer(x2)
        x3 = self.LeNet3(x3)
        return x1,x2,x3

In [58]:
# control convolutions' dimensions
x1 = train_input.narrow(0,0,100)
x1 = x1[:,0].view(100,1,14,14)
weight1 = torch.empty(16,1,5,5).normal_()
bias1 = torch.empty(16).normal_()
x1 = F.conv2d(x1, weight1, bias1)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

weight2 = torch.empty(32,16,2,2).normal_()
bias2 = torch.empty(32).normal_()
x1 = F.conv2d(x1, weight2, bias2)

x1 = F.max_pool2d(x1, 2)
x1 = F.relu(x1)

x1 = x1.view(-1,1,128)
x1.shape

torch.Size([100, 1, 128])

In [59]:
train_classes[0,0]

tensor(1)

In [60]:
train_classes.narrow(0, 0, 5)[:,0]

tensor([1, 7, 0, 8, 1])

In [61]:
train_classes

tensor([[1, 7],
        [7, 0],
        [0, 3],
        ...,
        [2, 6],
        [1, 2],
        [9, 2]])

In [85]:
def train_model(model, train_input, train_target,train_classes, batch_size, nb_epochs):
    criterion = nn.CrossEntropyLoss()
    aux_criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)

    for e in range(nb_epochs):
        acc_loss = 0
        for b in range(0, train_input.size(0), batch_size):
            imgs = train_input.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            x1_pred, x2_pred , output = model(imgs1, imgs2)
            print("batch" , batch_size)
            print("x1_shape: ", x2_pred.shape)
            x1_pred = x1_pred.view(batch_size, -1)
            x2_pred = x2_pred.view(batch_size, -1)
            output = output.view(batch_size, -1)
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            #print("x1_shape: ", x1_pred.shape)
            print("classes_shape: ", train_classes.narrow(0, b, batch_size)[:,0].shape)
            loss_aux1 = aux_criterion(x1_pred,train_classes.narrow(0, b, batch_size)[:,0])
            loss_aux1 += aux_criterion(x2_pred,train_classes.narrow(0, b, batch_size)[:,1])
            loss = loss + loss_aux1
            acc_loss += loss.item()
            model.zero_grad()
            loss.backward()
            optimizer.step()
        print(e, acc_loss)

In [86]:
model = Siamese()
train_model(model, train_input, train_target,train_classes, 100, 25)

batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
0 11090.51515007019
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape:  torch.Size([100, 1, 32])
classes_shape:  torch.Size([100])
batch 100
x1_shape: 

In [76]:
def compute_nb_errors(model, input_data, target_data, batch_size):
    nb_errors = 0
    
    for b in range(0, input_data.size(0), batch_size):
            imgs = input_data.narrow(0, b, batch_size)
            target = target_data.narrow(0, b, batch_size)
            imgs1 = imgs[:,0].view(batch_size, 1, 14, 14)
            imgs2 = imgs[:,1].view(batch_size, 1, 14, 14)
            _,_, output = model(imgs1, imgs2)
            output = output.view(batch_size, -1)
            pred = output.max(1)[1]
            nb_errors += (pred-target).abs().sum().item()
    
    return nb_errors

In [77]:
train_errors = compute_nb_errors(model, train_input, train_target, 100)
test_errors = compute_nb_errors(model, test_input, test_target, 100)

In [78]:
print("train acc:" , (train_input.size(0) - train_errors)/train_input.size(0))
print("test acc:" , (test_input.size(0) - test_errors)/test_input.size(0))

train acc: 0.543
test acc: 0.574
