## Imports

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import time

import dlc_practical_prologue

## Load data

In [2]:
N = 1000

x_train, y_train, y_train_classes, x_test, y_test, y_test_classes = \
dlc_practical_prologue.generate_pair_sets(N)

assert x_train.shape == torch.Size([N, 2, 14, 14])
assert y_train.shape == torch.Size([N])
assert y_train_classes.shape == torch.Size([N, 2])
assert x_test.shape == torch.Size([N, 2, 14, 14])
assert y_test.shape == torch.Size([N])
assert y_test_classes.shape == torch.Size([N, 2])

## Define model

In [3]:
class model_1(nn.Module):
    def __init__(self):
        super(model_1, self).__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=3)        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)

        self.dense1 = nn.Linear(288, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), kernel_size=2)
        x = F.max_pool2d(self.conv2(x), kernel_size=2)
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = F.sigmoid(self.dense2(x))
        return x

In [4]:
class model_2(nn.Module):
    def __init__(self):
        super(model_1, self).__init__()
        self.conv1 = nn.Conv2d(2, 10, kernel_size=3, padding=2)        
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3, padding=2)
        self.conv3 = nn.Conv2d(20, 10, kernel_size=3, padding=2)
        self.conv4 = nn.Conv2d(10, 5, kernel_size=3, padding=2)
        self.conv5 = nn.Conv2d(5, 2, kernel_size=3, padding=2)

        self.dense1 = nn.Linear(72, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(self.conv4(x), kernel_size=2)
        x = F.max_pool2d(self.conv5(x), kernel_size=2)
        
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = torch.flatten(torch.sigmoid(self.dense2(x))) # Last flatten to have [batch_size] instead of [batch_size, 1]
        return x

In [5]:
class model_weight_sharing(nn.Module):
    def __init__(self):
        super(model_weight_sharing, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3)        
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)
        self.conv3 = nn.Conv2d(40, 40, kernel_size=3)

        self.dense1 = nn.Linear(320, 40)
        self.dense2 = nn.Linear(40, 1)
        
    def forward(self, x):
        d1 = F.relu(self.conv1(x[:, 0:1, ...]), kernel_size=2)
        d1 = F.max_pool2d(self.conv2(d1), kernel_size=2)
        d1 = F.max_pool2d(self.conv3(d1), kernel_size=2)
        d1 = torch.flatten(d1, 1)
        
        d2 = F.relu(self.conv1(x[:, 1:2, ...]), kernel_size=2)
        d2 = F.max_pool2d(self.conv2(d2), kernel_size=2)
        d2 = F.max_pool2d(self.conv3(d2), kernel_size=2)
        d2 = torch.flatten(d2, 1)

        x = torch.cat((d1, d2), 1)
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

In [6]:
class model_auxiliary_loss(nn.Module):
    def __init__(self):
        super(model_auxiliary_loss, self).__init__()
        self.conv1 = nn.Conv2d(2, 10, kernel_size=3)   #2*14*14 => 10*12*12  (Maxpool => 10*6*6)  
        self.conv2 = nn.Conv2d(10, 10, kernel_size=3)  #10*6*6 => 10* 4 * 4 (Maxpool => 10*2*2)

        self.dense_digits1 = nn.Linear(40, 10)
        self.dense_digits2 = nn.Linear(40, 10)
        
        self.dense1 = nn.Linear(40, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), kernel_size=2)
        x = F.max_pool2d(self.conv2(x), kernel_size=2)
        
        x = torch.flatten(x, 1)
        
        self.digits1 = F.softmax((self.dense_digits1(x)), -1)
        self.digits2 = F.softmax((self.dense_digits2(x)), -1)
        
        x = F.relu(self.dense1(x))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

## Training/Evaluation function

In [7]:
def train_model(model, train_input, train_target, epochs, optimizer, loss_function = nn.BCELoss(), batch_size=10):
    # Inspired by exercise corrige 
    model.train()
    
    losses = []
    
    for e in range(epochs):
        sum_loss = 0
        
        for x_batch, y_batch in zip(x_train.split(batch_size),
                                    y_train.split(batch_size)):
            
            output = model(x_batch)
            loss = loss_function(output, y_batch.float())
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
            
            
        losses.append(sum_loss)
        print("Epoch %i : loss %.2f" % (e, sum_loss), end = "\r")
        
    return losses

In [8]:
def train_auxiliary_model(model, train_input, train_target, digit_target,
                          epochs, optimizer, loss_function = nn.BCELoss(), batch_size=10):
    # Inspired by exercise corrige 
    model.train()
    
    losses = []
    digit_loss = nn.CrossEntropyLoss()
    
    for e in range(epochs):
        sum_loss = 0
        sum_loss_digit1 = 0
        sum_loss_digit2 = 0
        
        for x_batch, y_batch, y_digit_batch in zip(x_train.split(batch_size),
                                                    y_train.split(batch_size),
                                                    digit_target.split(batch_size)):
            
            output = model(x_batch)

            loss_digit1 = digit_loss(model.digits1, y_digit_batch[..., 0])
            loss_digit2 = digit_loss(model.digits2, y_digit_batch[..., 1])
            loss = loss_function(output, y_batch.float())
            
            loss_total = loss_digit1 + loss_digit2 + loss

            optimizer.zero_grad()
            loss_total.backward()  
            optimizer.step()
            
            sum_loss_digit1 = sum_loss_digit1 + loss_digit1.item()
            sum_loss_digit2 = sum_loss_digit2 + loss_digit2.item()
            sum_loss = sum_loss + loss.item()
            
            
        losses.append(sum_loss)
        print("Epoch %i : loss %.2f --- loss_digit1 %.2f --- loss_digit2 %.2f" \
              % (e,sum_loss, sum_loss_digit1, sum_loss_digit2))
        
    return losses

In [9]:
def train_auxiliary_model_2(model, train_input, train_target, digit_target,
                          epochs, optimizer, loss_function = nn.BCELoss(), batch_size=10):
    # Inspired by exercise corrige 
    model.train()
    
    losses = []
    digit_loss = nn.CrossEntropyLoss()
    
    for e in range(epochs):
        sum_loss = 0
        sum_loss_digit1 = 0
        sum_loss_digit2 = 0
        
        for x_batch, y_batch, y_digit_batch in zip(x_train.split(batch_size),
                                                    y_train.split(batch_size),
                                                    digit_target.split(batch_size)):
            
            output = model(x_batch)

            loss1 = loss_function(model.pred1, y_batch.float())
            loss2 = loss_function(model.pred2, y_batch.float())
            
            loss_total = loss1 + loss2

            optimizer.zero_grad()
            loss_total.backward()  
            optimizer.step()
            
            sum_loss_digit1 = sum_loss_digit1 + loss1.item()
            sum_loss_digit2 = sum_loss_digit2 + loss2.item()
            sum_loss = sum_loss + loss_total.item()
            
            
        losses.append(sum_loss)
        print("Epoch %i : loss %.2f --- loss_digit1 %.2f --- loss_digit2 %.2f" \
              % (e,sum_loss, sum_loss_digit1, sum_loss_digit2), end="\r")
        
    return losses

In [10]:
def evaluate_model(model, test_input, test_target):
    model.eval()
    criterion = nn.BCELoss()
    preds_proba = model(test_input).view(-1)
    preds = preds_proba.masked_fill((preds_proba > 0.5), 1).masked_fill((preds_proba<0.5), 0)
    
    loss = criterion(preds_proba, test_target.float()).item()
    accuracy = (preds == test_target).sum().item()/preds.size(0)
    #accuracy = sum([pred == truth for pred, truth in zip(preds, test_target)])
    return loss, accuracy

In [11]:
def mean(x):
    return sum(x)/len(x)

def var(x):
    u = mean(x)
    return sum([(v-u)**2 for v in x])/len(x)

In [12]:
def number_parameters(model):
    return sum(p.numel() for p in model.parameters())

## Experiment

In [13]:
class model_weight_sharing(nn.Module):
    def __init__(self):
        super(model_weight_sharing, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3)        
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)
        self.conv3 = nn.Conv2d(40, 40, kernel_size=3)

        self.dense1 = nn.Linear(40, 40)
        self.dense2 = nn.Linear(40, 1)
        
    def forward(self, x):
        d1 = F.relu(self.conv1(x[:, 0:1, ...]))
        d1 = F.max_pool2d(self.conv2(d1), kernel_size=2)
        d1 = F.max_pool2d(self.conv3(d1), kernel_size=2)
        d1 = torch.flatten(d1, 1)
        
        d2 = F.relu(self.conv1(x[:, 1:2, ...]))
        d2 = F.max_pool2d(self.conv2(d2), kernel_size=2)
        d2 = F.max_pool2d(self.conv3(d2), kernel_size=2)
        d2 = torch.flatten(d2, 1)

        x = torch.cat((d1, d2), 1)
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

In [14]:
class model_1(nn.Module):
    def __init__(self):
        super(model_1, self).__init__()
        self.conv1 = nn.Conv2d(2, 20, kernel_size=5)        
        self.conv2 = nn.Conv2d(20, 40, kernel_size=5)
        self.conv3 = nn.Conv2d(40, 60, kernel_size=5)

        self.dense1 = nn.Linear(60, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(self.conv3(x), kernel_size=2)        
        
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

In [15]:
class model_2(nn.Module):
    def __init__(self):
        super(model_2, self).__init__()
        self.conv1 = nn.Conv2d(2, 20, kernel_size=5, padding=2)        
        self.conv2 = nn.Conv2d(20, 40, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(40, 20, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(20, 40, kernel_size=5, padding=2)

        self.dense1 = nn.Linear(360, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(self.conv4(x), kernel_size=2)        
        
        x = F.relu(self.dense1(torch.flatten(x, 1)))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

In [16]:
class model_auxiliary_loss(nn.Module):
    def __init__(self):
        super(model_auxiliary_loss, self).__init__()
        self.conv1 = nn.Conv2d(2, 20, kernel_size=3)   #2*14*14 => 10*12*12  (Maxpool => 10*6*6)  
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)  #10*6*6 => 10* 4 * 4 (Maxpool => 10*2*2)
        self.conv3 = nn.Conv2d(40, 60, kernel_size=3)
        self.conv4 = nn.Conv2d(60, 40, kernel_size=3)
        
        self.dense_digits1 = nn.Linear(360, 10)
        self.dense_digits2 = nn.Linear(360, 10)
        
        self.dense1 = nn.Linear(360, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(self.conv4(x), kernel_size=2)
        
        x = torch.flatten(x, 1)
        
        self.digits1 = F.softmax((self.dense_digits1(x)), -1)
        self.digits2 = F.softmax((self.dense_digits2(x)), -1)
    
        x = F.relu(self.dense1(x))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))
        return x

In [17]:
class model_auxiliary_loss_ws(nn.Module):
    def __init__(self):
        super(model_auxiliary_loss_ws, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3)   #2*14*14 => 10*12*12  (Maxpool => 10*6*6)  
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)  #10*6*6 => 10* 4 * 4 (Maxpool => 10*2*2)
        self.conv3 = nn.Conv2d(40, 60, kernel_size=3)
        self.conv4 = nn.Conv2d(60, 40, kernel_size=3)
        
        self.dense_digits = nn.Linear(360, 10)
        
        self.dense1 = nn.Linear(360, 10)
        self.dense2 = nn.Linear(10, 1)
        
    def forward(self, x):
        x1 = F.relu(self.conv1(x[:, 0:1, ...]))
        x1 = F.relu(self.conv2(x1))
        x1 = F.relu(self.conv3(x1))
        x1 = F.max_pool2d(self.conv4(x1), kernel_size=2)
        x1 = torch.flatten(x1, 1)
        
        x2 = F.relu(self.conv1(x[:, 1:2, ...]))
        x2 = F.relu(self.conv2(x2))
        x2 = F.relu(self.conv3(x2))
        x2 = F.max_pool2d(self.conv4(x2), kernel_size=2)
        x2 = torch.flatten(x1, 1)
        
        x = torch.flatten(x, 1)
        
        self.digits1 = F.softmax((self.dense_digits(x1)), -1)
        self.digits2 = F.softmax((self.dense_digits(x2)), -1)
        
        return (torch.argmax(self.digits1, axis = 1) < torch.argmax(self.digits2, axis = 1)).type(torch.float32)

In [18]:
class model_auxiliary_loss_2(nn.Module):
    def __init__(self):
        super(model_auxiliary_loss_2, self).__init__()
        self.conv1 = nn.Conv2d(2, 20, kernel_size=3, padding=2)   #2*14*14 => 10*12*12  (Maxpool => 10*6*6)  
        self.conv2 = nn.Conv2d(20, 20, kernel_size=3, padding=2)  #10*6*6 => 10* 4 * 4 (Maxpool => 10*2*2)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=3)
        self.conv4 = nn.Conv2d(20, 20, kernel_size=3)
        
        self.dense1 = nn.Linear(980, 50)
        
        self.conv5 = nn.Conv2d(20, 20, kernel_size=3, padding=2)
        self.conv6 = nn.Conv2d(20, 20, kernel_size=3, padding=2)
        self.conv7 = nn.Conv2d(20, 20, kernel_size=3)
        self.conv8 = nn.Conv2d(20, 20, kernel_size=3)
        
        self.dense2 = nn.Linear(980, 50)
        
        self.pred = nn.Linear(50, 1)
        
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        
        self.pred1 = torch.flatten(F.max_pool2d(x, kernel_size = 2), 1)
        self.pred1 = F.relu(self.dense1(self.pred1))
        self.pred1 = torch.flatten(F.sigmoid(self.pred(self.pred1)))
        
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        
        self.pred2 = torch.flatten(F.max_pool2d(x, kernel_size = 2), 1)
        self.pred2 = F.relu(self.dense2(self.pred2))
        self.pred2 = torch.flatten(F.sigmoid(self.pred(self.pred2)))
        
        return self.pred2

In [19]:
number_training = 10
epochs = 25

losses = []
train_losses = []
accuracies = []
times = []

auxiliary = False

# build_model = model_1
build_model = model_2
# build_model = model_weight_sharing

if auxiliary:
    build_model = model_auxiliary_loss_2
#     build_model = model_auxiliary_loss_ws

print("Starting %i training of %i epochs, with model containing %i parameters." % 
      (number_training, epochs, number_parameters(build_model())))

for i_train in range(number_training):
    
    start = time.time()
    
    model = build_model()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    indices_shuffle = torch.randperm(N)
    
    if not auxiliary :
        train_loss = train_model(model, 
                                x_train[indices_shuffle],
                                y_train[indices_shuffle],  
                                epochs = epochs,
                                optimizer=optimizer,
                                batch_size = 50)

    else :
        train_loss = train_auxiliary_model_2(model, 
                                        x_train[indices_shuffle],
                                        y_train[indices_shuffle], 
                                        y_train_classes[indices_shuffle],
                                        epochs = epochs,
                                        optimizer=optimizer,
                                        batch_size = 50)
    
    loss, accuracy = evaluate_model(model, x_test, y_test)
    accuracy *= 100
    print("Attempt", i_train + 1, ": loss", loss, "- accuracy %.1f%%" % accuracy)
    losses.append(loss)
    train_losses.append(train_loss)
    accuracies.append(accuracy)
    
    times.append(time.time() - start)

print("Experiment results :")
print("Loss mean : %.2f (%.3f)" % (mean(losses), var(losses)))
print("Accuracy mean : %.2f%% (%.1f)" % (mean(accuracies), var(accuracies)))
print("Average training time : %.1f seconds" % (mean(times), var(times)))

Starting 10 training of 25 epochs, with model containing 64741 parameters.
Attempt 1 : loss 0.6954890489578247 - accuracy 83.4%
Attempt 2 : loss 0.7117046117782593 - accuracy 81.8%
Attempt 3 : loss 0.6808319091796875 - accuracy 81.6%
Attempt 4 : loss 0.6996515989303589 - accuracy 82.7%
Attempt 5 : loss 0.6557032465934753 - accuracy 82.3%
Attempt 6 : loss 0.7659114599227905 - accuracy 82.5%
Attempt 7 : loss 0.6973085403442383 - accuracy 81.9%
Attempt 8 : loss 0.692147970199585 - accuracy 52.6%
Attempt 9 : loss 0.8641268014907837 - accuracy 78.6%
Attempt 10 : loss 0.729652464389801 - accuracy 81.7%
Experiment results :
Loss mean : 0.72 (0.003)
Accuracy mean : 78.91% (78.4)


TypeError: not all arguments converted during string formatting