In [1]:
import torch
from torch import nn

#import dlc_practical_prologue as prologue
from Utils.DataImport import DataImport
from Utils.errs import compute_nb_errors_pred as errorr
from Utils.Networks import CNN_SP
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
#set to use CPU or GPU automatically based on what is available
def select_device():
    """Return the torch device to run on: CUDA when available, otherwise the CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
    
# Choose the compute device once; DataImport is built with it.
device = select_device()
print('Device is', device)

D = DataImport(device)
Train, Test = D.Train, D.Test

# Each line below pulls the matching train/test entries out of the two
# dictionaries exposed by DataImport (train entry first, then test entry).
train_input, test_input = Train["Train Input"], Test["Test Input"]
train_classes, test_classes = Train["Train Classes"], Test["Test Classes"]
train_target, test_target = Train["Train Target"], Test["Test Target"]

# "TB" / "CB" entries: the binary variants of the targets and of the classes.
# NOTE(review): their exact encoding is defined in Utils.DataImport — confirm there.
train_target_binary, test_target_binary = Train["Train TB"], Test["Test TB"]
train_classes_binary, test_classes_binary = Train["Train CB"], Test["Test CB"]

# "CB0" / "CB1" entries (presumably one per image of the pair — verify in Utils.DataImport).
train_classes_binary_0, test_classes_binary_0 = Train["Train CB0"], Test["Test CB0"]
train_classes_binary_1, test_classes_binary_1 = Train["Train CB1"], Test["Test CB1"]


Device is cuda


In [3]:
def train_model(model, criterion_main, criterion_aux, optimizer, train_input, train_classes, train_target, mini_batch_size,
                nb_epochs=25, l1_shrinkage=0.0, verbose=False):
    """Train ``model`` in place on the sum of a main loss and an auxiliary loss.

    For every mini-batch, ``model(batch)`` must return the pair
    ``(output_aux, output)``. The auxiliary loss is computed on ``output_aux``
    against the matching slice of ``train_classes``, the main loss on
    ``output`` against the matching slice of ``train_target``; their sum is
    back-propagated and a single ``optimizer`` step is taken.

    Args:
        model: callable returning ``(output_aux, output)`` for a batch. Its
            ``parameters()`` are read directly only when ``l1_shrinkage > 0``.
        criterion_main: loss called as ``criterion_main(output, target_batch)``.
        criterion_aux: loss called as ``criterion_aux(output_aux, classes_batch)``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once
            per mini-batch.
        train_input: tensor of inputs, batched along dimension 0.
        train_classes: auxiliary targets, sliced along dimension 0 like
            ``train_input``.
        train_target: main targets, sliced along dimension 0 like
            ``train_input``.
        mini_batch_size: number of samples per mini-batch. The last batch may
            be smaller when the sample count is not a multiple of it.
        nb_epochs: number of passes over the data (default 25, the value that
            used to be hard-coded).
        l1_shrinkage: after each optimizer step, every parameter is moved
            towards zero by ``min(|p|, l1_shrinkage)`` (soft-thresholding).
            The default ``0.0`` disables the step, matching the previous
            behaviour where the threshold was fixed to zero.
        verbose: when true, print the loss summed over the mini-batches at
            the end of every epoch.

    Returns:
        None.
    """
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0.0

        # We do this with mini-batches
        for b in range(0, nb_samples, mini_batch_size):
            # Slicing clamps at the end of the tensor, so a final, shorter
            # mini-batch is handled; narrow() with a fixed length raised a
            # RuntimeError when nb_samples was not a multiple of the batch size.
            stop = b + mini_batch_size
            mini_batch_input = train_input[b:stop]
            mini_batch_target = train_target[b:stop]
            mini_batch_target_aux = train_classes[b:stop]

            output_aux, output = model(mini_batch_input)

            loss_aux = criterion_aux(output_aux, mini_batch_target_aux)
            loss_main = criterion_main(output, mini_batch_target)
            loss = loss_main + loss_aux

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if l1_shrinkage > 0:
                # Shrink every weight towards zero without tracking gradients.
                with torch.no_grad():
                    for p in model.parameters():
                        p.sub_(p.sign() * p.abs().clamp(max=l1_shrinkage))

            if verbose:
                # .item() forces a host/device sync, so only pay for it on request.
                sum_loss += loss.item()

        if verbose:
            print('Epoch', e, 'summed loss:', sum_loss)

In [4]:
# Hyper-parameters
eta = 0.001            # learning rate handed to Adam
mini_batch_size = 100
momentum = 0.025       # NOTE(review): not used by any cell visible here (Adam is built without it)

# Net with weight sharing; the same MSE criterion serves as main and auxiliary loss.
model = CNN_SP().to(device)
criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=eta)

train_model(
    model.train(),
    criterion,
    criterion,
    optimizer,
    train_input,
    train_classes_binary,
    train_target_binary,
    mini_batch_size,
)

In [5]:
# The model was last put in training mode (model.train() in the training cell).
# Switch to evaluation mode before measuring errors so that any mode-dependent
# layers (dropout, batch-norm) behave deterministically. This is a no-op if the
# network has no such layers or if the error helper already switches mode.
# NOTE(review): CNN_SP and compute_nb_errors_pred are defined in Utils — confirm.
model.eval()

# errorr returns the error count together with the predictions.
train_e, train_pred = errorr(model, train_input, train_target, mini_batch_size)
test_e, test_pred = errorr(model, test_input, test_target, mini_batch_size)

# Error rates are the error counts divided by the number of samples.
print('Train error:', train_e/train_input.size(0))
print('Test error:', test_e/test_input.size(0))

print('Number of parameters:', sum(p.numel() for p in model.parameters() if p.requires_grad))

Train error: 0.007
Test error: 0.231
Number of parameters: 95928


# Take a look at the input vs prediction

### TRAIN

In [6]:
# Targets of the first 100 training samples, displayed so they can be compared
# by eye with the predictions shown in the following cell.
train_target[:100]

tensor([0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
        1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
        1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
        1, 0, 1, 1], device='cuda:0')

In [7]:
# First entry of the predictions returned by errorr for the training set.
train_pred[0] #predicted pairs in training for first mini batch of 100

# NOTE(review): the remark below seems to describe an earlier output layout
# (two columns / several vectors) that is no longer displayed — confirm or remove.
#first 15 elements of each vector below are left and right columns above, respectively

tensor([0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
        1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
        1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
        0, 0, 1, 1], device='cuda:0')

In [8]:
#for i in range(10):
#    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#    ax1.imshow(train_input[i,0,:,:].cpu(), cmap='gray', interpolation='none')
#    ax2.imshow(train_input[i,1,:,:].cpu(), cmap='gray', interpolation='none')

#    plt.show()

In [9]:
#train_classes_binary_0[:10]

In [10]:
#train_classes_binary_1[:10]

### TEST

In [11]:
# Targets of the first 100 test samples, displayed so they can be compared
# by eye with the predictions shown in the following cell.
test_target[:100]

tensor([1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
        0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 1, 0], device='cuda:0')

In [12]:
# First entry of the predictions returned by errorr for the test set.
test_pred[0] #predicted pairs in testing for first mini batch of 100

tensor([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
        0, 1, 1, 0], device='cuda:0')

In [13]:
#for i in range(10):
#    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#    ax1.imshow(test_input[i,0,:,:].cpu(), cmap='gray', interpolation='none')
#    ax2.imshow(test_input[i,1,:,:].cpu(), cmap='gray', interpolation='none')

#    plt.show()

In [14]:
#test_classes_binary_0[:10]

In [15]:
#test_classes_binary_1[:10]