In [97]:
import torch
import dlc_practical_prologue as prologue

from torch import optim
from torch.nn import functional as F
from torch import nn
from torch.autograd import Variable

In [98]:
def hand_comparison(digit_1, digit_2):
    """Derive the pair label from the two digit predictions.

    The predicted digit of each sample is the index of the largest score
    along dimension 1 of the corresponding argument.

    Args:
        digit_1: 2-D tensor of class scores for the first digit, one row per
            sample.
        digit_2: 2-D tensor of class scores for the second digit, with the
            same number of rows as ``digit_1``.

    Returns:
        1-D floating-point tensor with one entry per sample: 1 where the
        predicted first digit is less than or equal to the predicted second
        digit, 0 otherwise.
    """
    _, predicted_1 = torch.max(digit_1, 1)
    _, predicted_2 = torch.max(digit_2, 1)

    # One vectorized comparison instead of a Python loop over the samples.
    # The cast keeps the floating-point dtype torch.zeros used to provide.
    return (predicted_1 <= predicted_2).to(torch.get_default_dtype())

In [99]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    """Count pair-label errors when the label is derived from digit predictions.

    ``model`` is called on successive mini-batches and must return two tensors
    of digit scores. ``hand_comparison`` turns them into 0/1 predictions, which
    are compared with ``data_target``.

    Args:
        model: Callable returning ``(digit_1_scores, digit_2_scores)`` for a
            batch of inputs.
        data_input: Tensor of inputs, indexed by sample along dimension 0.
        data_target: Tensor of expected 0/1 labels, one per sample.
        mini_batch_size: Maximum number of samples per forward pass. The last
            batch may be smaller.

    Returns:
        Number of samples whose derived label differs from the target, as a
        Python int.
    """
    nb_data_errors = 0

    # Evaluation only: there is no need to record an autograd graph.
    with torch.no_grad():
        for b in range(0, data_input.size(0), mini_batch_size):
            # Slicing clamps at the end of the data, so a final batch shorter
            # than mini_batch_size is handled (narrow would raise instead).
            batch_input = data_input[b:b + mini_batch_size]
            batch_target = data_target[b:b + mini_batch_size]

            digit_1, digit_2 = model(batch_input)
            # 0/1 label computed "by hand" from the two predicted digits.
            predicted_class = hand_comparison(digit_1, digit_2)

            # Align dtype and device with the targets before comparing whole
            # batches at once.
            predicted_class = predicted_class.to(device=batch_target.device,
                                                 dtype=batch_target.dtype)
            nb_data_errors += int((predicted_class != batch_target).sum().item())

    return nb_data_errors

In [100]:
def compute_nb_errors_comparison(model, data_input, data_target, data_classes, mini_batch_size):
    """Count the errors of a model that predicts both digits and the pair label.

    ``model`` must return ``(digit1_scores, digit2_scores, result_scores)``.
    The predicted class of each output is the index of its largest score
    along dimension 1.

    Args:
        model: Callable returning the three score tensors for a batch.
        data_input: Tensor of inputs, indexed by sample along dimension 0.
        data_target: Tensor of expected pair labels, one per sample.
        data_classes: 2-D tensor whose columns 0 and 1 hold the expected
            classes of the first and second digit.
        mini_batch_size: Maximum number of samples per forward pass. The last
            batch may be smaller.

    Returns:
        Tuple of Python ints
        ``(nb_data_errors, nb_errors_comparison, nb_errors_digit1, nb_errors_digit2)``:
        wrong pair labels, samples whose predicted pair label contradicts the
        model's own digit predictions, wrong first digits, wrong second digits.
    """
    nb_data_errors = 0
    nb_errors_comparison = 0
    nb_errors_digit1 = 0
    nb_errors_digit2 = 0

    # Evaluation only: there is no need to record an autograd graph.
    with torch.no_grad():
        for b in range(0, data_input.size(0), mini_batch_size):
            # A slice clamps at the end of the data, so a short final batch is
            # handled (narrow would raise instead).
            batch = slice(b, b + mini_batch_size)

            digit1, digit2, result = model(data_input[batch])
            _, predicted_classes = torch.max(result, 1)
            _, predicted_classes_digit1 = torch.max(digit1, 1)
            _, predicted_classes_digit2 = torch.max(digit2, 1)

            nb_data_errors += (predicted_classes != data_target[batch]).sum().item()
            nb_errors_digit1 += (predicted_classes_digit1 != data_classes[batch, 0]).sum().item()
            nb_errors_digit2 += (predicted_classes_digit2 != data_classes[batch, 1]).sum().item()

            # The pair label is 1 when digit1 <= digit2 (same convention as
            # hand_comparison), so a tie predicted as class 0 is inconsistent too.
            first_not_greater = predicted_classes_digit1 <= predicted_classes_digit2
            inconsistent = (first_not_greater & (predicted_classes == 0)) \
                | (~first_not_greater & (predicted_classes == 1))
            nb_errors_comparison += inconsistent.sum().item()

    return nb_data_errors, nb_errors_comparison, nb_errors_digit1, nb_errors_digit2

In [101]:
def accuracy_based_on_imgs(model, data_input, data_target):
    """Error rate of the pair label derived from the two digit predictions.

    Despite its name, the function returns ``1 - accuracy``. The model is run
    once on the whole ``data_input`` and its third output is ignored.

    Args:
        model: Callable returning three tensors; the first two hold the class
            scores of the first and second digit.
        data_input: Tensor of inputs, indexed by sample along dimension 0.
        data_target: Tensor of expected 0/1 pair labels.

    Returns:
        Fraction of samples, as a float, for which
        ``predicted digit1 <= predicted digit2`` disagrees with the target.
    """
    scores_first, scores_second, _ = model(data_input)

    # Index of the best-scoring class for each digit.
    first_digit = scores_first.data.max(1)[1]
    second_digit = scores_second.data.max(1)[1]

    derived_label = (first_digit <= second_digit).long()
    nb_correct = (derived_label == data_target).sum().item()

    return 1 - nb_correct / data_input.size(0)

In [102]:
def accuracy_based_on_result(model, data_input, data_target):
    """Error rate of the pair label predicted directly by the model.

    Despite its name, the function returns ``1 - accuracy``. The model is run
    once on the whole ``data_input``; only its third output is used.

    Args:
        model: Callable returning three tensors; the third holds the scores
            of the pair label.
        data_input: Tensor of inputs, indexed by sample along dimension 0.
        data_target: Tensor of expected pair labels, one per sample.

    Returns:
        Fraction of samples, as a float, whose predicted pair label differs
        from the target.
    """
    digit1, digit2, result = model(data_input)

    _, predictions = torch.max(result.data, 1)
    # Compare against the arguments actually passed in. The previous version
    # read the undefined globals `test_target_` and `total`.
    well_predicted_count = (predictions == data_target).sum().item()

    return 1 - well_predicted_count / data_input.size(0)

In [103]:
######################################################################
def train_model_decay(model, train_input, train_target, train_classes, nb_epochs, mini_batch_size,
                      eta0=1e-1, decay=1):
    """Train a three-output model with SGD and a per-epoch decaying learning rate.

    The loss is the sum of three cross-entropy terms: pair label, first digit
    and second digit. The learning rate used during epoch ``e`` (starting at
    0) is ``eta0 / (1 + decay * e)``.

    Args:
        model: ``nn.Module`` returning ``(digit1_scores, digit2_scores,
            result_scores)`` for a batch; updated in place.
        train_input: Tensor of training inputs, indexed by sample along
            dimension 0.
        train_target: Tensor of expected pair labels.
        train_classes: 2-D tensor whose columns 0 and 1 hold the expected
            classes of the first and second digit.
        nb_epochs: Number of passes over the training data.
        mini_batch_size: Maximum number of samples per step. The last batch of
            an epoch may be smaller.
        eta0: Learning rate of the first epoch.
        decay: Decay factor applied per epoch.
    """
    criterion = nn.CrossEntropyLoss()
    # Build the optimizer once; only its learning rate changes between epochs.
    optimizer = optim.SGD(model.parameters(), lr=eta0)

    for e in range(nb_epochs):
        # Decay with the current epoch index. Using nb_epochs here would give
        # a constant learning rate for the whole run.
        eta = eta0 / (1 + decay * e)
        for param_group in optimizer.param_groups:
            param_group['lr'] = eta

        for b in range(0, train_input.size(0), mini_batch_size):
            # A slice clamps at the end of the data, so a short final batch is
            # handled (narrow would raise instead).
            batch = slice(b, b + mini_batch_size)

            digit1, digit2, result = model(train_input[batch])

            loss_result = criterion(result, train_target[batch])
            loss_digit1 = criterion(digit1, train_classes[batch, 0])
            loss_digit2 = criterion(digit2, train_classes[batch, 1])
            loss = loss_result + loss_digit1 + loss_digit2

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [104]:
######################################################################
def train_model(model, train_input, train_target, train_classes, nb_epochs, mini_batch_size,
                eta=1e-3):
    """Train a two-output digit classifier with Adam.

    The loss is the sum of the cross-entropy of the first-digit scores and of
    the second-digit scores against the matching column of ``train_classes``.

    Args:
        model: ``nn.Module`` returning ``(digit1_scores, digit2_scores)`` for
            a batch; updated in place.
        train_input: Tensor of training inputs, indexed by sample along
            dimension 0.
        train_target: Unused; kept so the signature matches
            ``train_model_decay``.
        train_classes: 2-D tensor whose columns 0 and 1 hold the expected
            classes of the first and second digit.
        nb_epochs: Number of passes over the training data.
        mini_batch_size: Maximum number of samples per step. The last batch of
            an epoch may be smaller.
        eta: Learning rate passed to Adam.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=eta)

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            # A slice clamps at the end of the data, so a short final batch is
            # handled (narrow would raise instead).
            batch = slice(b, b + mini_batch_size)

            digit1, digit2 = model(train_input[batch])

            loss_digit1 = criterion(digit1, train_classes[batch, 0])
            loss_digit2 = criterion(digit2, train_classes[batch, 1])
            loss = loss_digit1 + loss_digit2

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [105]:
######################################################################   
class ConvNoWS(nn.Module):
    """Convolutional classifier applied to each channel of a two-channel input.

    ``forward`` splits the input into its channels 0 and 1, runs each through
    the same layers and returns one tensor of 10 class scores per channel.
    """

    def __init__(self):
        super().__init__()

        # 1 -> 32 channels: 3x3 convolution (padding 1), ReLU, 2x2 max pooling.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # 32 -> 64 channels: 2x2 convolution (padding 1), ReLU, 2x2 max pooling.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Convolution output size: (in - kernel + 2 * padding) / stride + 1.
        # Assuming 14x14 images (TODO confirm against the data):
        #   layer1: (14 - 3 + 2) / 1 + 1 = 14, pooled to 7
        #   layer2: (7 - 2 + 2) / 1 + 1 = 8, pooled to 4
        # which gives the 4 * 4 * 64 features expected by fc1.
        self.fc1 = nn.Linear(4 * 4 * 64, 1000)

        # 1000 hidden features -> 10 class scores.
        self.fc2 = nn.Linear(1000, 10)

    def _digit_scores(self, digit):
        """Run one single-channel batch through the whole network."""
        features = self.layer2(self.layer1(digit))
        hidden = F.relu(self.fc1(features.view(-1, 4 * 4 * 64)))
        return self.fc2(hidden)

    def forward(self, x):
        # Indexing with a list keeps the channel dimension (size 1).
        scores_first = self._digit_scores(x[:, [0]])
        scores_second = self._digit_scores(x[:, [1]])

        return scores_first, scores_second

In [106]:
def get_tests(n):
    """Generate ``n`` test sets with ``prologue.generate_pair_sets(1000)``.

    Each call to the generator returns six values; the first three are
    discarded and the last three are kept (the notebook treats them as test
    input, test target and test classes).

    Args:
        n: Number of test sets to generate.

    Returns:
        List of ``n`` three-element lists
        ``[test_input, test_target, test_classes]``.
    """
    def draw_test_set():
        _, _, _, inputs, targets, classes = prologue.generate_pair_sets(1000)
        return [inputs, targets, classes]

    return [draw_test_set() for _ in range(0, n)]


In [107]:
    
######################################################################   
    
# Training data: only the first three values returned by the generator are used.
train_input, train_target, train_classes, _, _, _ \
    = prologue.generate_pair_sets(1000)

model = ConvNoWS()
nb_epochs = 25
mini_batch_size = 100

train_model(model, train_input, train_target, train_classes, nb_epochs, mini_batch_size)

# Independent test sets; each entry is [test_input, test_target, test_classes].
L = get_tests(10)

nb_train_errors = compute_nb_errors(model, train_input, train_target, mini_batch_size)

print('train error ConvNoWS {:0.2f}%{:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                        nb_train_errors, train_input.size(0)))

# Sum of the error counts over all test sets, averaged after the loop.
nb_moy_test_error = 0

for k in range (0, len(L)):
    nb_test_errors = compute_nb_errors(model, L[k][0], L[k][1], mini_batch_size)

    nb_moy_test_error += nb_test_errors

    print('test error ConvNoWS {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / L[k][0].size(0),
                                                nb_test_errors, L[k][0].size(0)))

# Average over the number of test sets actually generated, not a hard-coded 10.
print('Average test error ConvNoWS {:0.2f}% {:0.1f}/{:d}'.format((100*nb_moy_test_error/len(L)) / L[0][0].size(0),nb_moy_test_error/len(L), L[0][0].size(0) ))


train error ConvNoWS 0.00%0/1000
test error ConvNoWS 4.50% 45/1000
test error ConvNoWS 3.50% 35/1000
test error ConvNoWS 4.00% 40/1000
test error ConvNoWS 3.60% 36/1000
test error ConvNoWS 3.50% 35/1000
test error ConvNoWS 4.30% 43/1000
test error ConvNoWS 3.10% 31/1000
test error ConvNoWS 3.70% 37/1000
test error ConvNoWS 4.00% 40/1000
test error ConvNoWS 3.20% 32/1000
Average test error ConvNoWS 3.74% 37.4/1000
