In [1]:
#!/usr/bin/env python

import torch
from torch import nn
from torch import optim
from torch import Tensor
from torch.nn import functional as F
import dlc_practical_prologue as prologue

In [23]:
#Data generation
# N is the sample count requested from prologue.generate_pair_sets.
N=10**3
train_input,train_target,train_classes,test_input,test_target,test_classes=prologue.generate_pair_sets(N)

# Apply the same dtype normalisation to both splits: the test tensors are fed
# to the same models as the training tensors, so they must not be left in
# whatever dtype the generator happened to return.
# Targets are kept as long (class indices, as nn.CrossEntropyLoss requires);
# use .float() instead of .long() if the targets are handed directly to nn.MSELoss.
train_input=train_input.float()
train_target=train_target.long()
test_input=test_input.float()
test_target=test_target.long()

In [21]:
#Base functions adapted from the practicals
def train_model(model, train_input, train_target, mini_batch_size, crit=nn.MSELoss, eta = 1e-3, nb_epochs = 1000,print_=False):
    """Train `model` in place with plain mini-batch SGD.

    Args:
        model: module mapping a batch of inputs to a 2-D output tensor. In the
            MSE case only output column 1 is compared with the target.
        train_input: training samples, batched along dimension 0.
        train_target: one target per sample, aligned with `train_input`.
        mini_batch_size: number of samples per optimisation step. The last
            batch of an epoch is smaller when the sample count is not a
            multiple of this value.
        crit: loss *class* (it is instantiated here with no arguments).
            nn.MSELoss and nn.CrossEntropyLoss are supported.
        eta: SGD learning rate.
        nb_epochs: number of passes over the training data.
        print_: when True, print the epoch index and the summed batch losses
            every 20 epochs.

    Raises:
        NotImplementedError: if `crit` builds neither an nn.MSELoss nor an
            nn.CrossEntropyLoss.

    Note:
        The model output is passed to the criterion unchanged;
        nn.CrossEntropyLoss expects unnormalised scores (logits).
    """
    criterion = crit()
    use_mse = isinstance(criterion, nn.MSELoss)
    if not use_mse and not isinstance(criterion, nn.CrossEntropyLoss):
        # Fail before training starts instead of printing a message and then
        # crashing on an unbound `loss` variable.
        raise NotImplementedError(
            "Loss not implemented: {!r} (use nn.MSELoss or nn.CrossEntropyLoss)".format(crit))

    optimizer = optim.SGD(model.parameters(), lr = eta)
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the batch length so a ragged final batch does not make
            # narrow() run past the end of the tensors.
            size = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, size))
            target = train_target.narrow(0, b, size)

            if use_mse:
                # MSE needs matching floating dtypes; integer class labels are
                # cast so the same targets work for both supported losses.
                loss = criterion(output[:, 1], target.to(output.dtype))
            else:
                loss = criterion(output, target)

            acc_loss = acc_loss + loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if(e%20==0 and print_):
            print(e, acc_loss)
            
def compute_nb_errors(model, input, target, mini_batch_size):
    """Count the samples whose predicted class differs from `target`.

    The predicted class of a sample is the index of the largest value in the
    corresponding row of the model output.

    Args:
        model: callable mapping a batch of inputs to a 2-D score tensor
            (one row per sample).
        input: samples to classify, batched along dimension 0.
        target: one expected class per sample, aligned with `input`.
        mini_batch_size: number of samples evaluated per forward pass. The
            last batch is smaller when the sample count is not a multiple of
            this value.

    Returns:
        int: number of misclassified samples.
    """
    nb_errors = 0
    nb_samples = input.size(0)

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the batch length so a ragged final batch is handled.
            size = min(mini_batch_size, nb_samples - b)
            output = model(input.narrow(0, b, size))
            _, predicted_classes = output.max(1)
            # Flatten the targets so a (size, 1) tensor is compared
            # element-wise instead of being broadcast.
            expected = target.narrow(0, b, size).reshape(-1)
            nb_errors = nb_errors + (predicted_classes != expected).sum().item()

    return nb_errors

def run_many_times(model,crit=nn.MSELoss,mini_batch_size=100,n=10,print_=False):
    """Train and evaluate `n` freshly built models and print the test error.

    Uses the module-level `train_input`, `train_target`, `test_input` and
    `test_target` tensors.

    Args:
        model: zero-argument callable (e.g. an nn.Module subclass) that
            returns a new, untrained model on every call.
        crit: loss class forwarded to `train_model`.
        mini_batch_size: batch size used for both training and evaluation.
        n: number of independent train/evaluate rounds.
        print_: forwarded to `train_model` to enable its periodic loss output.

    Raises:
        ValueError: if `n` is not positive (no average can be computed).
    """
    if n <= 0:
        raise ValueError("n must be a positive number of runs, got {!r}".format(n))

    nb_test_samples = test_input.size(0)
    average_error=0
    for i in range(n):
        m=model()
        # print_ was previously accepted but silently dropped here.
        train_model(m, train_input, train_target,mini_batch_size,crit=crit,print_=print_)
        nb_test_errors = compute_nb_errors(m, test_input, test_target, mini_batch_size)
        error_percent = (100 * nb_test_errors) / nb_test_samples
        print('test error Net {:0.2f}% {:d}/{:d}'.format(error_percent,
                                                      nb_test_errors, nb_test_samples))
        average_error+=error_percent
    print("Average error: "+str(average_error/n))

In [17]:
#Is it better to use groups or not?
#Takes about 2 hours to run
#about 22.5% error average without groups if we exclude outliers that get stuck and don't move
#about 21.5% error average with groups if we exclude outliers that get stuck and don't move
class Net(nn.Module):
    """Small conv net: one 3x3 convolution, 3x3 max-pooling, two linear layers.

    Both input channels are mixed by every one of the 32 convolution filters.
    The flattened feature size of 512 matches 32 channels of 4x4 maps, which
    is what 14x14 inputs produce -- assumes inputs are (batch, 2, 14, 14);
    TODO confirm against the data generator.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3)
        self.fc1 = nn.Linear(in_features=512, out_features=20)
        self.fc2 = nn.Linear(in_features=20, out_features=2)

    def forward(self, x):
        """Return per-sample softmax probabilities over the two outputs."""
        pooled = F.max_pool2d(self.conv1(x), kernel_size=3, stride=3)
        flat = F.relu(pooled).view(-1, 512)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        return F.softmax(scores, dim=1)

class NetGroups(nn.Module):
    """Same layout as a one-conv / two-linear net, but with a grouped convolution.

    With `groups=2` the two input channels are convolved separately, each by
    its own 16 of the 32 filters, before the fully connected layers combine
    them. The flattened feature size of 512 matches 32 channels of 4x4 maps
    -- assumes inputs are (batch, 2, 14, 14); TODO confirm against the data
    generator.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3, groups=2)
        self.fc1 = nn.Linear(in_features=512, out_features=20)
        self.fc2 = nn.Linear(in_features=20, out_features=2)

    def forward(self, x):
        """Return per-sample softmax probabilities over the two outputs."""
        pooled = F.max_pool2d(self.conv1(x), kernel_size=3, stride=3)
        flat = F.relu(pooled).view(-1, 512)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        return F.softmax(scores, dim=1)

# Run the same experiment for both architectures, announcing each one first.
for banner, net_class in (("Without groups:", Net), ("With groups:", NetGroups)):
    print(banner)
    run_many_times(net_class)

With groups:
test error Net 22.10% 221/1000
test error Net 22.10% 221/1000
test error Net 21.20% 212/1000
test error Net 19.00% 190/1000
test error Net 19.40% 194/1000
test error Net 19.20% 192/1000
test error Net 19.80% 198/1000
test error Net 22.90% 229/1000
test error Net 52.60% 526/1000
test error Net 21.10% 211/1000
test error Net 22.30% 223/1000
test error Net 21.50% 215/1000
test error Net 21.00% 210/1000
test error Net 22.00% 220/1000
test error Net 21.90% 219/1000
test error Net 47.40% 474/1000
test error Net 22.10% 221/1000
test error Net 19.80% 198/1000
test error Net 23.00% 230/1000
test error Net 22.10% 221/1000
Average error: 24.125


In [None]:
# Question: does cross-entropy loss do better than MSE?
# Observation: it doesn't seem significantly better.
# NOTE(review): NetGroups.forward already ends in a softmax, and
# nn.CrossEntropyLoss expects unnormalised scores, so this run feeds
# probabilities to the loss -- worth checking before concluding.
ce_banner = "With Cross Entropy Loss:"
print(ce_banner)
run_many_times(model=NetGroups, crit=nn.CrossEntropyLoss)

With Cross Entropy Loss:
test error Net 21.00% 210/1000
test error Net 21.20% 212/1000
