In [1]:
import dlc_practical_prologue as plg
from torch import nn
from torch.nn import functional as F
import torch
from operator import mul as multiplicator
from functools import reduce

In [2]:
if (torch.cuda.is_available()):
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [43]:
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = plg.generate_pair_sets(N)
train_input = train_input.to(device)
train_target = train_target.to(device)
train_classes = train_classes.to(device)
test_input = test_input.to(device)
test_target = test_target.to(device)
test_classes = test_classes.to(device)

In [44]:
# Normalization
mu, std = train_input.mean(), train_input.std()
train_input = train_input.sub(mu).div(std)
test_input = test_input.sub(mu).div(std)

# Baseline: simple network trained with cross entropy

In [None]:
class Net(nn.Module):
    def __init__(self, nb_hidden1 = 50, nb_hidden2 = 10):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(2, 8, kernel_size=3, padding=2)
        self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, dilation=1)
        #self.conv3 = nn.Conv2d(16, 32, kernel_size=3)
        self.fc1 = nn.Linear(16 * 3 * 3, nb_hidden1)
        self.fc2 = nn.Linear(nb_hidden1, nb_hidden2)
        self.fc3 = nn.Linear(nb_hidden2, 2)

    def forward(self, x):
        x = F.relu(self.avgpool1(self.conv1(x)))
        x = F.relu(self.maxpool1(self.conv2(x)))
        x = F.relu(self.fc1(x.view(-1, 16*3*3)))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [99]:
class Net(nn.Module):
    def __init__(self, nb_hidden1 = 50, nb_hidden2 = 10):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(2, 8, kernel_size=3, groups=2)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, dilation=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=2)
        self.fc1 = nn.Linear(32*2*2, nb_hidden1)
        self.fc2 = nn.Linear(nb_hidden1, nb_hidden2)
        self.fc3 = nn.Linear(nb_hidden2, 2)

    def forward(self, x):
        x = F.relu(self.maxpool1(self.conv1(x)))
        x = F.relu(self.maxpool2(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.fc1(x.view(-1, 32*2*2)))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [96]:
# Check on the sizes
X = torch.empty((1, 2, 14, 14)).normal_()
net = Net()
for layer in net.children():
    if isinstance(layer, nn.Linear):
        X = X.view(-1, reduce(multiplicator, list(X.shape[1:])))
    X = layer(X)
    print(layer, 'output shape:\t', X.shape)

Conv2d(2, 8, kernel_size=(3, 3), stride=(1, 1), groups=2) output shape:	 torch.Size([1, 8, 12, 12])
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) output shape:	 torch.Size([1, 8, 6, 6])
Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) output shape:	 torch.Size([1, 16, 6, 6])
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) output shape:	 torch.Size([1, 16, 3, 3])
Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1)) output shape:	 torch.Size([1, 32, 2, 2])
Linear(in_features=128, out_features=50, bias=True) output shape:	 torch.Size([1, 50])
Linear(in_features=50, out_features=10, bias=True) output shape:	 torch.Size([1, 10])
Linear(in_features=10, out_features=2, bias=True) output shape:	 torch.Size([1, 2])


In [72]:
def train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, optim="SGD", momentum = 0, nesterov = False):
    
    if (optim == "SGD"):
        optimizer = torch.optim.SGD(model.parameters(), lr = eta, momentum = momentum, nesterov = nesterov)
    if (optim == "Adam"):
        optimizer = torch.optim.Adam(model.parameters(), lr = eta)
        
    for e in range(0, epochs):
        for input_data, target_data in zip(train_input.split(mini_batch_size), train_target.split(mini_batch_size)):
            output = model(input_data)
            loss = criterion(output, target_data)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [73]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    tot_err = 0
    for input_data, target_data in zip(data_input.split(mini_batch_size), data_target.split(mini_batch_size)):
        res = model(input_data)
        for i, r in enumerate(res):
            pred = r.max(0)[1].item()
            if(target_data[i])!=pred:
                tot_err+=1
    return tot_err

## First Net

## SGD 

In [46]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 250
eta = 0.01
train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta)

In [47]:
compute_nb_errors(model, train_input, train_target, mini_batch_size)

171

In [48]:
compute_nb_errors(model, test_input, test_target, mini_batch_size)

251

### Optimize over eta (simple optimize)

In [49]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
N = 10
for eta in etas:
    tot_eta = 0
    for i in range(0, N):
        train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta)
        err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
        tot_eta+=err
    print("Eta = {}, avg_err = {}".format(eta, tot_eta/N))

Eta = 1e-05, avg_err = 530.0
Eta = 0.0001, avg_err = 509.5
Eta = 0.001, avg_err = 456.1
Eta = 0.01, avg_err = 224.5
Eta = 0.1, avg_err = 350.5


### Optimize over eta, momentum, nesterov

In [50]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-4, 1e-3, 1e-2, 1e-1]
momentum = [1e-4, 1e-3, 1e-2, 1e-1]
N = 3
for eta in etas:
    for m in momentum:
        for nest in [False, True]:
            tot_eta = 0
            for i in range(0, N):
                train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "SGD", m, nest)
                err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
                tot_eta+=err
            print("Eta = {}, momentum = {}, nesterov = {}, avg_err = {}".format(eta, m, nest, tot_eta/N))

Eta = 0.0001, momentum = 0.0001, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.0001, nesterov = True, avg_err = 470.0
Eta = 0.0001, momentum = 0.001, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.001, nesterov = True, avg_err = 470.0
Eta = 0.0001, momentum = 0.01, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.01, nesterov = True, avg_err = 470.0
Eta = 0.0001, momentum = 0.1, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.1, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.0001, nesterov = False, avg_err = 470.0
Eta = 0.001, momentum = 0.0001, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.001, nesterov = False, avg_err = 470.0
Eta = 0.001, momentum = 0.001, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.01, nesterov = False, avg_err = 469.6666666666667
Eta = 0.001, momentum = 0.01, nesterov = True, avg_err = 409.6666666666667
Eta = 0.001, momentum = 0.1, nesterov = False, avg_err = 319.33333333

## Adam

In [54]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 250
eta = 0.01
train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "Adam")

In [55]:
compute_nb_errors(model, train_input, train_target, mini_batch_size)

0

In [56]:
compute_nb_errors(model, test_input, test_target, mini_batch_size)

196

### Optimize over eta (simple optimize)

In [57]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
N = 10
for eta in etas:
    tot_eta = 0
    for i in range(0, N):
        train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "Adam")
        err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
        tot_eta+=err
    print("Eta = {}, avg_err = {}".format(eta, tot_eta/N))

Eta = 1e-05, avg_err = 325.2
Eta = 0.0001, avg_err = 235.6
Eta = 0.001, avg_err = 234.2
Eta = 0.01, avg_err = 236.5
Eta = 0.1, avg_err = 470.0


## Second Net

## SGD 

In [100]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 250
eta = 0.01
train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta)

In [101]:
compute_nb_errors(model, train_input, train_target, mini_batch_size)

148

In [102]:
compute_nb_errors(model, test_input, test_target, mini_batch_size)

226

### Optimize over eta (simple optimize)

In [103]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
N = 10
for eta in etas:
    tot_eta = 0
    for i in range(0, N):
        train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta)
        err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
        tot_eta+=err
    print("Eta = {}, avg_err = {}".format(eta, tot_eta/N))

Eta = 1e-05, avg_err = 530.0
Eta = 0.0001, avg_err = 530.0
Eta = 0.001, avg_err = 470.0
Eta = 0.01, avg_err = 252.5
Eta = 0.1, avg_err = 229.7


### Optimize over eta, momentum, nesterov

In [104]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-4, 1e-3, 1e-2, 1e-1]
momentum = [1e-4, 1e-3, 1e-2, 1e-1]
N = 3
for eta in etas:
    for m in momentum:
        for nest in [False, True]:
            tot_eta = 0
            for i in range(0, N):
                train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "SGD", m, nest)
                err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
                tot_eta+=err
            print("Eta = {}, momentum = {}, nesterov = {}, avg_err = {}".format(eta, m, nest, tot_eta/N))

Eta = 0.0001, momentum = 0.0001, nesterov = False, avg_err = 531.6666666666666
Eta = 0.0001, momentum = 0.0001, nesterov = True, avg_err = 476.6666666666667
Eta = 0.0001, momentum = 0.001, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.001, nesterov = True, avg_err = 470.0
Eta = 0.0001, momentum = 0.01, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.01, nesterov = True, avg_err = 470.0
Eta = 0.0001, momentum = 0.1, nesterov = False, avg_err = 470.0
Eta = 0.0001, momentum = 0.1, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.0001, nesterov = False, avg_err = 470.0
Eta = 0.001, momentum = 0.0001, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.001, nesterov = False, avg_err = 470.0
Eta = 0.001, momentum = 0.001, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.01, nesterov = False, avg_err = 470.0
Eta = 0.001, momentum = 0.01, nesterov = True, avg_err = 470.0
Eta = 0.001, momentum = 0.1, nesterov = False, avg_err = 409.66666666

## Adam

In [105]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 250
eta = 0.01
train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "Adam")

In [106]:
compute_nb_errors(model, train_input, train_target, mini_batch_size)

0

In [107]:
compute_nb_errors(model, test_input, test_target, mini_batch_size)

217

### Optimize over eta (simple optimize)

In [108]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
mini_batch_size = 100
epochs = 100
etas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
N = 10
for eta in etas:
    tot_eta = 0
    for i in range(0, N):
        train_model(model, train_input, train_target, mini_batch_size, criterion, epochs, eta, "Adam")
        err = compute_nb_errors(model, test_input, test_target, mini_batch_size)
        tot_eta+=err
    print("Eta = {}, avg_err = {}".format(eta, tot_eta/N))

Eta = 1e-05, avg_err = 280.9
Eta = 0.0001, avg_err = 198.2
Eta = 0.001, avg_err = 205.6
Eta = 0.01, avg_err = 216.1
Eta = 0.1, avg_err = 470.0


In [109]:
train_input_left = train_input[:,0,:,:].view(N,1,14,14)
train_classes_left = plg.convert_to_one_hot_labels(train_input, train_classes[:,0]).type(torch.LongTensor)
test_input_left = test_input[:,0,:,:].view(N,1,14,14)
test_classes_left = plg.convert_to_one_hot_labels(test_input, test_classes[:,0]).type(torch.LongTensor)

RuntimeError: shape '[10, 1, 14, 14]' is invalid for input of size 196000

In [9]:
def train_model(model, train_input, train_target, mini_batch_size):
    
    if (torch.cuda.is_available()):
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
        
    model.to(device)
    train_input = train_input.to(device)
    train_target = train_target.to(device)
    
    criterion = nn.CrossEntropyLoss().to(device)
    eta = 1e-1
    epochs = 250
    optimizer = torch.optim.SGD(model.parameters(), lr = eta)
    
    for e in range(0, epochs):
        for input_data, target_data in zip(train_input.split(mini_batch_size), train_target.split(mini_batch_size)):
            output = model(input_data)
            #print(output)
            loss = criterion(output, target_data)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [10]:
def compute_nb_errors(model, input, target, mini_batch_size):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if target[b + k] != predicted_classes[k]:
                nb_errors = nb_errors + 1

    return nb_errors

In [14]:
model = Net()
if (torch.cuda.is_available()):
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model.to(device)
train_model(model, train_input_left, train_classes[:,0], 100)

In [15]:
compute_nb_errors(model, train_input_left, test_classes[:,0], 100)

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'