# Mini-Project 1

Recap. session 03.04.2019: Discussion with teaching assistant. 

Do 3 different architectures:
1. Fully-Connected Networks (Deep & Shallow)
2. Convolution Neural Network 
3. Residual Neural Network

For each architecture: 
- output 0 or 1 (directly predict if number is smaller or not)
- output class (give digit of each image and do comparison)
- output 0 or 1 + class of digit (enables 3 different loss functions)

Note:
<br> - Do weight sharing: each image enters a different building block 

In [3]:
from helper_functions import *
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

## Load datasets

In [4]:
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(N)

Name | Tensor dimension | Type | Content
-----|-----|-----|-----
`train_input` | N × 2 × 14 × 14 | float32 | Images
`train_target` | N | int64 | Class to predict ∈ {0, 1}
`train_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}
`test_input` | N × 2 × 14 × 14 | float32 | Images
`test_target` | N | int64 | Class | to predict ∈ {0, 1}
`test_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}

# Fully-Connected Networks

## 1. Output 0 or 1

### Preprocess Data

In [5]:
train_input, test_input = reshape_data(train_input, test_input)

### Define Models

In [None]:
def create_shallow_model():
    return nn.Sequential(
        nn.Linear(196, 50),
        nn.BatchNorm1d(50),
        nn.ReLU(),
        nn.Linear(50, 1))

In [None]:
def create_deep_model():
    return nn.Sequential(
        nn.Linear(196, 4),
        nn.BatchNorm1d(4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.BatchNorm1d(8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.BatchNorm1d(16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.BatchNorm1d(32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 1))

In [None]:
models = [create_shallow_model, create_deep_model]

### Test Models

In [None]:
# stds = [-1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1]
# for model in models:
#     compute_errors(m=model, 
#                    train_input=train_input, 
#                    train_classes=, 
#                    test_input=test_input,
#                    test_classes=,
#                    stds=stds)

## 2. Output Classes

### Preprocess Data

In [9]:
train_input, test_input = reshape_data(train_input, test_input)
train_input1, train_input2, test_input1, test_input2, train_classes1, train_classes2, test_classes1, test_classes2 = split_img_data(train_input, test_input, train_classes, test_classes)

train_input1 = 0.9*train_input1
train_input2 = 0.9*train_input2

test_input1 = 0.9*test_input1
test_input2 = 0.9*test_input2

train_classes1 = convert_to_one_hot_labels(train_input1, train_classes1)
train_classes2 = convert_to_one_hot_labels(train_input2, train_classes2)

test_classes1 = convert_to_one_hot_labels(test_input1, test_classes1)
test_classes2 = convert_to_one_hot_labels(test_input2, test_classes2)

train_input1, test_classes1 = normalize(train_input1, test_classes1)
train_input2, test_classes2 = normalize(train_input2, test_classes2)

In [10]:
train_input1, train_classes1 = Variable(train_input1), Variable(train_classes1)
test_input1, test_classes1 = Variable(test_input1), Variable(test_classes1)

### Define Models

In [None]:
def create_shallow_model():
    return nn.Sequential(
        nn.Linear(196, 50),
        nn.BatchNorm1d(50),
        nn.ReLU(),
        nn.Linear(50, 10))

In [None]:
def create_deep_model():
    return nn.Sequential(
        nn.Linear(196, 4),
        nn.BatchNorm1d(4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.BatchNorm1d(8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.BatchNorm1d(16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.BatchNorm1d(32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 10))

In [None]:
models = [create_shallow_model, create_deep_model]

### Test Models

In [None]:
stds = [-1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1]
for model in models:
    compute_errors(m=model, 
                   train_input=train_input1, 
                   train_classes=train_classes1, 
                   test_input=test_input1,
                   test_classes=test_classes1,
                   stds=stds,
                   one_hot_encoded=True)

In [None]:
# Xavier initialization
for model in models:
    compute_errors(m=model, 
                   train_input=train_input1, 
                   train_classes=train_classes1, 
                   test_input=test_input1,
                   test_classes=test_classes1,
                   stds=None,
                   one_hot_encoded=True)

# Architecture: CNN 

In [11]:
class Net(nn.Module):
    def __init__(self, nb_hidden):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3)
        self.fc1 = nn.Linear(1504, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool1d(self.conv1(x), kernel_size=3, stride=2))
        x = F.relu(F.max_pool1d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 1504)))
        x = self.fc2(x)
        return x

class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        nb_hidden = 200
        self.conv1 = nn.Conv2d(196, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(9 * 64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.conv3(x))
        x = F.relu(self.fc1(x.view(-1, 9 * 64)))
        x = self.fc2(x)
        return x

    def train_model(model, train_input, train_target, mini_batch_size):
        criterion = nn.MSELoss()
        eta = 1e-1

        for e in range(25):
            sum_loss = 0
            for b in range(0, train_input.size(0), mini_batch_size):
                output = model(train_input.narrow(0, b, mini_batch_size))
                loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
                model.zero_grad()
                loss.backward()
                sum_loss = sum_loss + loss.item()
                for p in model.parameters():
                    p.data.sub_(eta * p.grad.data)
            print(e, sum_loss)

In [12]:
train_input1_reshape = torch.unsqueeze(train_input1, 1)
print("Dimension of train_input1: {}".format(torch.unsqueeze(train_input1, 1).shape))

test_input1_reshape = torch.unsqueeze(test_input1, 1)
print("Dimension of train_input1: {}".format(torch.unsqueeze(test_input1, 1).shape))

Dimension of train_input1: torch.Size([1000, 1, 196])
Dimension of train_input1: torch.Size([1000, 1, 196])


In [19]:
def compute_nb_errors(model, data_input, data_target, one_hot_encoded):

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output.data, 1)
        for k in range(mini_batch_size):
            # max needed if one-hot encoded
            target = data_target.data[b + k].max(0)[1] if one_hot_encoded else data_target.data[b + k]
            if target != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

In [20]:
from torch.nn import functional as F

mini_batch_size = 100

for k in range(10):
    model = Net(100)
    train_model(model, train_input1_reshape, train_classes1, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input1_reshape, test_classes1, one_hot_encoded=True)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

test error Net 9.50% 95/1000
test error Net 8.40% 84/1000
test error Net 11.00% 110/1000
test error Net 8.20% 82/1000
test error Net 8.90% 89/1000
test error Net 8.40% 84/1000
test error Net 9.70% 97/1000
test error Net 10.00% 100/1000
test error Net 9.00% 90/1000
test error Net 9.20% 92/1000
