In [1]:
import torch
import functions
import modules
import math
from optimizer import SGD

from torch import optim
from torch import Tensor
from torch import nn

- Generates a training and a test set of 1,000 points sampled uniformly in [0, 1]2, each with a
label 0 if outside the disk of radius 1/√2π and 1 inside,
- two input units, two output units, three hidden layers of 25 units,

## Test.py

## Generate data

In [2]:
def generate_disc_set(nb):
    input = torch.Tensor(nb, 2).uniform_(0, 1)
    target = input.pow(2).sum(1).sub(1 / (2 * math.pi)).sign().add(1).div(2).long()
    return input, target

In [3]:
def convert_to_one_hot_labels(input, target):
    tmp = input.new_zeros(target.size(0), target.max() + 1)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

In [4]:
train_input, train_target = generate_disc_set(3)

In [5]:
print(train_input)
print(train_target)

tensor([[0.8115, 0.3116],
        [0.7300, 0.8388],
        [0.3151, 0.5629]])
tensor([1, 1, 1])


In [6]:
train_target = convert_to_one_hot_labels(train_input, train_target)

In [7]:
print(train_input)
print(train_target)

tensor([[0.8115, 0.3116],
        [0.7300, 0.8388],
        [0.3151, 0.5629]])
tensor([[0., 1.],
        [0., 1.],
        [0., 1.]])


In [8]:
train_input, train_target = generate_disc_set(1000)
train_target = convert_to_one_hot_labels(train_input, train_target)

In [9]:
print(train_input.shape)
print(train_target.shape)

torch.Size([1000, 2])
torch.Size([1000, 2])


In [10]:
arch1 = modules.Sequential(modules.Linear(2, 2))

In [11]:
for name, m in arch1.__dict__.items():
    print(name)
    print(m)

Linear0
<modules.Linear object at 0x1257a9fd0>


In [12]:
nb_epochs = 10
lr = 1e-1
optimizer = SGD(arch1.param(), lr=lr)
criterion = modules.MSELoss()


for e in range(nb_epochs):
    output = arch1.forward(train_input)
    loss = criterion.forward(output, train_target)
    gradwrtoutput = criterion.backward()
    grad = arch1.backward(gradwrtoutput)
    print("Loss: ", loss)
    optimizer.step()

# params = arch1.param()
# for w, dw in params:
#     w -= (lr * dw)
 

Loss:  tensor(999.9973)
Loss:  tensor(1406.0029)
Loss:  tensor(1918.5540)
Loss:  tensor(2537.6499)
Loss:  tensor(3263.2871)
Loss:  tensor(4095.4722)
Loss:  tensor(5034.1997)
Loss:  tensor(6079.4678)
Loss:  tensor(7231.2852)
Loss:  tensor(8489.6494)


## Mini-Project Architecture

In [13]:
layer0 = modules.Linear(2, 25)
layer1 = modules.Linear(25, 25)
layer2 = modules.Linear(25, 25)
layer3 = modules.Linear(25, 25)
layer4 = modules.Linear(25, 2)

In [14]:
seq = modules.Sequential(layer0, layer1, layer2, layer3, layer4)

In [15]:
for name, m in seq.__dict__.items():
    print(name)
    print(m)

Linear0
<modules.Linear object at 0x1257a9780>
Linear1
<modules.Linear object at 0x1257a99b0>
Linear2
<modules.Linear object at 0x1257b30f0>
Linear3
<modules.Linear object at 0x1257a9f60>
Linear4
<modules.Linear object at 0x1257a9128>


## Lab 5 Tests

In [16]:
def create_shallow_model():
    return nn.Sequential(
            nn.Linear(2, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

In [17]:
def create_deep_model():
    return nn.Sequential(
        nn.Linear(2, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 2),
    )

In [18]:
def train_model(model, train_input, train_target):
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-1)
    nb_epochs = 250

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
#             print("Output: ", output.shape)
#             print("Train_target: ", train_target.narrow(0, b, mini_batch_size).shape)
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size).float())
            model.zero_grad()
            loss.backward()
            optimizer.step()

In [19]:
def compute_nb_errors(model, data_input, data_target):

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output, 1)
        for k in range(mini_batch_size):
            if data_target[b + k] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

In [20]:
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

mini_batch_size = 100

In [21]:
# for std in [ -1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1 ]:
std = 1e-3
for m in [create_shallow_model, create_deep_model]:

    model = m()

    if std > 0:
        with torch.no_grad():
            for p in model.parameters(): 
                p.normal_(0, std)

    train_model(model, train_input, train_target)

    print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
        m.__name__,
        std,
        compute_nb_errors(model, train_input, train_target) / train_input.size(0) * 100,
        compute_nb_errors(model, test_input, test_target) / test_input.size(0) * 100
    )
    )

RuntimeError: The size of tensor a (2) must match the size of tensor b (100) at non-singleton dimension 1

In [None]:
train_input, train_target = generate_disc_set(3)

In [None]:
print(train_input)
print(train_target)

In [None]:
def create_shallow_model():
    return nn.Sequential(
            nn.Linear(2, 1),
#             nn.ReLU(),
#             nn.Linear(3, 1)
        )

In [None]:
print(train_input)
model = create_shallow_model()
output = model(train_input)
print(output.shape)
print(output)