In [1]:
import torch
import math
import prologue as prologue
import framework


torch.set_grad_enabled(True) #By the end we should have this set to False

<torch.autograd.grad_mode.set_grad_enabled at 0x10ddc6220>

In [2]:
# In this block we discuss put all the external functions, 
# namely generate_disc, computing number of errors, and training the model.
# We should not need to change any of this.

######################################################################
######################### DATA things ################################
######################################################################
def generate_disc_set(nb): # This can be kept the same
    input = torch.empty(nb, 2).uniform_(-1, 1)
    target = input.pow(2).sum(1).sub(2 / math.pi).sign().add(1).div(2).long()
    return input, target

train_input, train_target = generate_disc_set(100)
test_input, test_target = generate_disc_set(100)

# center the data:
mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

mini_batch_size = 1

######################################################################
######################### ERROR things ################################
######################################################################

def compute_nb_errors(model, data_input, data_target): #this can be kept the same

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output, 1)
        for k in range(mini_batch_size):
            if data_target[b + k] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors


######################################################################
######################### TRAIN things ################################
######################################################################
# The following should be modified

def train_model(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss() #replace this by self macde MSE
    optimizer = optim.SGD(model.parameters(), lr = 1e-1) #replace this by self made SGD for MSE
    nb_epochs = 10

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad() # replace this so it works with 
            loss.backward() # replace this to self made backard method
            optimizer.step() # '_'



In [3]:
#initialize the network
Net = framework.Linear(3, 2)

In [4]:
#read the parameters for sanity check
Net.param()

[tensor([[ 0.4522,  0.0526, -0.3339],
         [ 0.6499,  0.2892, -0.5368]]),
 tensor([0.6768, 0.5370])]

In [5]:
#check the forward method
Net.forward(torch.tensor([1.0, 2.0, 3.0]))

tensor([0.2325, 0.1548])

In [6]:
#check the backward method
Net.backward(torch.tensor([1.0, -1.0]))

tensor([-0.1977, -0.2366,  0.2029])

In [7]:
#check the weight accum
Net.weights_grad_accum

tensor([[ 1.,  2.,  3.],
        [-1., -2., -3.]])

In [8]:
#Next we try the ReLu
rel = framework.ReLu()

In [9]:
rel.forward(Net.weights_grad_accum)

tensor([[1., 2., 3.],
        [0., 0., 0.]])

In [10]:
rel.backward(Net.weights_grad_accum)

tensor([[1., 2., 3.],
        [0., 0., 0.]])

In [11]:
#Now we try the Sequential
seq = framework.Sequential((framework.Linear(3, 2), framework.ReLu()))

In [12]:
seq.forward(torch.tensor([1.0, 2.0, 3.0]))

tensor([1., 2., 3.])
tensor([-1.1681,  0.9413])


tensor([0.0000, 0.9413])

In [13]:
seq.backward(torch.tensor([1.0, -1.0]))

tensor([ 1., -1.])
tensor([-0.1469, -0.2029, -0.4310])


tensor([0., 0., 0.])

In [14]:
#Below this line 

In [6]:
#Problem Set 3:
#/usr/bin/env python

# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/

# Written by Francois Fleuret <francois@fleuret.org>



######################################################################

def sigma(x):
    return x.tanh()

def dsigma(x):
    return 4 * (x.exp() + x.mul(-1).exp()).pow(-2)

######################################################################

def loss(v, t): #This is MSE, right?
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

######################################################################

def forward_pass(w1, b1, w2, b2, x):
    x0 = x
    s1 = w1.mv(x0) + b1
    x1 = sigma(s1)
    s2 = w2.mv(x1) + b2
    x2 = sigma(s2)

    return x0, s1, x1, s2, x2

#whose arguments correspond to the network’s parameters, 
#the target vector, the quantities computed by the forward pass, 
#and the tensors used to store the cumulated sums of the gradient 
#on individual samples, and updates the latters according to the
#formula of the backward pass.

def backward_pass(w1, b1, w2, b2, #current weights and biases of the network
                  t, # target vector
                  x, s1, x1, s2, x2, #output of network
                  dl_dw1, dl_db1, dl_dw2, dl_db2): #tensors used to stor the sumulated sums of the gradient
    x0 = x
    dl_dx2 = dloss(x2, t)
    dl_ds2 = dsigma(s2) * dl_dx2
    dl_dx1 = w2.t().mv(dl_ds2)
    dl_ds1 = dsigma(s1) * dl_dx1

    dl_dw2.add_(dl_ds2.view(-1, 1).mm(x1.view(1, -1)))
    dl_db2.add_(dl_ds2)
    dl_dw1.add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
    dl_db1.add_(dl_ds1)

######################################################################


In [8]:
nb_classes = train_target.size(1)
nb_train_samples = train_input.size(0)

zeta = 0.90

train_target = train_target * zeta
test_target = test_target * zeta

nb_hidden = 50
eta = 1e-1 / nb_train_samples
epsilon = 1e-6

w1 = torch.empty(nb_hidden, train_input.size(1)).normal_(0, epsilon)
b1 = torch.empty(nb_hidden).normal_(0, epsilon)
w2 = torch.empty(nb_classes, nb_hidden).normal_(0, epsilon)
b2 = torch.empty(nb_classes).normal_(0, epsilon)

dl_dw1 = torch.empty(w1.size())
dl_db1 = torch.empty(b1.size())
dl_dw2 = torch.empty(w2.size())
dl_db2 = torch.empty(b2.size())

for k in range(1000):

    # Back-prop

    acc_loss = 0
    nb_train_errors = 0

    dl_dw1.zero_()
    dl_db1.zero_()
    dl_dw2.zero_()
    dl_db2.zero_()

    for n in range(nb_train_samples):
        x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, train_input[n])

        pred = x2.max(0)[1].item()
        if train_target[n, pred] < 0.5: nb_train_errors = nb_train_errors + 1
        acc_loss = acc_loss + loss(x2, train_target[n])

        backward_pass(w1, b1, w2, b2,
                      train_target[n],
                      x0, s1, x1, s2, x2,
                      dl_dw1, dl_db1, dl_dw2, dl_db2)

    # Gradient step

    w1 = w1 - eta * dl_dw1
    b1 = b1 - eta * dl_db1
    w2 = w2 - eta * dl_dw2
    b2 = b2 - eta * dl_db2

    # Test error

    nb_test_errors = 0

    for n in range(test_input.size(0)):
        _, _, _, _, x2 = forward_pass(w1, b1, w2, b2, test_input[n])

        pred = x2.max(0)[1].item()
        if test_target[n, pred] < 0.5: nb_test_errors = nb_test_errors + 1

    print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(k,
                  acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [14]:
torch.set_grad_enabled(True) #At the end, this should be set to False

#Pb set 5:
#!/usr/bin/env python

# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/

# Written by Francois Fleuret <francois@fleuret.org>

import torch
import math

from torch import optim
from torch import Tensor
from torch import nn

######################################################################

def generate_disc_set(nb):
    input = torch.empty(nb, 2).uniform_(-1, 1)
    target = input.pow(2).sum(1).sub(2 / math.pi).sign().add(1).div(2).long()
    return input, target

#train_input, train_target = generate_disc_set(1000)
#test_input, test_target = generate_disc_set(1000)

mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

mini_batch_size = 100

######################################################################

def train_model(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss() #replace this by self macde MSE
    optimizer = optim.SGD(model.parameters(), lr = 1e-1) #replace this by self made SGD for MSE
    nb_epochs = 250

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad() # replace this so it works with 
            loss.backward() # replace this to self made backard method
            optimizer.step() # '_'

######################################################################

def compute_nb_errors(model, data_input, data_target): #this s

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output, 1)
        for k in range(mini_batch_size):
            if data_target[b + k] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

######################################################################

def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

def create_deep_model():
    return nn.Sequential(
        nn.Linear(2, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

######################################################################

for std in [ -1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1 ]:

    for m in [ create_shallow_model, create_deep_model ]:

        model = m()

        if std > 0:
            with torch.no_grad():
                for p in model.parameters(): p.normal_(0, std)

        train_model(model, train_input, train_target)

        print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
            m.__name__,
            std,
            compute_nb_errors(model, train_input, train_target) / train_input.size(0) * 100,
            compute_nb_errors(model, test_input, test_target) / test_input.size(0) * 100
        )
        )

######################################################################

std create_shallow_model -1.000000 train_error 1.00% test_error 0.50%
std create_deep_model -1.000000 train_error 8.50% test_error 8.90%
std create_shallow_model 0.001000 train_error 1.40% test_error 1.60%
std create_deep_model 0.001000 train_error 49.60% test_error 50.00%
std create_shallow_model 0.010000 train_error 0.70% test_error 1.40%
std create_deep_model 0.010000 train_error 49.60% test_error 50.00%
std create_shallow_model 0.100000 train_error 1.00% test_error 0.50%
std create_deep_model 0.100000 train_error 50.40% test_error 50.00%
std create_shallow_model 1.000000 train_error 0.80% test_error 0.50%
std create_deep_model 1.000000 train_error 50.40% test_error 50.00%
std create_shallow_model 10.000000 train_error 0.60% test_error 0.80%
std create_deep_model 10.000000 train_error 50.40% test_error 50.00%
