In [1]:
import torch
import functions
import modules
import math
from optimizer import SGD

from torch import optim
from torch import Tensor
from torch import nn

# torch.set_grad_enabled(False)

- Generates a training and a test set of 1,000 points sampled uniformly in [0, 1]2, each with a
label 0 if outside the disk of radius 1/√2π and 1 inside,
- two input units, two output units, three hidden layers of 25 units,

## Test.py

## Generate data

In [2]:
def generate_disc_set(nb):
    input = torch.Tensor(nb, 2).uniform_(0, 1)
    target = input.pow(2).sum(1).sub(1 / (2 * math.pi)).sign().add(1).div(2).long()
    return input, target

In [3]:
def convert_to_one_hot_labels(input, target):
    tmp = input.new_zeros(target.size(0), target.max() + 1)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

In [4]:
train_input, train_target = generate_disc_set(3)

In [5]:
print(train_input)
print(train_target)

tensor([[0.8485, 0.8126],
        [0.7228, 0.7657],
        [0.1032, 0.3349]])
tensor([1, 1, 0])


In [6]:
train_target = convert_to_one_hot_labels(train_input, train_target)

In [7]:
print(train_input)
print(train_target)

tensor([[0.8485, 0.8126],
        [0.7228, 0.7657],
        [0.1032, 0.3349]])
tensor([[0., 1.],
        [0., 1.],
        [1., 0.]])


In [8]:
train_input, train_target = generate_disc_set(1000)
train_target = convert_to_one_hot_labels(train_input, train_target)

In [9]:
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)

tensor([[ 1.2249, -0.1160],
        [-0.0466,  1.5746],
        [ 1.2645, -1.2795],
        ...,
        [ 1.4004, -0.3174],
        [ 0.4080,  1.1320],
        [-1.3657,  0.8278]])

In [10]:
train_input = 0.9 * train_input

In [11]:
print(train_input.shape)
print(train_target.shape)

torch.Size([1000, 2])
torch.Size([1000, 2])


In [12]:
arch1 = modules.Sequential(modules.Linear(2, 25), modules.Linear(25, 2))

In [13]:
for name, m in arch1.__dict__.items():
    print(name)
    print(m)

Linear0
<modules.Linear object at 0x114b1dac8>
Linear1
<modules.Linear object at 0x114b1db00>


In [14]:
nb_epochs = 2
lr = 1e-3
optimizer = SGD(arch1.param(), lr=lr)
criterion = modules.MSELoss()

for e in range(nb_epochs):
    output = arch1.forward(train_input)
#     print("output: ", output.shape)
#     print("train_target: ", train_target.shape)
    loss = criterion.forward(output, train_target)
    optimizer.zero_grad()
    gradwrtoutput = criterion.backward()
    grad = arch1.backward(gradwrtoutput)
    for (p, grad2) in arch1.param():
#         print("p: ", p.shape)
#         print("grad2: ", grad2.shape)
        p -= lr * grad2
#     print("Grad: ", grad)
#     optimizer.step()

In [15]:
model = nn.Sequential(
            nn.Linear(2, 2),
#             nn.ReLU(),
#             nn.Linear(128, 2)
        )
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-1)
nb_epochs = 10

for e in range(nb_epochs):
    output = model(train_input)
    loss = criterion(output, train_target.float())
    print(loss)
    model.zero_grad()
    loss.backward()
    optimizer.step()

tensor(1.0319, grad_fn=<MseLossBackward>)
tensor(0.8630, grad_fn=<MseLossBackward>)
tensor(0.7240, grad_fn=<MseLossBackward>)
tensor(0.6096, grad_fn=<MseLossBackward>)
tensor(0.5154, grad_fn=<MseLossBackward>)
tensor(0.4378, grad_fn=<MseLossBackward>)
tensor(0.3739, grad_fn=<MseLossBackward>)
tensor(0.3211, grad_fn=<MseLossBackward>)
tensor(0.2777, grad_fn=<MseLossBackward>)
tensor(0.2418, grad_fn=<MseLossBackward>)


## Mini-Project Architecture

In [16]:
seq = modules.Sequential(modules.Linear(2, 25),
                         modules.TanH(),
                         modules.Linear(25, 25),
                         modules.TanH(),
                         modules.Linear(25, 25),
                         modules.TanH(),
                         modules.Linear(25, 25),
                         modules.TanH(),
                         modules.Linear(25, 2)
                        )

In [17]:
for name, m in seq.__dict__.items():
    print(name)
    print(m)

Linear0
<modules.Linear object at 0x12132ddd8>
TanH1
<modules.TanH object at 0x12132d780>
Linear2
<modules.Linear object at 0x12132d828>
TanH3
<modules.TanH object at 0x12132d7f0>
Linear4
<modules.Linear object at 0x12132d748>
TanH5
<modules.TanH object at 0x12132dcf8>
Linear6
<modules.Linear object at 0x12132de48>
TanH7
<modules.TanH object at 0x12132de10>
Linear8
<modules.Linear object at 0x12132dfd0>


In [18]:
nb_epochs = 10
lr = 1e-3
optimizer = SGD(seq.param(), lr=lr)
criterion = modules.MSELoss()


for e in range(nb_epochs):
    output = seq.forward(train_input)
    loss = criterion.forward(output, train_target)
    print("Loss: ", loss)
    gradwrtoutput = criterion.backward()
#     print("Gradwrtoutput: ", gradwrtoutput.shape)
    grad = seq.backward(gradwrtoutput)
    for (p, grad2) in arch1.param():
        print("p: ", p)
        print("grad2: ", grad2)
        p -= lr * grad2
        print("p: ", p)
    optimizer.zero_grad()
    optimizer.step()

Loss:  tensor(1276.6481)
p:  tensor([[-1.8025e-01, -3.7470e-01],
        [ 2.1350e-01, -8.9301e-02],
        [-3.0673e-01, -3.4626e-01],
        [ 1.3133e-01,  2.2382e-01],
        [-1.9389e-02,  2.5167e-01],
        [ 5.0168e-01,  1.2072e-01],
        [ 3.0390e-02, -2.5953e-01],
        [-4.1834e-02, -8.0818e-02],
        [ 8.0836e-03,  6.6040e-01],
        [-1.0801e-01,  4.0834e-01],
        [ 1.1729e-01, -8.3192e-02],
        [-9.4032e-02, -8.5503e-01],
        [ 1.4226e-01,  5.2021e-01],
        [ 5.6614e-02,  5.3780e-04],
        [-1.5531e-01,  1.4088e-02],
        [ 1.3098e-01,  1.8763e-01],
        [-1.5644e-01,  1.0309e+00],
        [ 3.7992e-02,  1.3523e-01],
        [-2.0884e-02, -4.9977e-01],
        [-5.7389e-02,  6.2610e-02],
        [ 1.3037e-01,  3.1585e-01],
        [-1.2921e-01,  4.9171e-01],
        [ 2.6840e-01, -1.9498e-01],
        [-1.7381e-01, -1.7888e-01],
        [ 4.1529e-01,  2.6788e-01]])
grad2:  tensor([[  292.6515,   687.8332],
        [  110.9289,  -179.9

p:  tensor([ -4.1964, -13.1628])
Loss:  tensor(1276.6481)
p:  tensor([[-7.6555e-01, -1.7504e+00],
        [-8.3562e-03,  2.7063e-01],
        [-2.8684e-01, -1.8253e+00],
        [ 3.7641e-01,  7.7131e-01],
        [ 2.0930e-01,  1.2616e+00],
        [ 1.1707e+00,  1.5297e+00],
        [ 1.1434e-01, -3.2616e-01],
        [-1.9977e-01, -1.2096e-01],
        [ 6.0139e-01,  2.4701e+00],
        [ 2.8397e-01,  1.2514e+00],
        [ 3.6769e-02, -3.7248e-01],
        [-5.3521e-01, -2.8642e+00],
        [ 6.5127e-01,  2.1139e+00],
        [ 6.6724e-02,  8.6352e-02],
        [-8.9973e-01, -5.2878e-01],
        [ 2.5442e-01,  1.1815e+00],
        [ 3.5475e-01,  2.9601e+00],
        [ 1.7367e-04,  4.6024e-01],
        [-9.6252e-02, -1.3123e+00],
        [-5.0426e-01, -1.1771e-01],
        [ 8.1218e-01,  1.7606e+00],
        [-1.8090e-02,  1.4678e+00],
        [ 2.8679e-01,  1.6755e-02],
        [-8.3047e-02, -7.6027e-01],
        [ 1.3132e+00,  2.3112e+00]])
grad2:  tensor([[  292.6515,   687.83

grad2:  tensor([[ 1142.8586,   154.8884,   503.7954,  -873.4234,    60.2637, -1728.7059,
           360.0982,   552.7394,  -852.0872,  -432.0341,  -361.3446,  1227.2941,
         -1052.0377,  -133.8237,  1113.0126,  -165.2487,  -971.3903,  -106.3192,
           670.8834,   544.6733,  -896.2150,   -15.8878,  -277.0606,   369.4789,
         -1529.5994],
        [ 2959.3325,  1487.0256,   356.3707, -2773.9238,   883.3246, -3973.6729,
          1871.4707,  2051.6172, -2280.7183, -1567.2976, -1310.7413,  3734.7505,
         -2763.2566,  -295.8126,  2995.7939,   399.8132, -3666.6077,  -245.6283,
          2533.9546,  1458.6401, -1907.1490,  -160.5852,    99.2915,   791.9130,
         -2907.1091]])
p:  tensor([[ -5.6224,  -1.1090,  -1.8827,   4.3406,  -0.5352,   8.6071,  -1.7031,
          -2.8777,   3.9933,   2.1315,   1.8613,  -5.6598,   5.0138,   0.6415,
          -5.9540,   0.5120,   4.4718,   0.3641,  -3.0763,  -3.0178,   4.4402,
          -0.2376,   1.3118,  -1.5286,   7.5352],
        

grad2:  tensor([[  292.6515,   687.8332],
        [  110.9289,  -179.9654],
        [   -9.9411,   739.5035],
        [ -122.5436,  -273.7420],
        [ -114.3425,  -504.9634],
        [ -334.5139,  -704.4781],
        [  -41.9765,    33.3144],
        [   78.9669,    20.0699],
        [ -296.6541,  -904.8615],
        [ -195.9880,  -421.5160],
        [   40.2602,   144.6458],
        [  220.5884,  1004.6010],
        [ -254.5022,  -796.8253],
        [   -5.0553,   -42.9070],
        [  372.2103,   271.4348],
        [  -61.7196,  -496.9270],
        [ -255.5977,  -964.5966],
        [   18.9094,  -162.5040],
        [   37.6841,   406.2871],
        [  223.4356,    90.1602],
        [ -340.9059,  -722.3815],
        [  -55.5592,  -488.0450],
        [   -9.1947,  -105.8659],
        [  -45.3839,   290.6933],
        [ -448.9708, -1021.6733]])
p:  tensor([[-2.2288, -5.1895],
        [-0.5630,  1.1705],
        [-0.2371, -5.5228],
        [ 0.9891,  2.1400],
        [ 0.7810,  3.7864

p:  tensor([[-2.8141, -6.5652],
        [-0.7849,  1.5304],
        [-0.2173, -7.0018],
        [ 1.2342,  2.6875],
        [ 1.0097,  4.7963],
        [ 3.5123,  6.4610],
        [ 0.4082, -0.5594],
        [-0.7525, -0.2614],
        [ 2.6780,  8.8042],
        [ 1.6559,  4.2020],
        [-0.2451, -1.3850],
        [-2.0793, -9.8964],
        [ 2.4328,  7.6916],
        [ 0.1021,  0.3867],
        [-3.5052, -2.4288],
        [ 0.6865,  4.6600],
        [ 2.1439,  9.7123],
        [-0.1322,  1.5978],
        [-0.3600, -4.1564],
        [-2.0683, -0.7488],
        [ 3.1985,  6.8173],
        [ 0.3708,  4.8841],
        [ 0.3512,  0.7578],
        [ 0.2346, -2.7951],
        [ 4.4560,  9.4629]])
grad2:  tensor([[  292.6515,   687.8332],
        [  110.9289,  -179.9654],
        [   -9.9411,   739.5035],
        [ -122.5436,  -273.7420],
        [ -114.3425,  -504.9634],
        [ -334.5139,  -704.4781],
        [  -41.9765,    33.3144],
        [   78.9669,    20.0699],
        [ -296.