References:

https://github.com/yunjey/pytorch-tutorial 

https://medium.com/coinmonks/create-a-neural-network-in-pytorch-and-make-your-life-simpler-ec5367895199

In [1]:
import torch, torch.nn as nn, copy, timeit
from torch.distributions.bernoulli import Bernoulli 
from time import time

#### Define the number of nodes per layer and the batch size:

In [2]:
# Layer widths of the network and the size of the simulated batch.
n_in = 2        # input features
n_h1 = 5        # hidden units
n_out = 2       # output units
batch_size = 5  # rows in the simulated data set

# Seed PyTorch's global RNG so that the random weight initialisation and the
# simulated data are the same after "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr

#### Define model:

In [5]:
# Small fully connected network: linear -> ReLU -> linear -> sigmoid.
layers = [
    nn.Linear(n_in, n_h1),   # input -> hidden
    nn.ReLU(),
    nn.Linear(n_h1, n_out),  # hidden -> output
    nn.Sigmoid(),            # squashes every output into (0, 1)
]
model = nn.Sequential(*layers)

In [6]:
# Show the layer-by-layer structure of the network.
print(model)

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=2, bias=True)
  (3): Sigmoid()
)


* Make a copy of the model:

In [7]:
# deepcopy clones the module together with its parameter tensors. The copy is
# taken before the parameters of `model` are modified in place further down;
# presumably it serves as an untouched reference (it is not used again in the
# visible cells).
model_copy = copy.deepcopy(model)

* Modify parameters within model:

In [98]:
# Shift every parameter by +1, printing each tensor's values before the shift.
# The update runs under torch.no_grad() directly on the parameter instead of
# going through `.data`, which hides in-place changes from autograd.
# NOTE: this cell is not idempotent - every run adds another 1 to each parameter.
with torch.no_grad():
    for param in model.parameters():
        print(param.detach())
        param.add_(1)

tensor([[-0.2539, -0.1512],
        [ 0.3954, -0.5860],
        [ 0.4480, -0.0537],
        [ 0.0240, -0.0454],
        [ 0.6799,  0.0556]])
tensor([-0.0061,  0.4293,  0.6972, -0.0248,  0.0624])
tensor([[-0.0892,  0.0842,  0.4037,  0.0654, -0.0591],
        [-0.2343,  0.3095,  0.2864, -0.1971,  0.1245],
        [-0.2816,  0.1038, -0.3724, -0.0171,  0.3661],
        [ 0.3533,  0.3257,  0.0191,  0.0169, -0.1510],
        [ 0.2251,  0.1269,  0.0636,  0.1607,  0.1241]])
tensor([ 0.1911,  0.3518,  0.1082, -0.1249, -0.1715])
tensor([[-0.2523, -0.3053, -0.1101, -0.4300, -0.0090],
        [-0.1467, -0.3035, -0.2172,  0.2510, -0.1680]])
tensor([-0.3583, -0.1321])


In [99]:
# Display every parameter (weights and biases, in layer order) after the shift.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.7461, 0.8488],
        [1.3954, 0.4140],
        [1.4480, 0.9463],
        [1.0240, 0.9546],
        [1.6799, 1.0556]], requires_grad=True)
Parameter containing:
tensor([0.9939, 1.4293, 1.6972, 0.9752, 1.0624], requires_grad=True)
Parameter containing:
tensor([[0.9108, 1.0842, 1.4037, 1.0654, 0.9409],
        [0.7657, 1.3095, 1.2864, 0.8029, 1.1245],
        [0.7184, 1.1038, 0.6276, 0.9829, 1.3661],
        [1.3533, 1.3257, 1.0191, 1.0169, 0.8490],
        [1.2251, 1.1269, 1.0636, 1.1607, 1.1241]], requires_grad=True)
Parameter containing:
tensor([1.1911, 1.3518, 1.1082, 0.8751, 0.8285], requires_grad=True)
Parameter containing:
tensor([[0.7477, 0.6947, 0.8899, 0.5700, 0.9910],
        [0.8533, 0.6965, 0.7828, 1.2510, 0.8320]], requires_grad=True)
Parameter containing:
tensor([0.6417, 0.8679], requires_grad=True)


* Check total number of parameters:

In [100]:
# Element count and trainable flag of every parameter tensor.
param_sizes = [(tensor.numel(), tensor.requires_grad) for tensor in model.parameters()]
# First line: all parameters; second line: only those that require gradients.
print(sum(count for count, _ in param_sizes))
print(sum(count for count, trainable in param_sizes if trainable))

57
57


#### Simulate some data:

In [101]:
# Inputs: standard-normal features. requires_grad=True so that the gradient of
# the loss with respect to x can be inspected after backward().
x = torch.randn((batch_size, n_in), requires_grad=True)
# Targets: independent Bernoulli(0.3) draws, one per sample and output unit.
y = Bernoulli(probs=0.3).sample((batch_size, n_out))
print(x, y, sep="\n")

tensor([[ 0.0319, -0.2415],
        [ 0.0912, -0.3437],
        [-0.9408,  0.2738],
        [-1.4995, -0.6780],
        [-2.2065,  0.3260]], requires_grad=True)
tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 0.],
        [1., 0.]])


In [105]:
# Display the simulated targets again.
y

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 0.],
        [1., 0.]])

#### Randomly initialise model weights:

In [13]:
def init_normal(m):
    """Re-initialise the weight of a linear layer with U(0, 1) samples.

    Despite its name, the function draws from a uniform distribution
    (``nn.init.uniform_`` with its default bounds), not a normal one; the
    name is kept so that existing callers keep working.  Biases are left
    untouched.

    Args:
        m: Any ``nn.Module``.  Meant to be passed to ``Module.apply``, which
            calls it once per sub-module; modules that are not ``nn.Linear``
            (or a subclass of it) are ignored.
    """
    # isinstance() rather than an exact type comparison, so that subclasses
    # of nn.Linear are initialised as well.
    if isinstance(m, nn.Linear):
        nn.init.uniform_(m.weight)


# Module.apply calls init_normal on every sub-module of `model` and on `model`
# itself. As the last expression of the cell, its return value (the model) is
# displayed.
model.apply(init_normal)

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=1, bias=True)
  (5): Sigmoid()
)

In [14]:
# Display every parameter (weights and biases, in layer order) after the
# re-initialisation.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.6192, 0.7905],
        [0.6344, 0.7716],
        [0.9902, 0.0784],
        [0.0578, 0.6072],
        [0.4677, 0.0873]], requires_grad=True)
Parameter containing:
tensor([0.4918, 0.4377, 0.6472, 1.0738, 0.9308], requires_grad=True)
Parameter containing:
tensor([[0.8421, 0.5173, 0.0555, 0.3516, 0.7750],
        [0.2502, 0.4258, 0.3056, 0.2145, 0.6253],
        [0.6557, 0.1988, 0.4634, 0.2234, 0.7391],
        [0.1546, 0.5395, 0.9678, 0.4042, 0.7264],
        [0.7077, 0.6203, 0.5500, 0.4421, 0.1903]], requires_grad=True)
Parameter containing:
tensor([1.1435, 1.3232, 1.2173, 0.5792, 0.6759], requires_grad=True)
Parameter containing:
tensor([[0.4591, 0.8354, 0.3389, 0.3499, 0.4650]], requires_grad=True)
Parameter containing:
tensor([0.9154], requires_grad=True)


#### Get shapes of parameter tensors:

In [15]:
# Shape of every parameter tensor, in model.parameters() order.
shapes = [tensor.shape for tensor in model.parameters()]
print(shapes)

[torch.Size([5, 2]), torch.Size([5]), torch.Size([5, 5]), torch.Size([5]), torch.Size([1, 5]), torch.Size([1])]


#### Pass input through model:

In [16]:
# Forward pass: feed the simulated inputs through the network and show the
# sigmoid outputs.
pred = model(x)
print(pred)

tensor([[0.9999],
        [1.0000],
        [0.9995],
        [0.9893],
        [0.9912],
        [0.9997],
        [0.9999],
        [0.9997],
        [1.0000],
        [0.9998],
        [0.9778],
        [0.9993],
        [1.0000],
        [0.9852],
        [0.9896],
        [0.9905],
        [0.9998],
        [0.9999],
        [0.9984],
        [0.9997],
        [0.9997],
        [0.9862],
        [0.9995],
        [0.9999],
        [0.9983],
        [0.9960],
        [0.9996],
        [0.9954],
        [0.9995],
        [0.9913],
        [0.9896],
        [1.0000],
        [0.9837],
        [0.9994],
        [0.9990],
        [0.9918],
        [0.9992],
        [1.0000],
        [1.0000],
        [0.9999],
        [1.0000],
        [0.9877],
        [1.0000],
        [0.9818],
        [1.0000],
        [0.9986],
        [1.0000],
        [0.9869],
        [1.0000],
        [0.9901],
        [0.9928],
        [1.0000],
        [0.9995],
        [0.9867],
        [1.0000],
        [0

#### Define loss function and optimizer:

In [17]:
# Mean-squared error, averaged over all elements (the default reduction).
# NOTE(review): the targets are 0/1 Bernoulli samples and the model ends in a
# Sigmoid, so nn.BCELoss may be the more usual choice - confirm MSE is intended.
criterion = nn.MSELoss(reduction="mean")
# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [18]:
# Loss of the untrained predictions against the targets.
loss = criterion(pred, y)
# Back-propagate to populate .grad on x and on the model parameters.
# retain_graph=True keeps the graph's buffers after this call, presumably so
# that backward() can be run again on the same `pred`; note that every
# backward() call adds to the existing .grad values rather than replacing them.
loss.backward(retain_graph=True)

In [19]:
# Show the scalar loss tensor.
print(loss)

tensor(0.7452, grad_fn=<MseLossBackward>)


#### Look at gradients:

In [20]:
# Gradient of the loss with respect to the inputs; it exists because x was
# created with requires_grad=True.
x.grad

tensor([[ 5.0117e-06,  3.8916e-06],
        [ 6.9085e-08,  5.3645e-08],
        [ 2.9369e-05,  2.2805e-05],
        [ 1.5344e-04,  1.2404e-04],
        [-1.1181e-06, -9.0391e-07],
        [ 2.0605e-05,  1.6000e-05],
        [ 8.4875e-06,  6.5906e-06],
        [ 1.7020e-05,  1.3216e-05],
        [ 3.1776e-06,  2.4674e-06],
        [-3.0822e-09, -2.3934e-09],
        [ 1.8697e-05,  1.9649e-04],
        [ 4.2014e-05,  3.2624e-05],
        [ 1.6120e-07,  1.2517e-07],
        [-1.8999e-07, -1.9966e-06],
        [ 3.4759e-04,  4.2546e-05],
        [ 1.3668e-04,  1.1049e-04],
        [ 1.3339e-05,  1.0358e-05],
        [ 4.9553e-06,  5.4549e-06],
        [-1.1859e-07, -1.3054e-07],
        [ 2.0084e-05,  1.5595e-05],
        [ 1.1837e-05,  1.3031e-05],
        [ 1.1856e-05,  1.2460e-04],
        [-1.7148e-08, -1.3315e-08],
        [ 3.2390e-06,  2.5151e-06],
        [ 1.0829e-04,  8.4087e-05],
        [ 1.3885e-04,  5.3254e-05],
        [ 2.3625e-05,  1.8345e-05],
        [ 1.5516e-04,  1.899

In [21]:
# Gather the gradient of every parameter, then print them with their index.
grads = [param.grad for param in model.parameters()]
for i, grad in enumerate(grads):
    print (i, grad)

0 tensor([[-0.0007,  0.0004],
        [-0.0006,  0.0004],
        [-0.0004, -0.0017],
        [-0.0033, -0.0017],
        [-0.0047, -0.0049]])
1 tensor([0.0009, 0.0007, 0.0016, 0.0030, 0.0059])
2 tensor([[1.1858e-04, 9.3225e-05, 2.3482e-04, 1.1798e-03, 8.8652e-04],
        [2.1578e-04, 1.6963e-04, 4.2728e-04, 2.1467e-03, 1.6131e-03],
        [8.7546e-05, 6.8826e-05, 1.7336e-04, 8.7099e-04, 6.5449e-04],
        [9.0375e-05, 7.1049e-05, 1.7896e-04, 8.9913e-04, 6.7564e-04],
        [1.2011e-04, 9.4428e-05, 2.3785e-04, 1.1950e-03, 8.9796e-04]])
3 tensor([0.0022, 0.0039, 0.0016, 0.0016, 0.0022])
4 tensor([[0.0081, 0.0083, 0.0082, 0.0058, 0.0053]])
5 tensor([0.0047])


#### Optimize:

In [22]:
# Full-batch gradient descent: 50,000 updates on the same (x, y) pair.
# NOTE: optimizer.zero_grad() only clears the gradients of the parameters given
# to the optimizer; x.grad is not reset inside this loop.
for epoch in range(50_000):
    # Clear the parameter gradients left over from the previous backward pass.
    optimizer.zero_grad()

    # Forward pass and loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report progress every 5,000 epochs (epoch is 0-based, hence the +1).
    if not (epoch + 1) % 5_000:
        print('epoch: ', epoch+1,' loss: ', loss.item())

    # Backward pass, then one SGD step.
    loss.backward()
    optimizer.step()

epoch:  5000  loss:  0.18439698219299316
epoch:  10000  loss:  0.18186572194099426
epoch:  15000  loss:  0.18050706386566162
epoch:  20000  loss:  0.17922110855579376
epoch:  25000  loss:  0.17807553708553314
epoch:  30000  loss:  0.1772245317697525
epoch:  35000  loss:  0.17646253108978271
epoch:  40000  loss:  0.1757882684469223
epoch:  45000  loss:  0.17525261640548706
epoch:  50000  loss:  0.17485176026821136


In [106]:
# Predictions of the model on the training inputs after the optimisation loop.
y_pred = model(x)
y_pred

tensor([[1.0000, 1.0000],
        [1.0000, 1.0000],
        [1.0000, 1.0000],
        [0.9917, 0.9958],
        [0.9917, 0.9958]], grad_fn=<SigmoidBackward>)

In [107]:
# Loss of the current predictions against the targets (value is not displayed).
loss = criterion(y_pred, y)

In [110]:
# Check the element type of the targets.
y.dtype

torch.float32

In [111]:
# Show y converted to float64; the result is not assigned, so y is unchanged.
y.double()

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 0.],
        [1., 0.]], dtype=torch.float64)

In [113]:
# Show y converted to float32; the result is not assigned, so y is unchanged.
y.float()

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 0.],
        [1., 0.]])

In [115]:
# Confirm that y still has its original dtype after the conversions above.
y.dtype

torch.float32