Refs:

https://github.com/deep-learning-with-pytorch/dlwpt-code

In [1]:
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

### Neural Networks

In [2]:
# A single-hidden-layer regressor: 1 input feature -> 13 tanh units -> 1 output.
model = nn.Sequential(
    nn.Linear(in_features=1, out_features=13),
    nn.Tanh(),
    nn.Linear(in_features=13, out_features=1),
)
model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

#### Inspecting the parameters

In [3]:
## shapes of parameters
# One torch.Size per learnable tensor, in the order the model yields them.
[p.size() for p in model.parameters()]

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

In [4]:
## names of parameters
# Pair every learnable tensor with the name the container registered it under.
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.size())

0.weight torch.Size([13, 1])
0.bias torch.Size([13])
2.weight torch.Size([1, 13])
2.bias torch.Size([1])


In [5]:
## name of each module
# Building the Sequential from an OrderedDict gives every submodule a descriptive
# name, replacing the positional labels ("0", "1", "2") used when layers are
# passed as plain arguments. OrderedDict is imported with the other dependencies
# in the first cell, so all imports stay in one place.
model = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 13)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(13, 1)),
]))

model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=13, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=13, out_features=1, bias=True)
)

In [6]:
# Parameter names are now prefixed with the submodule names chosen above.
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.size())

hidden_linear.weight torch.Size([13, 1])
hidden_linear.bias torch.Size([13])
output_linear.weight torch.Size([1, 13])
output_linear.bias torch.Size([1])


In [7]:
# A named submodule is reachable as an attribute of the container, so its
# parameters can be inspected directly by name.
model.output_linear.bias

Parameter containing:
tensor([0.0652], requires_grad=True)

### Training neural networks

In [8]:
# Eleven paired samples: x is the model input, y the regression target.
x = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])
y = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])

## rescale the inputs by a factor of 0.1, then turn both vectors into
## (n_samples, 1) columns so each row is one sample with a single feature
x = (0.1 * x).unsqueeze(1)
y = y.unsqueeze(1)

print(x.shape, y.shape)

torch.Size([11, 1]) torch.Size([11, 1])


In [9]:
## Splitting the dataset
# Hold out 20% of the samples (rounded down) for validation; the rest is used
# for training.
n_samples = x.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Random permutation of the row indices. No seed is set in this cell, so the
# split can differ between runs.
i_shuffled = torch.randperm(n_samples)

# Slice at a non-negative boundary. The `[:-n_val]` / `[-n_val:]` form silently
# inverts the split when n_val == 0 (training becomes empty and validation gets
# every sample), because -0 == 0.
i_train = i_shuffled[:n_train]
i_val = i_shuffled[n_train:]

x_train = x[i_train]
y_train = y[i_train]

x_val = x[i_val]
y_val = y[i_val]

In [10]:
def training_loop(n_epochs, optimizer, model, loss_fn, x_train, y_train, x_val, y_val):
    """Train ``model`` with full-batch updates and report train/validation loss.

    Every epoch performs one forward pass over the whole training split, one
    backward pass, and one optimizer step. The validation loss is computed
    purely for monitoring and never contributes to the gradients.

    Args:
        n_epochs: Number of epochs to run; epochs are numbered from 1.
        optimizer: Optimizer holding the parameters of ``model``.
        model: Callable mapping an input tensor to predictions.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        x_train, y_train: Training inputs and targets.
        x_val, y_val: Validation inputs and targets.

    Returns:
        None. Progress is printed for the first three epochs and then every
        500th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        y_train_p = model(x_train)
        loss_train = loss_fn(y_train_p, y_train)

        # The validation loss is only reported, never back-propagated, so skip
        # building an autograd graph for it.
        with torch.no_grad():
            y_val_p = model(x_val)
            loss_val = loss_fn(y_val_p, y_val)

        # Clear stale gradients before accumulating the ones for this step.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [11]:
# Mean-squared-error objective, minimised with plain SGD over all model parameters.
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    x_train=x_train,
    y_train=y_train,
    x_val=x_val,
    y_val=y_val,
)

Epoch 1, Training loss 202.1035, Validation loss 114.4695
Epoch 2, Training loss 196.3557, Validation loss 109.1719
Epoch 3, Training loss 190.8982, Validation loss 104.1621
Epoch 500, Training loss 8.3797, Validation loss 5.6349
Epoch 1000, Training loss 3.6127, Validation loss 3.4223
Epoch 1500, Training loss 5.3904, Validation loss 6.2958
Epoch 2000, Training loss 3.4916, Validation loss 5.0886
Epoch 2500, Training loss 2.7251, Validation loss 4.8067
Epoch 3000, Training loss 2.2346, Validation loss 4.5258


In [12]:
# Compare the model's output on the held-out samples with their true targets.
y_val_pred = model(x_val)
print('y_pred:', y_val_pred)
print('y_actual:', y_val)

y_pred: tensor([[12.9155],
        [ 8.1553]], grad_fn=<AddmmBackward>)
y_actual: tensor([[14.],
        [ 6.]])


In [13]:
# Show the gradient currently stored on the hidden layer's weight tensor.
print('check w.grad of hidden layer:', model.hidden_linear.weight.grad, sep='\n')

check w.grad of hidden layer:
tensor([[ 1.7610e+01],
        [-6.8478e-03],
        [ 1.0071e+00],
        [ 5.2210e-03],
        [-1.3800e+01],
        [ 2.0804e+01],
        [ 2.0887e+01],
        [-2.0675e+00],
        [-1.5381e-02],
        [ 7.8584e-01],
        [ 1.2547e-03],
        [-2.2070e+01],
        [ 2.0565e+01]])
