In [7]:
import torch

t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c).unsqueeze(1) # <1>
t_u = torch.tensor(t_u).unsqueeze(1) # <1>

t_u.shape

torch.Size([11, 1])

In [8]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices

(tensor([ 4,  5,  3,  1,  2,  9,  0, 10,  6]), tensor([7, 8]))

In [9]:
t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

In [10]:
from torch import nn

linear_model = nn.Linear(1, 1) # <1>
linear_model(t_un_val)

tensor([[1.3735],
        [2.8091]], grad_fn=<AddmmBackward0>)

In PyTorch, when you initialize a neural network layer such as nn.Linear (linear model), the input parameters represent the dimensions of the input and output tensors. Specifically, the nn.Linear layer takes two arguments: in_features and out_features.

    in_features (int): This is the size of each input sample. For example, if you have a batch of input vectors, each with d features, then in_features should be set to d.
    
    out_features (int): This is the size of each output sample. For example, if you want the linear layer to produce output vectors of size k, then out_features should be set to k.


In [12]:
linear_model.weight, linear_model.bias

(Parameter containing:
 tensor([[0.5397]], requires_grad=True),
 Parameter containing:
 tensor([0.1969], requires_grad=True))

In [13]:
x = torch.ones(10, 1)
linear_model(x)

tensor([[0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366],
        [0.7366]], grad_fn=<AddmmBackward0>)

In [14]:
from torch import optim

In [17]:
opitmizer = optim.SGD(linear_model.parameters(), lr=1e-2)

In [26]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    for epoch in range(n_epochs):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_c_train, t_p_train)

        t_p_val = model(t_u_val)
        loss_val = loss_fn(t_c_val, t_p_val)

        opitmizer.zero_grad()
        loss_train.backward()
        opitmizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f}, Validation loss {loss_val.item():.4f}")

In [28]:
linear_model = nn.Linear(1, 1) # <1>
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val
)

print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 0, Training loss 133.5405, Validation loss 20.6322
Epoch 1, Training loss 133.5405, Validation loss 20.6322
Epoch 1000, Training loss 133.5405, Validation loss 20.6322
Epoch 2000, Training loss 133.5405, Validation loss 20.6322

Parameter containing:
tensor([[0.5410]], requires_grad=True)
Parameter containing:
tensor([0.6209], requires_grad=True)


In [31]:
from collections import OrderedDict

seq_model = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 13)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(13, 1))
]))

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val
)

print('output', seq_model(t_un_val))
print('answer', t_c_val)
print('hidden', seq_model.hidden_linear.weight.grad)

Epoch 0, Training loss 211.5442, Validation loss 24.4654
Epoch 1, Training loss 211.5442, Validation loss 24.4654
Epoch 1000, Training loss 211.5442, Validation loss 24.4654
Epoch 2000, Training loss 211.5442, Validation loss 24.4654
output tensor([[0.2077],
        [0.4120]], grad_fn=<AddmmBackward0>)
answer tensor([[-4.],
        [ 6.]])
hidden tensor([[-3.6773e+04],
        [ 5.1166e+01],
        [ 4.7368e+04],
        [-1.4234e+01],
        [ 8.9447e+01],
        [ 3.4089e+03],
        [-1.4378e+00],
        [-2.6385e+01],
        [-5.0987e+01],
        [ 9.1511e+02],
        [-1.2216e+03],
        [-1.0343e+02],
        [-1.2994e+02]])
