In [1]:
import torch
import numpy as np

In [2]:
X_train = torch.tensor([[0, 0],
                       [0, 1],
                       [1, 0],
                       [1, 1]], dtype=torch.float32)
Y_train = torch.tensor([[0],
                       [1],
                       [0],
                       [1]], dtype=torch.float32)

X_test = torch.tensor([[1, 0],
                      [1, 1]], dtype=torch.float32)
Y_test = torch.tensor([[0],
                      [1]], dtype=torch.float32)

In [3]:
class TestNN(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.nn_stack = torch.nn.Sequential(
            torch.nn.Linear(2, 2),
            torch.nn.Sigmoid(),
            torch.nn.Linear(2, 1)
        )

    def forward(self, X):
        return self.nn_stack(X)


def train_step(model: torch.nn.Module, x, y, loss_fn, optimizer):
    model.train()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

def test_step(model: torch.nn.Module, x, y, epoch, loss_fn):
    model.eval()
    with torch.inference_mode():
        test_y_pred = model(x)
        test_loss = loss_fn(test_y_pred, y)
        print(f"Epoch {epoch} | Test loss: {test_loss} | Test std {test_y_pred.squeeze().std()}")

In [4]:
model = TestNN()
loss_fn = torch.nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
for epoch in range(1000):
    train_step(model, x=X_train, y=Y_train, optimizer=optimizer, loss_fn=loss_fn)
    test_step(model, x=X_train, y=Y_train, epoch=epoch, loss_fn=loss_fn)

Epoch 0 | Test loss: 0.2459689974784851 | Test std 0.01932240091264248
Epoch 1 | Test loss: 0.24232497811317444 | Test std 0.01968681812286377
Epoch 2 | Test loss: 0.23871979117393494 | Test std 0.020067276433110237
Epoch 3 | Test loss: 0.2351544350385666 | Test std 0.02046341449022293
Epoch 4 | Test loss: 0.23162996768951416 | Test std 0.02087496779859066
Epoch 5 | Test loss: 0.22814735770225525 | Test std 0.02130155637860298
Epoch 6 | Test loss: 0.22470751404762268 | Test std 0.02174282632768154
Epoch 7 | Test loss: 0.22131147980690002 | Test std 0.022198433056473732
Epoch 8 | Test loss: 0.21796005964279175 | Test std 0.02266797609627247
Epoch 9 | Test loss: 0.21465417742729187 | Test std 0.023151133209466934
Epoch 10 | Test loss: 0.21139469742774963 | Test std 0.023647435009479523
Epoch 11 | Test loss: 0.20818239450454712 | Test std 0.02415653131902218
Epoch 12 | Test loss: 0.20501808822155 | Test std 0.024677982553839684
Epoch 13 | Test loss: 0.20190253853797913 | Test std 0.025211

In [5]:
def count_parameters(model):
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        print([name, params])
        total_params += params
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [6]:
model.state_dict()

OrderedDict([('nn_stack.0.weight',
              tensor([[ 0.2885,  1.9921],
                      [-0.2131,  1.7183]])),
             ('nn_stack.0.bias', tensor([-1.3282, -0.7333])),
             ('nn_stack.2.weight', tensor([[0.9731, 1.3363]])),
             ('nn_stack.2.bias', tensor([-0.6224]))])

In [117]:
print(model(X_test[0]), Y_test[0])
print(model(X_test[1]), Y_test[1])

tensor([0.0474], grad_fn=<ViewBackward0>) tensor([0.])
tensor([0.9325], grad_fn=<ViewBackward0>) tensor([1.])
