In [153]:
import torch
import torch.nn as nn
import numpy as np

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed PyTorch's random number generators before any module is constructed, so the
# randomly initialised layer weights (and hence the training run) are repeatable
# after a kernel restart on the same setup.
SEED = 0
torch.manual_seed(SEED)

# Create every new tensor and module parameter on the selected device by default.
torch.set_default_device(device)
# Print tensors in fixed-point notation instead of scientific notation.
torch.set_printoptions(sci_mode=False)
print(device)


class TwoLayerNeuralNetwork(nn.Module):
    """Two fully connected layers, each followed by a tanh activation.

    Because the last operation is tanh, every output value lies in [-1, 1].

    Args:
        in_features: Size of each input vector (default 7).
        hidden_features: Number of units in the hidden layer (default 2).
        out_features: Size of each output vector (default 4).
    """

    def __init__(
        self,
        in_features: int = 7,
        hidden_features: int = 2,
        out_features: int = 4,
    ) -> None:
        super().__init__()
        self.first_layer = nn.Linear(
            in_features=in_features, out_features=hidden_features, dtype=torch.float
        )

        self.second_layer = nn.Linear(
            in_features=hidden_features, out_features=out_features, dtype=torch.float
        )
        # NOTE: despite its name, this attribute holds a Tanh activation, not a
        # logistic sigmoid. The name is kept so existing references keep working.
        self.sigmoid = nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply layer 1 -> tanh -> layer 2 -> tanh to ``x``.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        out = self.sigmoid(self.first_layer(x))
        out = self.sigmoid(self.second_layer(out))
        return out


# Instantiate the network; its Linear layers start from PyTorch's default initialisation.
model = TwoLayerNeuralNetwork()
# Bare last expression: the notebook displays the untrained weights and biases.
model.state_dict()

cuda


OrderedDict([('first_layer.weight',
              tensor([[-0.3025, -0.0897,  0.3098, -0.0513, -0.1707, -0.1184,  0.0990],
                      [-0.3191, -0.2148, -0.2347,  0.1795, -0.3085, -0.1558, -0.1560]],
                     device='cuda:0')),
             ('first_layer.bias', tensor([-0.1609,  0.2522], device='cuda:0')),
             ('second_layer.weight',
              tensor([[ 0.0281, -0.6448],
                      [ 0.2680, -0.2664],
                      [-0.5495, -0.1065],
                      [ 0.0486, -0.6906]], device='cuda:0')),
             ('second_layer.bias',
              tensor([0.2220, 0.5103, 0.1454, 0.3744], device='cuda:0'))])

In [154]:
# Training inputs: four 7-element vectors with entries in {-1, +1}.
# Data tensors are plain constants. They must not require gradients, otherwise autograd
# computes and accumulates an unused gradient for them on every epoch.
X = torch.tensor(
    [
        [1, -1, 1, 1, 1, 1, 1],
        [1, 1, 1, -1, -1, -1, -1],
        [1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, -1, 1, 1],
    ],
    dtype=torch.float,
)
# Targets: row i has +1 in column i and -1 elsewhere (one class per input row),
# matching the [-1, 1] range of the network's tanh output.
y = torch.tensor(
    [
        [1, -1, -1, -1],
        [-1, 1, -1, -1],
        [-1, -1, 1, -1],
        [-1, -1, -1, 1],
    ],
    dtype=torch.float,
)
epochs = 5000
alpha = 0.18  # learning rate handed to Adam
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=alpha)

# Log roughly ten times per run. Integer arithmetic keeps the interval exact even
# when `epochs` is not a multiple of 10, and max() guards against an interval of 0.
log_every = max(1, epochs // 10)

# Training mode only needs to be selected once; nothing in the loop changes it.
model.train()
for epoch in range(epochs):
    y_pred = model(X)

    loss = loss_function(y_pred, y)

    # Clear the parameter gradients left over from the previous step before backprop.
    optimizer.zero_grad()

    loss.backward()

    optimizer.step()
    if epoch % log_every == 0:
        print(f"Epoch: {epoch}| loss: {loss.item()}")

Epoch: 0| loss: 1.637026309967041


Epoch: 500| loss: 0.0004607658484019339
Epoch: 1000| loss: 0.00010735228715930134
Epoch: 1500| loss: 4.973559043719433e-05
Epoch: 2000| loss: 2.8505539376055822e-05
Epoch: 2500| loss: 1.812677874113433e-05
Epoch: 3000| loss: 1.2237588634889107e-05
Epoch: 3500| loss: 8.579009772802237e-06
Epoch: 4000| loss: 6.1675996221310925e-06
Epoch: 4500| loss: 4.512412033363944e-06


In [155]:
# Show the trained network's predictions for the training inputs.
# This is inspection only, so skip building an autograd graph for the forward pass;
# the printed tensor then carries no grad_fn.
with torch.no_grad():
    print(model(X))
# Show the learned weights and biases.
print(model.state_dict())

tensor([[ 0.9977, -0.9981, -0.9989, -1.0000],
        [-1.0000,  0.9989, -1.0000, -0.9973],
        [-0.9974, -1.0000,  0.9979, -0.9977],
        [-1.0000, -0.9984, -0.9974,  0.9970]], device='cuda:0',
       grad_fn=<TanhBackward0>)
OrderedDict([('first_layer.weight', tensor([[ 0.3147, -1.2990,  0.9270, -0.6780, -1.3450, -0.7451, -0.5276],
        [ 0.9197,  2.0293,  1.0040, -0.8299, -2.5179, -1.1651, -1.1653]],
       device='cuda:0')), ('first_layer.bias', tensor([0.4563, 1.4910], device='cuda:0')), ('second_layer.weight', tensor([[ 2.1283, -6.7577],
        [ 6.0719, -0.3066],
        [-9.5249,  0.6328],
        [-5.4550,  8.8971]], device='cuda:0')), ('second_layer.bias', tensor([-2.7595, -2.0265, -5.9002, -6.7501], device='cuda:0'))])
