In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from typing import Callable, Any, List

class Net(nn.Module):
    """Fully connected feed-forward network with a sigmoid after every layer.

    ``structure`` lists the layer widths from input to output, so
    ``[2, 2, 1]`` builds ``Linear(2, 2)`` followed by ``Linear(2, 1)``.
    The layers are registered as sub-modules named ``fc1`` ... ``fc{n-1}``
    where ``n == len(structure)``.

    Args:
        structure: Layer widths, input width first. Needs at least two
            entries (one input width and one output width).
        initializer: Called once per layer with that layer's weight tensor
            right after the layer is created. Only weights are passed to it;
            biases keep whatever ``nn.Linear`` gives them. The default fills
            weights uniformly in ``[-1.0, 1.0]``.

    Raises:
        ValueError: If ``structure`` has fewer than two entries, which would
            otherwise yield a network without any layers or parameters.
    """

    def __init__(
        self,
        structure: List[int],
        initializer: Callable[..., Any] = lambda w: nn.init.uniform_(w, -1.0, 1.0),
    ):
        super().__init__()
        if len(structure) < 2:
            # Fail here instead of later, e.g. when an optimizer is handed an
            # empty parameter list.
            raise ValueError(
                "structure must contain at least an input and an output size, "
                "got %d entries" % len(structure)
            )
        self.layer_num: int = len(structure)
        for i in range(1, self.layer_num):
            # an affine operation: y = Wx + b
            layer = nn.Linear(structure[i - 1], structure[i])
            self.add_module(f"fc{i}", layer)
            initializer(layer.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply every ``fc{i}`` layer in order, each followed by a sigmoid."""
        for i in range(1, self.layer_num):
            layer = getattr(self, f"fc{i}")
            x = torch.sigmoid(layer(x))
        return x

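# Alternative forward pass (disabled): ReLU on the hidden layers and a sigmoid
# on the output layer only. NOTE: as written, `i` is never bound when
# layer_num == 2 (the loop body does not run), so `i += 1` would fail there.
#     def forward(self, x: torch.Tensor):
#         for i in range(1, self.layer_num-1):
#             x = F.relu(getattr(self, "fc%d" % i)(x))
#         i += 1
#         return torch.sigmoid(getattr(self, "fc%d" % i)(x))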

In [35]:
# ----------- Selecting Optimizer -----------
# Train the small sigmoid network on the XOR truth table with per-sample SGD,
# then print the network's output for each of the four input patterns.
if __name__ == "__main__":
    # Seed torch's RNG so the random weight/bias initialisation, and hence the
    # whole training run, is repeatable on a fresh kernel.
    SEED: int = 0
    torch.manual_seed(SEED)

    # XOR truth table: the four input pairs and their targets.
    TRAINING_DATA: torch.Tensor = torch.tensor(
        [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
    )
    TEACHING_DATA: torch.Tensor = torch.tensor([[0.0], [1.0], [1.0], [0.0]])
    EPOCH: int = 10000  # upper bound on passes over the four samples
    error_boundary: float = 1e-3  # stop once the mean epoch loss is below this

    structure = [2, 2, 1]
    # Uniform init bound sqrt(6 / (first width + last width)); the same bound
    # is used for every layer's weights.
    # NOTE(review): this looks like the Glorot/Xavier formula but uses the
    # network's input/output widths rather than each layer's own sizes —
    # confirm that is intended.
    initial_value = (6 / (structure[0] + structure[-1])) ** 0.5
    initializer: Callable[..., Any] = lambda weights: nn.init.uniform_(
        weights, -initial_value, initial_value
    )
    net = Net(structure, initializer)
    criterion = nn.MSELoss()
    learning_rate: float = 0.3
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
#     optimizer = optim.Adam(net.parameters(), lr = 0.0001)
    training_data_indexes = np.arange(len(TRAINING_DATA))
    Loss = []  # mean loss of each completed epoch
    for epoch in range(EPOCH):
        error = []  # per-sample losses within this epoch
        for data_index in training_data_indexes:
            # One optimizer step per sample (batch size 1).
            optimizer.zero_grad()
            output = net(TRAINING_DATA[data_index])
            loss = criterion(output, TEACHING_DATA[data_index])
            loss.backward()
            optimizer.step()
            # MSELoss already reduces to a scalar; .item() extracts it as a
            # Python float without an extra mean().
            error.append(loss.item())
        Loss.append(np.mean(error))
        if Loss[epoch] < error_boundary:
            print(epoch, Loss[epoch])
            print("----- End Learning -----")
            break
        if epoch % 1000 == 0:
            print(epoch, Loss[epoch])

    # Inference only: skip autograd bookkeeping while printing predictions.
    with torch.no_grad():
        for training_data in TRAINING_DATA:
            output = net(training_data)
            print(output)

0 0.28729701042175293
1000 0.14207304548472166
2000 0.1325465442496352
3000 0.13128150466945954
4000 0.13080816861474887
5000 0.13056330237304792
6000 0.13041438320215093
7000 0.13031446338573005
8000 0.13024294016940985
9000 0.13018927012672066
tensor([0.0158], grad_fn=<SigmoidBackward>)
tensor([0.9866], grad_fn=<SigmoidBackward>)
tensor([0.4899], grad_fn=<SigmoidBackward>)
tensor([0.4907], grad_fn=<SigmoidBackward>)


tensor([0.0000, 0.0000, 0.0343, 0.0815, 0.0000])
