In [1]:
import torch
from tqdm import tqdm
from torch import nn

# Universal Neural Network class

We can specify an arbitrary number of hidden layers and their sizes.

In [2]:
class ModularNeuralNetwork(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    The network is a chain of ``Linear -> activation`` pairs: one pair per
    entry of ``layer_sizes`` followed by a final pair that maps to
    ``output_size``. Note that the activation is applied to the output
    layer as well.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        layer_sizes: Width of each hidden layer, in order. Any sequence of
            ints (tuple or list) is accepted; an empty sequence means no
            hidden layers.
        activation_function: Module applied after every ``Linear`` layer.
            The same instance is placed at every activation position.
            When omitted, a new ``nn.Tanh()`` is created for this model.
    """

    def __init__(self,
                 input_size: int,
                 output_size: int,
                 layer_sizes: tuple[int, ...] = (),
                 activation_function: "nn.Module | None" = None):
        super().__init__()
        # Build the default here rather than in the signature so that models
        # do not silently share one module instance created at import time.
        if activation_function is None:
            activation_function = nn.Tanh()

        # Consecutive pairs of this list are the (in, out) sizes of each
        # Linear layer, which covers 0, 1 and many hidden layers uniformly.
        sizes = [input_size, *layer_sizes, output_size]
        layers = []
        for in_features, out_features in zip(sizes[:-1], sizes[1:]):
            layers.append(nn.Linear(in_features=in_features,
                                    out_features=out_features))
            layers.append(activation_function)
        self.layers = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the whole ``Linear -> activation`` chain."""
        return self.layers(x)

# Adding Binary Numbers

In [3]:
# Truth table for adding two one-bit numbers.
# Each row of X is (a, b); the matching row of y is the two-bit sum (high bit, low bit).
X = torch.tensor(
    [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]],
    dtype=torch.float32,
)
y = torch.tensor(
    [[0, 0],
     [0, 1],
     [0, 1],
     [1, 0]],
    dtype=torch.float32,
)

In [4]:
# Baseline: no hidden layers, so the model is one Linear layer followed by a Sigmoid.
input_size = 2
output_size = 2
hidden_layers = []
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
  )
)

In [5]:
def train(epochs):
    """Fit the global ``model`` on the full ``(X, y)`` set, then print predictions.

    Runs ``epochs`` full-batch steps using the global ``optimizer`` and
    ``loss_fn``, prints the loss of the last step, and finally prints the
    model output for every row of ``X`` formatted as an addition.

    Args:
        epochs: Number of optimisation steps to run.
    """
    model.train()
    loss = 0  # printed unchanged when epochs == 0
    for _ in tqdm(range(epochs)):
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()
    print(f'Last loss value: {loss}')

    model.eval()
    with torch.inference_mode():
        for sample in X:
            inputs = sample.tolist()
            outputs = model(sample).tolist()
            print(f'{inputs[0]} + {inputs[1]} = {outputs[0]:.4f} {outputs[1]:.4f}')

In [6]:
# 10k full-batch steps on the model configured above, then print its predictions.
N_epochs = 10_000
train(N_epochs)

100%|██████████| 10000/10000 [00:03<00:00, 2904.02it/s]

Last loss value: 0.127850741147995
0.0 + 0.0 = 0.0009 0.5000
0.0 + 1.0 = 0.0816 0.5000
1.0 + 0.0 = 0.0816 0.5000
1.0 + 1.0 = 0.9026 0.5000





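The first bit (the carry, i.e. the AND of the inputs) is predicted correctly, but the second bit (the sum, i.e. the XOR of the inputs) stays at 0.5 for every input. XOR is not linearly separable, so a single linear layer followed by a sigmoid cannot represent it. We need more layers and neurons.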

In [7]:
# One hidden layer with 2 units.
input_size = 2
output_size = 2
hidden_layers = [2]
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=2, out_features=2, bias=True)
    (3): Sigmoid()
  )
)

In [8]:
# 50k full-batch steps on the model configured above, then print its predictions.
N_epochs = 50_000
train(N_epochs)

100%|██████████| 50000/50000 [00:21<00:00, 2363.45it/s]

Last loss value: 0.00047935720067471266
0.0 + 0.0 = 0.0017 0.0242
0.0 + 1.0 = 0.0187 0.9731
1.0 + 0.0 = 0.0187 0.9731
1.0 + 1.0 = 0.9779 0.0247





Probabilities are already correct, but let's add more neurons.

In [9]:
# One hidden layer with 20 units.
input_size = 2
output_size = 2
hidden_layers = [20]
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=20, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=20, out_features=2, bias=True)
    (3): Sigmoid()
  )
)

In [10]:
# 50k full-batch steps on the model configured above, then print its predictions.
N_epochs = 50_000
train(N_epochs)

100%|██████████| 50000/50000 [00:24<00:00, 2056.30it/s]

Last loss value: 0.00020986064919270575
0.0 + 0.0 = 0.0000 0.0169
0.0 + 1.0 = 0.0108 0.9804
1.0 + 0.0 = 0.0098 0.9845
1.0 + 1.0 = 0.9861 0.0191





We can see a small improvement. Let's add more layers.

In [11]:
# Two hidden layers with 5 units each.
input_size = 2
output_size = 2
hidden_layers = [5, 5]
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=5, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=5, out_features=5, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=5, out_features=2, bias=True)
    (5): Sigmoid()
  )
)

In [12]:
# 50k full-batch steps on the model configured above, then print its predictions.
N_epochs = 50_000
train(N_epochs)

100%|██████████| 50000/50000 [00:24<00:00, 2028.69it/s]

Last loss value: 0.00022150261793285608
0.0 + 0.0 = 0.0003 0.0189
0.0 + 1.0 = 0.0103 0.9811
1.0 + 0.0 = 0.0104 0.9830
1.0 + 1.0 = 0.9868 0.0195





In [13]:
# Four hidden layers with 20 units each.
input_size = 2
output_size = 2
hidden_layers = [20, 20, 20, 20]
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=20, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=20, out_features=20, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=20, out_features=20, bias=True)
    (5): Sigmoid()
    (6): Linear(in_features=20, out_features=20, bias=True)
    (7): Sigmoid()
    (8): Linear(in_features=20, out_features=2, bias=True)
    (9): Sigmoid()
  )
)

In [14]:
# 50k full-batch steps on the model configured above, then print its predictions.
N_epochs = 50_000
train(N_epochs)

100%|██████████| 50000/50000 [00:40<00:00, 1238.77it/s]

Last loss value: 0.21557700634002686
0.0 + 0.0 = 0.2437 0.4972
0.0 + 1.0 = 0.2602 0.5006
1.0 + 0.0 = 0.2545 0.4995
1.0 + 1.0 = 0.2699 0.5026





# Multiplication

In [15]:
# Each row of X is a one-bit input; the matching row of y is the two-bit target.
X = torch.tensor([[0], [1]], dtype=torch.float32)
y = torch.tensor(
    [[0, 0],
     [1, 0]],
    dtype=torch.float32,
)

In [16]:
# One input feature, two outputs, no hidden layers.
input_size = 1
output_size = 2
hidden_layers = []
learning_rate = 0.2

loss_fn = nn.MSELoss()
model = ModularNeuralNetwork(
    input_size=input_size,
    output_size=output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=True)
    (1): Sigmoid()
  )
)

In [17]:
def train(epochs):
    """Fit the global ``model`` on the full ``(X, y)`` set, then print predictions.

    This redefines (and therefore shadows) the earlier ``train``; the
    training loop is the same and only the printed line differs, showing
    each input as ``<x> * 2``.

    Runs ``epochs`` full-batch steps using the global ``optimizer`` and
    ``loss_fn``, prints the loss of the last step, and finally prints the
    model output for every row of ``X``.

    Args:
        epochs: Number of optimisation steps to run.
    """
    model.train()
    loss = 0  # printed unchanged when epochs == 0
    for _ in tqdm(range(epochs)):
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()
    print(f'Last loss value: {loss}')

    model.eval()
    with torch.inference_mode():
        for sample in X:
            outputs = model(sample).tolist()
            print(f'{sample.item()} * 2 = {outputs[0]:.4f} {outputs[1]:.4f}')

In [18]:
# 10k full-batch steps on the model configured above, then print its predictions.
N_epochs = 10_000
train(N_epochs)

100%|██████████| 10000/10000 [00:03<00:00, 2962.09it/s]

Last loss value: 0.0008655674173496664
0.0 * 2 = 0.0419 0.0229
1.0 * 2 = 0.9660 0.0049



