In [1]:
import torch
from tqdm import tqdm
from torch import nn

# Universal Neural Network class

We can specify an arbitrary number of hidden layers and their sizes.

In [3]:
class ModularNeuralNetwork(nn.Module):
    """Fully connected feed-forward network with a configurable hidden stack.

    The network is ``Linear -> activation`` once per hidden layer, followed by
    a final ``Linear -> activation`` that maps to ``output_size``. The
    activation is therefore also applied to the network output.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        layer_sizes: Width of each hidden layer, in order. Any iterable of
            ints is accepted (tuple, list, generator); an empty iterable
            means no hidden layer.
        activation_function: Module applied after every linear layer. The
            same instance is reused at every position, so an activation with
            learnable parameters would share them across layers. The default
            ``nn.Tanh()`` is a single instance created when the class is
            defined.
    """

    def __init__(self,
                 input_size: int,
                 output_size: int,
                 layer_sizes: tuple[int, ...] = (),
                 activation_function: nn.Module = nn.Tanh()):
        super().__init__()
        # Unpacking materialises layer_sizes once, so one-shot iterables work
        # and the 0 / 1 / many hidden-layer cases share a single code path.
        widths = [input_size, *layer_sizes, output_size]
        layers = []
        # Consecutive width pairs give (in_features, out_features) per layer;
        # layers are created input-to-output, matching the seeded init order.
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(in_features=in_features, out_features=out_features),
                       activation_function]
        self.layers = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the sequential stack and return the result."""
        return self.layers(x)

# Adding Binary Numbers

In [2]:
# Inputs: every pair of bits (a, b).
# Targets: the two-bit sum a + b, written as [carry, sum] = [a AND b, a XOR b].
X = torch.tensor([[a, b] for a in (0, 1) for b in (0, 1)], dtype=torch.float32)
y = torch.tensor([[a & b, a ^ b] for a in (0, 1) for b in (0, 1)], dtype=torch.float32)

In [4]:
# Baseline: no hidden layer, so the network is a single Linear + Sigmoid.
input_size = 2
output_size = 2
hidden_layers = []
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
  )
)

In [5]:
def train(epochs):
    """Fit the global ``model`` on the global ``X``/``y`` and print its predictions.

    Runs ``epochs`` update steps on the whole of ``X`` using the global
    ``loss_fn`` and ``optimizer``, prints the last loss value, then switches
    the model to eval mode and prints its output for every row of ``X``
    formatted as a binary addition.
    """
    model.train()
    last_loss = 0  # printed unchanged when epochs == 0
    for _ in tqdm(range(epochs)):
        last_loss = loss_fn(model(X), y)
        optimizer.zero_grad()
        last_loss.backward()
        optimizer.step()
    print(f'Last loss value: {last_loss}')

    model.eval()
    with torch.inference_mode():
        for sample in X:
            left, right = sample.tolist()
            high, low = model(sample).tolist()
            print(f'{left} + {right} = {high:.4f} {low:.4f}')

In [6]:
# 10,000 updates on the full data set, then print the learned addition table.
N_epochs = 10_000
train(epochs=N_epochs)

100%|██████████| 10000/10000 [00:03<00:00, 2925.26it/s]

Last loss value: 0.12787023186683655
0.0 + 0.0 = 0.0009 0.5000
0.0 + 1.0 = 0.0819 0.5000
1.0 + 0.0 = 0.0819 0.5000
1.0 + 1.0 = 0.9022 0.5000





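The first (carry) bit is predicted correctly, but the second (sum) bit is stuck at 0.5 for every input. The sum bit is the XOR of the two inputs, which is not linearly separable, so a single linear layer cannot learn it. We need at least one hidden layer.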

In [7]:
# One hidden layer with 2 neurons.
input_size = 2
output_size = 2
hidden_layers = [2]
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=2, out_features=2, bias=True)
    (3): Sigmoid()
  )
)

In [8]:
# 50,000 updates on the full data set, then print the learned addition table.
N_epochs = 50_000
train(epochs=N_epochs)

100%|██████████| 50000/50000 [00:20<00:00, 2408.25it/s]

Last loss value: 0.0007525425171479583
0.0 + 0.0 = 0.0003 0.0378
0.0 + 1.0 = 0.0149 0.9674
1.0 + 0.0 = 0.0149 0.9674
1.0 + 1.0 = 0.9768 0.0385





The predictions are already correct, but let's try more neurons.

In [9]:
# One hidden layer with 20 neurons.
input_size = 2
output_size = 2
hidden_layers = [20]
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=20, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=20, out_features=2, bias=True)
    (3): Sigmoid()
  )
)

In [10]:
# 50,000 updates on the full data set, then print the learned addition table.
N_epochs = 50_000
train(epochs=N_epochs)

100%|██████████| 50000/50000 [00:24<00:00, 2067.06it/s]

Last loss value: 0.0002571965451352298
0.0 + 0.0 = 0.0000 0.0196
0.0 + 1.0 = 0.0090 0.9795
1.0 + 0.0 = 0.0101 0.9796
1.0 + 1.0 = 0.9866 0.0216





We can see a small improvement. Let's add more layers.

In [12]:
# Two hidden layers with 5 neurons each.
input_size = 2
output_size = 2
hidden_layers = [5, 5]
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=5, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=5, out_features=5, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=5, out_features=2, bias=True)
    (5): Sigmoid()
  )
)

In [13]:
# 50,000 updates on the full data set, then print the learned addition table.
N_epochs = 50_000
train(epochs=N_epochs)

100%|██████████| 50000/50000 [00:25<00:00, 1989.38it/s]

Last loss value: 0.00025495467707514763
0.0 + 0.0 = 0.0009 0.0203
0.0 + 1.0 = 0.0108 0.9812
1.0 + 0.0 = 0.0109 0.9808
1.0 + 1.0 = 0.9848 0.0209





In [14]:
# Four hidden layers with 20 neurons each.
input_size = 2
output_size = 2
hidden_layers = [20, 20, 20, 20]
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=20, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=20, out_features=20, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=20, out_features=20, bias=True)
    (5): Sigmoid()
    (6): Linear(in_features=20, out_features=20, bias=True)
    (7): Sigmoid()
    (8): Linear(in_features=20, out_features=2, bias=True)
    (9): Sigmoid()
  )
)

In [15]:
# 50,000 updates on the full data set, then print the learned addition table.
N_epochs = 50_000
train(epochs=N_epochs)

100%|██████████| 50000/50000 [00:39<00:00, 1256.88it/s]

Last loss value: 0.00017881262465380132
0.0 + 0.0 = 0.0000 0.0175
0.0 + 1.0 = 0.0075 0.9829
1.0 + 0.0 = 0.0075 0.9827
1.0 + 1.0 = 0.9910 0.0184





# Multiplication by 2

In [16]:
# Inputs: a single bit x, stored as a column vector.
# Targets: the two-bit value of x * 2 (0 -> 00, 1 -> 10).
X = torch.tensor([[0], [1]], dtype=torch.float32)
y = torch.tensor([[bit, 0] for bit in (0, 1)], dtype=torch.float32)

In [17]:
# One input bit, two output bits, no hidden layer.
input_size = 1
output_size = 2
hidden_layers = []
learning_rate = 0.2

model = ModularNeuralNetwork(
    input_size,
    output_size,
    layer_sizes=hidden_layers,
    activation_function=nn.Sigmoid(),
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
model

ModularNeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=True)
    (1): Sigmoid()
  )
)

In [18]:
def train(epochs):
    """Fit the global ``model`` on the global ``X``/``y`` and print its predictions.

    Same training loop as the earlier ``train`` in this notebook, which this
    definition replaces; only the report differs. Runs ``epochs`` update
    steps on the whole of ``X`` using the global ``loss_fn`` and
    ``optimizer``, prints the last loss value, then switches the model to
    eval mode and prints its output for every row of ``X`` formatted as a
    multiplication by 2.
    """
    model.train()
    last_loss = 0  # printed unchanged when epochs == 0
    for _ in tqdm(range(epochs)):
        last_loss = loss_fn(model(X), y)
        optimizer.zero_grad()
        last_loss.backward()
        optimizer.step()
    print(f'Last loss value: {last_loss}')

    model.eval()
    with torch.inference_mode():
        for sample in X:
            value = sample.item()
            high, low = model(sample).tolist()
            print(f'{value} * 2 = {high:.4f} {low:.4f}')

In [19]:
# 10,000 updates on the full data set, then print the learned x * 2 table.
N_epochs = 10_000
train(epochs=N_epochs)

100%|██████████| 10000/10000 [00:03<00:00, 3002.55it/s]

Last loss value: 0.0008660536841489375
0.0 * 2 = 0.0419 0.0226
1.0 * 2 = 0.9659 0.0056



