# PyTorch XOR machine
See the *Deep Learning* textbook for an explanation of why the first model cannot learn the XOR function: without a hidden layer the model is purely linear, and XOR is not linearly separable.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from dataclasses import dataclass

Define XOR dataset:

In [2]:
# Inputs: every combination of two binary features, one combination per row.
X = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=torch.float32,
)
# Targets: XOR of the two input columns, kept as a (4, 1) float column.
# Slicing with :1 / 1: (rather than indexing) preserves the trailing dimension.
y = (X[:, :1] != X[:, 1:]).to(torch.float32)

Define model:

In [3]:
class XorMachine(nn.Module):
    """Small network mapping 2 input features to 1 output value.

    Args:
        hidden_layer: When False (the default) the model is a single linear
            layer (2 -> 1). When True, a 2-unit linear layer followed by ReLU
            is applied before the output layer.
    """

    def __init__(self, hidden_layer = False):
        super().__init__()
        # Left as None when disabled, so no hidden layer appears in the printed model.
        self.hidden = nn.Linear(2, 2) if hidden_layer else None
        self.output = nn.Linear(2, 1)

    def forward(self, x):
        """Return the output layer's raw value for input `x` (last dimension must be 2)."""
        # Explicit None check: module truthiness is not a reliable "is present"
        # test, since container modules that define __len__ are falsy when empty.
        if self.hidden is not None:
            x = torch.relu(self.hidden(x))
        return self.output(x)

Model wrapper class:

In [4]:
@dataclass
class ModelWrapper:
    """Bundles a model with an optimiser so the pair can be handled as one unit."""
    model: nn.Module  # the network to train / evaluate
    optimiser: optim.Optimizer  # expected to be built over `model`'s parameters (not enforced here)

Define and wrap models:

In [5]:
# Two models to compare: a purely linear one, then one with a ReLU hidden layer.
models = [XorMachine(), XorMachine(hidden_layer=True)]
# Each model gets its own SGD optimiser bound to that model's parameters.
optimisers = [optim.SGD(model.parameters(), lr=0.1) for model in models]
# Pair by position with zip instead of a hard-coded range(2), so the pairing
# stays correct if models are added to or removed from the list above.
modelWrappers = [
    ModelWrapper(model, optimiser)
    for model, optimiser in zip(models, optimisers)
]

Train the models. Relevant PyTorch API references:
- [`torch.optim.Optimizer.zero_grad()`](https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html)
- [`torch.optim.Optimizer.step()`](https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.step.html)
- [`torch.Tensor.backward()`](https://pytorch.org/docs/stable/generated/torch.Tensor.backward.html)

In [6]:
criterion = nn.MSELoss()
num_epochs = 1000
for wrapper in modelWrappers:
    model, optimiser = wrapper.model, wrapper.optimiser
    print(f"Training model {model}")

    for epoch in range(num_epochs):
        # Clear gradients from the previous step before accumulating new ones.
        optimiser.zero_grad()

        # Full-batch step: all four XOR samples are used on every iteration.
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimiser.step()

        # The reported loss was computed before this epoch's parameter update.
        if epoch % 100 == 0:
            print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item()}")

    print("")

Training model XorMachine(
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Epoch [0/1000], Loss: 0.3647574186325073
Epoch [100/1000], Loss: 0.2501237988471985
Epoch [200/1000], Loss: 0.2500002086162567
Epoch [300/1000], Loss: 0.25
Epoch [400/1000], Loss: 0.25
Epoch [500/1000], Loss: 0.25
Epoch [600/1000], Loss: 0.25
Epoch [700/1000], Loss: 0.25
Epoch [800/1000], Loss: 0.25
Epoch [900/1000], Loss: 0.25

Training model XorMachine(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Epoch [0/1000], Loss: 0.34600308537483215
Epoch [100/1000], Loss: 0.15168118476867676
Epoch [200/1000], Loss: 0.033363569527864456
Epoch [300/1000], Loss: 0.0020663898903876543
Epoch [400/1000], Loss: 7.906011887826025e-05
Epoch [500/1000], Loss: 2.65501898866205e-06
Epoch [600/1000], Loss: 8.67745342247872e-08
Epoch [700/1000], Loss: 2.940852894539603e-09
Epoch [800/1000], Loss: 9.626699437603747e-11
Epoch [900/1000], Loss:

Test the models:

In [7]:
for wrapper in modelWrappers:
    print(wrapper.model)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        predictions = wrapper.model(X)
    # One call, newline-separated: header, tensor, then a blank line.
    print("Predictions:", predictions, "", sep="\n")

XorMachine(
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Predictions:
tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000]])

XorMachine(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Predictions:
tensor([[8.3447e-07],
        [1.0000e+00],
        [1.0000e+00],
        [8.9407e-07]])

