# Unnecessarily verbose PyTorch example

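- See the *Deep Learning* textbook for an explanation of why the first model below (a single linear layer with no hidden layer) cannot learn the XOR function, while the second model (with a ReLU hidden layer) can.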

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from dataclasses import dataclass

Define XOR dataset:

In [26]:
# XOR truth table: one row per input pair, and the matching target in the
# same row position of `y`.
X = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=torch.float32,
)  # inputs
y = torch.tensor(
    [
        [0],
        [1],
        [1],
        [0],
    ],
    dtype=torch.float32,
)  # outputs

Define model:

In [27]:
class XorLearner(nn.Module):
    """Small feed-forward network for the two-input XOR problem.

    Without a hidden layer the model is a single affine map from the two
    inputs to one output. With ``hidden_layer=True`` a ReLU-activated linear
    layer is inserted in front of the output layer.

    Args:
        hidden_layer: If true, add a ``nn.Linear(2, hidden_size)`` layer
            followed by ReLU before the output layer.
        hidden_size: Number of units in the hidden layer. Ignored when
            ``hidden_layer`` is false. The default of 2 reproduces the
            original fixed-width architecture.
    """

    def __init__(self, hidden_layer = False, hidden_size = 2):
        super().__init__()
        self.hidden = nn.Linear(2, hidden_size) if hidden_layer else None
        # The output layer consumes the hidden activations when they exist,
        # otherwise the two raw inputs.
        self.output = nn.Linear(hidden_size if hidden_layer else 2, 1)

    def forward(self, x):
        """Compute the model output for ``x``.

        Args:
            x: Input tensor whose last dimension is 2, as required by the
                first linear layer.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1. No output activation is applied.
        """
        # Explicit None check: do not rely on the truthiness of a Module,
        # which depends on whether the module type defines __len__/__bool__.
        if self.hidden is not None:
            x = torch.relu(self.hidden(x))
        return self.output(x)

Model wrapper class:

In [28]:
@dataclass
class ModelWrapper:
    """Bundle a model together with an optimiser so both travel as one object.

    The dataclass only stores the two references; it does not check that the
    optimiser was constructed from this model's parameters.
    """
    # The network to be trained / evaluated.
    model: nn.Module
    # The optimiser stored alongside `model`.
    optimiser: optim.Optimizer

Define and wrap models:

In [29]:
# Learning rate shared by every optimiser created below.
LEARNING_RATE = 0.1

# One purely linear model and one with a ReLU hidden layer.
models = [XorLearner(), XorLearner(hidden_layer=True)]
# Each model gets its own SGD optimiser bound to that model's parameters.
optimisers = [optim.SGD(model.parameters(), lr=LEARNING_RATE) for model in models]
# Pair models and optimisers by position with zip instead of a hard-coded
# range(2), so adding a model to `models` needs no other change.
modelWrappers = [
    ModelWrapper(model, optimiser)
    for model, optimiser in zip(models, optimisers)
]

for wrapper in modelWrappers:
    # Print the module itself (its layer summary) rather than the bound
    # `parameters` method object.
    print(wrapper.model)

<bound method Module.parameters of XorLearner(
  (output): Linear(in_features=2, out_features=1, bias=True)
)>
<bound method Module.parameters of XorLearner(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=True)
)>


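Define loss:
- Mean squared error between the model outputs and the XOR targets.
- The optimisers were already created in the previous cell; `lr` there is the learning rate.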

In [30]:
# Mean squared error, averaged over all elements (the default reduction,
# spelled out here for clarity).
criterion = nn.MSELoss(reduction="mean")

Train model
- [`torch.optim.Optimizer.zero_grad()`](https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html)
- [`torch.optim.Optimizer.step()`](https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.step.html)
- [`torch.Tensor.backward()`](https://pytorch.org/docs/stable/generated/torch.Tensor.backward.html)

In [31]:
num_epochs = 1000
for wrapper in modelWrappers:
    print(f"Training model {wrapper.model}")
    net, opt = wrapper.model, wrapper.optimiser

    for epoch in range(num_epochs):
        # Clear gradients left over from the previous step, then run a full
        # forward/backward pass over the whole dataset and update the weights.
        opt.zero_grad()
        outputs = net(X)
        loss = criterion(outputs, y)
        loss.backward()
        opt.step()

        # Report the loss (measured before this step's update) every 100 epochs.
        if not epoch % 100:
            print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item()}")

    print("")

Training model XorLearner(
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Epoch [0/1000], Loss: 1.47379469871521
Epoch [100/1000], Loss: 0.25008296966552734
Epoch [200/1000], Loss: 0.25000011920928955
Epoch [300/1000], Loss: 0.25
Epoch [400/1000], Loss: 0.2499999850988388
Epoch [500/1000], Loss: 0.25
Epoch [600/1000], Loss: 0.25
Epoch [700/1000], Loss: 0.25
Epoch [800/1000], Loss: 0.25
Epoch [900/1000], Loss: 0.25

Training model XorLearner(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Epoch [0/1000], Loss: 0.37634336948394775
Epoch [100/1000], Loss: 0.17715170979499817
Epoch [200/1000], Loss: 0.04685131460428238
Epoch [300/1000], Loss: 5.050896288594231e-05
Epoch [400/1000], Loss: 2.3386069969433265e-08
Epoch [500/1000], Loss: 1.075414497037075e-11
Epoch [600/1000], Loss: 3.393093098158495e-13
Epoch [700/1000], Loss: 1.5818683168911107e-13
Epoch [800/1000], Loss: 1.1001356076123514e-13
Epoch

Test model

In [32]:
# Evaluation needs no gradients, so the whole loop runs under no_grad.
with torch.no_grad():
    for wrapper in modelWrappers:
        print(wrapper.model)
        predictions = wrapper.model(X)
        # Label line, the predictions, then a blank separator line.
        print("Predictions:", predictions, "", sep="\n")

XorLearner(
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Predictions:
tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000]])

XorLearner(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=True)
)
Predictions:
tensor([[ 4.3586e-07],
        [ 1.0000e+00],
        [ 1.0000e+00],
        [-1.8626e-09]])

