- a single hidden layer with 4 neurons

In [1]:
import torch
from torch import nn

In [2]:
# XOR truth table used as the complete training set.
# Each row of `inputs` is one (a, b) pair; `targets` holds a XOR b in the same row order.
inputs = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=torch.float32,
)
targets = torch.tensor([0, 1, 1, 0], dtype=torch.float32)

In [3]:
# Model architecture
class XOR(nn.Module):
  """Small feed-forward network for XOR: 2 inputs -> 4 hidden units -> 1 output.

  A sigmoid is applied after the hidden layer and again after the output layer.
  """

  def __init__(self):
    """Create the two linear layers and the hidden-layer activation."""
    super().__init__()
    self.fc1 = nn.Linear(2, 4)  # input (2 features) -> hidden layer (4 units)
    self.activation = nn.Sigmoid()  # non-linearity applied to the hidden layer
    self.fc2 = nn.Linear(4, 1)  # hidden layer (4 units) -> output (1 unit)

  def forward(self, x):
    """Run the forward pass.

    Args:
      x: tensor whose last dimension has size 2 (a single sample or a batch).

    Returns:
      Tensor with the same leading dimensions as `x` and a last dimension of
      size 1, holding the sigmoid-squashed network output.
    """
    hidden = self.activation(self.fc1(x))
    # The output layer is passed through a sigmoid as well.
    return torch.sigmoid(self.fc2(hidden))

In [4]:
# Create the model and optimizer
# Seed PyTorch's RNG before building the model so the randomly initialised
# weights are the same on every Restart & Run All.
# NOTE(review): the seed value is arbitrary — confirm training still converges with it.
torch.manual_seed(0)
model = XOR()
# Plain SGD over all model parameters with learning rate 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)


In [16]:
# Training loop
NUM_EPOCHS = 100000  # number of full-batch gradient steps

# Reshape target to match output size: the model emits one column per sample,
# while `targets` is 1-D. The reshape does not depend on the epoch, so it is
# done once here instead of on every iteration.
targets_reshaped = targets.unsqueeze(dim=1)

for epoch in range(NUM_EPOCHS):
  # Forward pass on the whole truth table at once
  outputs = model(inputs)
  # Calculate loss (mean squared error)
  loss = torch.nn.functional.mse_loss(outputs, targets_reshaped)

  # Backward pass and update weights
  optimizer.zero_grad()  # clear gradients left over from the previous step
  loss.backward()
  optimizer.step()

In [8]:
# Print every parameter tensor of the model, keyed by parameter name.
# NOTE(review): this cell's execution count (8) is lower than the training
# cell's (16), so any saved output was probably produced before the latest
# training run — re-run this cell after training to see the trained weights.
print(model.state_dict())

OrderedDict([('fc1.weight', tensor([[-0.0028,  0.6361],
        [-0.0800, -0.3557],
        [ 0.2645, -0.3963],
        [-0.0905, -0.2817]])), ('fc1.bias', tensor([-0.3924, -0.4060,  0.2107, -0.5409])), ('fc2.weight', tensor([[-0.4093, -0.3060,  0.0430, -0.1384]])), ('fc2.bias', tensor([0.3256]))])


In [17]:
# Test the model
# Gradients are not needed for evaluation, so run under no_grad.
with torch.no_grad():
  # One batched forward pass over every row of `inputs`, instead of a
  # hard-coded range(4) of single-sample calls.
  predictions = model(inputs)
  for sample, predicted_output in zip(inputs, predictions):
    print(f"Input: {sample}, Predicted Output: {predicted_output.item():.4f}")

Input: tensor([0., 0.]), Predicted Output: 0.0154
Input: tensor([0., 1.]), Predicted Output: 0.9840
Input: tensor([1., 0.]), Predicted Output: 0.9839
Input: tensor([1., 1.]), Predicted Output: 0.0158
