In [1]:
import numpy as np

from nn.activations import ReLU
from nn.layers import Linear
from nn.losses import SoftmaxCrossEntropyLoss
from nn.optimizers import SGD, Adam

## XOR dataset

  The four XOR input pairs and their one-hot encoded targets (class 0 → [1, 0], class 1 → [0, 1]):

  Inputs: [[0,0], [0,1], [1,0], [1,1]]

  Targets: [[1,0], [0,1], [0,1], [1,0]]  # one-hot encoded

In [2]:
# All four 2-bit input pairs, one per row, in the order 00, 01, 10, 11.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
# One-hot targets: pick row (a XOR b) of the 2x2 identity matrix,
# so class 0 -> [1, 0] and class 1 -> [0, 1].
Y = np.eye(2, dtype=int)[X[:, 0] ^ X[:, 1]]

## Train with a simple 2-layer NN

  - Linear(2, 4) → ReLU → Linear(4, 2) → Softmax
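  - Use SoftmaxCrossEntropyLoss as the training loss. In the code the network ends at the second Linear layer and outputs logits, so the Softmax step above is presumably applied inside this loss rather than as a separate layer.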

### SGD

In [18]:
# Network pieces for the SGD run: 2 inputs -> 4 hidden units -> 2 outputs.
# `train` reads these names as notebook globals, so this cell must run before
# `train` is called.
# NOTE(review): 'he' / 'xavier' are presumably weight-initialisation schemes
# implemented in nn.layers — confirm there. No random seed is set in the cells
# shown, so the printed losses may differ from run to run.
linear1 = Linear(input_dim=2, output_dim=4, initializer='he')
relu = ReLU()
linear2 = Linear(input_dim=4, output_dim=2, initializer='xavier')
# Loss object; it is called on (logits, Y) and then asked for the gradient via backward().
softmaxCE = SoftmaxCrossEntropyLoss()

In [19]:
def train(epochs, optimizer, log_every=500):
    """Run full-batch training of the notebook's network on the XOR data.

    The model and data are not passed in: this function reads the notebook
    globals ``linear1``, ``relu``, ``linear2``, ``softmaxCE``, ``X`` and ``Y``
    at call time, so it trains whatever objects those names currently refer
    to. ``optimizer`` should have been built over those same layers.

    Args:
        epochs: Number of iterations. Each one runs a forward and a backward
            pass over all of ``X`` and then calls the optimizer once.
        optimizer: Object providing ``step()`` and ``zero_grad()``.
        log_every: Print the loss whenever ``epoch`` is a multiple of this
            value (epoch 0 included). ``None`` or a value <= 0 disables
            logging. Defaults to 500, the interval that used to be hard-coded.

    Returns:
        None. Progress is reported through ``print`` only, so calling this as
        the last expression of a cell produces no ``Out[]`` value.
    """
    # Decide once whether logging is enabled; this also avoids `epoch % 0`.
    should_log = log_every is not None and log_every > 0

    for epoch in range(epochs):
        # Forward pass: Linear -> ReLU -> Linear yields the logits.
        logits = linear2(relu(linear1(X)))
        loss = softmaxCE(logits, Y)
        if should_log and epoch % log_every == 0:
            print("epoch:", epoch, "loss:", loss)

        # Backward pass: visit the pieces in reverse order, handing each
        # backward() the value returned by the one before it.
        # softmaxCE.backward() takes no arguments — presumably it relies on
        # state cached by the forward call above (confirm in nn.losses).
        grad = softmaxCE.backward()
        grad = linear2.backward(grad)
        grad = relu.backward(grad)
        linear1.backward(grad)

        optimizer.step()
        # NOTE(review): presumably resets gradients stored on the layers so
        # they do not carry over into the next iteration — confirm in
        # nn.optimizers.
        optimizer.zero_grad()

In [20]:
# 5001 iterations (epochs 0..5000) so that epoch 5000 is included in the
# every-500-epochs loss log printed by `train`.
epochs = 5001
# The optimizer is given only the two Linear layers; the ReLU is not passed in.
optimizer = SGD(layers=[linear1, linear2], learning_rate=1e-1)

train(epochs, optimizer)

epoch: 0 loss: 2.4268216574368684
epoch: 500 loss: 0.03508113566722938
epoch: 1000 loss: 0.01326204148838499
epoch: 1500 loss: 0.007891055095969208
epoch: 2000 loss: 0.005541476524895075
epoch: 2500 loss: 0.0042393443348534045
epoch: 3000 loss: 0.0034194533546276972
epoch: 3500 loss: 0.00285697688561368
epoch: 4000 loss: 0.002449187125737941
epoch: 4500 loss: 0.002140238436047561
epoch: 5000 loss: 0.0018986330039013964


### Adam

In [13]:
# Re-create every network piece so the Adam run works on newly constructed
# layers instead of continuing from the objects trained in the SGD section.
# This rebinds the same global names that `train` and the inference cells read.
# NOTE(review): 'he' / 'xavier' are presumably weight-initialisation schemes
# implemented in nn.layers — confirm there.
linear1 = Linear(input_dim=2, output_dim=4, initializer='he')
relu = ReLU()
linear2 = Linear(input_dim=4, output_dim=2, initializer='xavier')
softmaxCE = SoftmaxCrossEntropyLoss()

In [9]:
# NOTE(review): `train` is also defined in the SGD section above; this cell
# redefines it. It is kept so the Adam section still runs on its own, but a
# single shared definition would be easier to maintain.
def train(epochs, optimizer, log_every=500):
    """Run full-batch training of the notebook's network on the XOR data.

    Reads the notebook globals ``linear1``, ``relu``, ``linear2``,
    ``softmaxCE``, ``X`` and ``Y`` at call time, so it trains whatever objects
    those names currently refer to. ``optimizer`` should have been built over
    those same layers.

    Args:
        epochs: Number of iterations. Each one runs a forward and a backward
            pass over all of ``X`` and then calls the optimizer once.
        optimizer: Object providing ``step()`` and ``zero_grad()``.
        log_every: Print the loss whenever ``epoch`` is a multiple of this
            value (epoch 0 included). ``None`` or a value <= 0 disables
            logging. Defaults to 500, the interval that used to be hard-coded.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    # Decide once whether logging is enabled; this also avoids `epoch % 0`.
    should_log = log_every is not None and log_every > 0

    for epoch in range(epochs):
        # Forward pass: Linear -> ReLU -> Linear yields the logits.
        logits = linear2(relu(linear1(X)))
        loss = softmaxCE(logits, Y)
        if should_log and epoch % log_every == 0:
            print("epoch:", epoch, "loss:", loss)

        # Backward pass in reverse layer order; each backward() receives the
        # value returned by the previous one. softmaxCE.backward() takes no
        # arguments — presumably it relies on state cached by the forward
        # call above (confirm in nn.losses).
        grad = softmaxCE.backward()
        grad = linear2.backward(grad)
        grad = relu.backward(grad)
        linear1.backward(grad)

        optimizer.step()
        # NOTE(review): presumably resets gradients stored on the layers so
        # they do not carry over into the next iteration — confirm in
        # nn.optimizers.
        optimizer.zero_grad()

In [14]:
# Same iteration budget as the SGD run: epochs 0..5000, so epoch 5000 is
# included in the every-500-epochs loss log printed by `train`.
epochs = 5001
# Adam is given the two Linear layers created in this section and a learning
# rate of 1e-2 (the SGD run above used 1e-1).
optimizer = Adam(layers=[linear1, linear2], learning_rate=1e-2)

train(epochs, optimizer)

epoch: 0 loss: 0.6595445790476276
epoch: 500 loss: 0.004427692156744142
epoch: 1000 loss: 0.0011999582080596807
epoch: 1500 loss: 0.0005478689969889016
epoch: 2000 loss: 0.0003058022996461388
epoch: 2500 loss: 0.00018960501393938938
epoch: 3000 loss: 0.00012520999307008722
epoch: 3500 loss: 8.618368402741827e-05
epoch: 4000 loss: 6.10186773330791e-05
epoch: 4500 loss: 4.4099473860030817e-05
epoch: 5000 loss: 3.2344495457129494e-05


### Inference

In [15]:
# Forward pass through whatever layers the globals currently point to. When
# the notebook is run top to bottom these are the layers from the Adam
# section, because that section rebinds linear1/linear2 after the SGD run.
logits = linear2(relu(linear1(X)))
# Predicted class for each input row = index of its largest logit.
predictions = np.argmax(logits, axis=1)

In [16]:
# Collapse each one-hot row of Y back to its integer class label.
targets = Y.argmax(axis=1)

In [17]:
# Show predicted and true class labels side by side, then the fraction of
# rows where they agree (mean of the element-wise equality).
print("predictions:", predictions)
print("targets:", targets)
print("accuracy:", np.mean(predictions == targets))

predictions: [0 1 1 0]
targets: [0 1 1 0]
accuracy: 1.0
