## XOR Problem

(0, 0) -> 0 \
(0, 1) -> 1 \
(1, 0) -> 1 \
(1, 1) -> 0

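The issue is that XOR is not linearly separable: no single straight line can separate the inputs that map to 1 from the inputs that map to 0 (try plotting the four points), so a purely linear model cannot solve it.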



In [None]:
!pip install torch



In [None]:
import os
# Set before the cell that imports torch runs.
# NOTE(review): presumably enabled so CUDA errors surface at the call that
# caused them while debugging -- confirm it is still wanted, since it is a
# debugging aid rather than something training needs.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

In [None]:
import torch
from torch import nn
import numpy as np

In [None]:
"""

just scoping out input/output of my network

input will be a tensor like this: [0, 1]

output will be 0 or 1


model {
    Layer (2 x 4)
    Relu
    Layer (4 x 1)
}

"""

'\n\njust scoping out input/output of my network\n\ninput will be a tensor like this: [0, 1]\n\noutput will be 0 or 1\n\n\nmodel {\n    Layer (2 x 4)\n    Relu\n    Layer (4 x 1)\n}\n\n'

In [None]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [None]:
class XOR(nn.Module):
    """Small feed-forward network for the XOR task.

    Architecture: Linear(2 -> 4) -> ReLU -> Linear(4 -> 1). The forward pass
    returns the raw output of the last linear layer; no sigmoid is applied
    inside the module.
    """

    def __init__(self):
        super().__init__()
        hidden = nn.Linear(in_features=2, out_features=4, bias=True)
        head = nn.Linear(in_features=4, out_features=1, bias=True)
        # Kept as an index-addressed Sequential named `layers` so parameter
        # names stay `layers.0.*` / `layers.2.*`.
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the resulting logits."""
        return self.layers(x)

In [None]:
# Instantiate the network and move it to the device selected earlier.
model = XOR().to(device)

In [None]:
# Bare expression so the notebook renders the module's repr (its layer structure).
model

XOR(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=1, bias=True)
  )
)

In [None]:
# Total number of training rows. The dataset is the 4-row XOR truth table
# tiled N // 4 times, so N should be a multiple of 4.
N = 4000

# Alternative (disabled): random binary inputs with optional Gaussian jitter.
# X = torch.randint(0, 2, (4000, 2), dtype=torch.float32).to(device)
# Xc = X + 0.05 * torch.randn_like(X)
# y = (X[:, 0].to(torch.long) ^ X[:, 1].to(torch.long)).to(torch.float).unsqueeze(1).to(device)

# The four XOR input pairs and their targets, created directly on `device`.
xor_inputs = torch.tensor(
    [[0., 0.], [0., 1.], [1., 0.], [1., 1.]],
    dtype=torch.float32,
    device=device,
)
xor_targets = torch.tensor(
    [[0.], [1.], [1.], [0.]],
    dtype=torch.float32,
    device=device,
)

# Tile along dim 0 only: X is (N, 2) and y is (N, 1), rows in truth-table order.
X = xor_inputs.repeat(N // 4, 1)
y = xor_targets.repeat(N // 4, 1)

In [None]:
# Bare expression so the notebook renders the input tensor.
X

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        ...,
        [0., 1.],
        [1., 0.],
        [1., 1.]], device='cuda:0')

In [None]:
# Bare expression so the notebook renders the target tensor.
y

tensor([[0.],
        [1.],
        [1.],
        ...,
        [1.],
        [1.],
        [0.]], device='cuda:0')

In [None]:
# Sanity check: report the shapes of the inputs and the targets, one per line.
print(f"X.shape: {X.shape}", f"y.shape: {y.shape}", sep="\n")

X.shape: torch.Size([4000, 2])
y.shape: torch.Size([4000, 1])


In [None]:
# Training configuration.
# The earlier setting of 100 epochs at lr=0.01 was not enough in the recorded
# run, so the loop now gets more optimizer steps and logs only periodically.
N_EPOCHS = 1000
LOG_EVERY = 100

# BCEWithLogitsLoss takes the raw logits returned by the model.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(N_EPOCHS):
    # Full-batch step: every epoch uses all of X at once.
    optimizer.zero_grad()
    out = model(X)
    loss = loss_fn(out, y)
    loss.backward()
    optimizer.step()

    # Log on a fixed cadence (and on the final epoch) instead of every step.
    if epoch % LOG_EVERY == 0 or epoch == N_EPOCHS - 1:
        with torch.no_grad():
            # `out` was computed before optimizer.step(), so this accuracy
            # describes the weights as they were at the start of this epoch.
            probs = torch.sigmoid(out)
            preds = (probs > 0.5).float()
            acc = (preds == y).float().mean().item()
        # loss.item() prints a plain float rather than the tensor repr with grad_fn.
        print(f"epoch {epoch:4d}  loss {loss.item():.4f}  acc {acc:.4f}")


tensor(0.6359, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6346, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6321, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6302, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6279, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6254, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6227, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6201, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6175, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
0.7500000596046448
tensor(0.6152, device='cuda:0',
     

In [None]:
# XOR targets for the inputs (0,0), (0,1), (1,0), (1,1), in that order.
expected = [0, 1, 1, 0]


def validate_preds(logits):
    """Check thresholded model outputs against the XOR truth table.

    Args:
        logits: tensor with a size-1 second dimension (it is squeezed on
            dim 1), one row per input, in the same order as ``expected``.

    Returns:
        True only if every thresholded prediction equals the corresponding
        entry of ``expected``; False otherwise, including when the number of
        predictions differs from ``len(expected)``.
    """
    probs = torch.sigmoid(logits)
    preds = (probs > 0.5).int().squeeze(1).tolist()
    print("logits:", logits.squeeze(1).tolist())
    print("probs: ", probs.squeeze(1).tolist())
    print("preds: ", preds)
    print("expected", expected)
    # Compare the whole list. The previous loop had `return True` inside its
    # body, so it returned after looking at the first element only.
    return preds == expected

In [None]:
# Evaluate the model on the four exact XOR inputs.
vals = torch.tensor(
    [[0., 0.], [0., 1.], [1., 0.], [1., 1.]],
    device=device,
)
# Only the forward pass needs gradient tracking disabled.
with torch.no_grad():
    logits = model(vals)
output = validate_preds(logits)
print(output)

logits: [-0.04784664511680603, 0.2925662696361542, -0.28827327489852905, -0.003583282232284546]
probs:  [0.48804062604904175, 0.5726242661476135, 0.42842668294906616, 0.49910420179367065]
preds:  [0, 1, 0, 0]
expected [0, 1, 1, 0]
True


In [None]:
# Evaluate the model on the four XOR inputs after adding random noise
# (standard-normal samples scaled by 0.05) to every coordinate.
vals = torch.tensor(
    [[0., 0.], [0., 1.], [1., 0.], [1., 1.]],
    device=device,
)
noisy_vals = vals + 0.05 * torch.randn_like(vals)
# Only the forward pass needs gradient tracking disabled.
with torch.no_grad():
    logits = model(noisy_vals)
output = validate_preds(logits)
print(output)

logits: [-0.06444372236728668, 0.25932517647743225, -0.27967745065689087, -0.01651361584663391]
probs:  [0.4838946461677551, 0.5644704103469849, 0.430532842874527, 0.4958716928958893]
preds:  [0, 1, 0, 0]
expected [0, 1, 1, 0]
True
