# XOR Problem

In [None]:
import torch
import torch.nn as nn

In [None]:
# 2-2-1 multilayer perceptron for XOR. A sigmoid follows each linear layer, so
# the single output lies strictly between 0 and 1 and can be thresholded.
model = nn.Sequential()
model.append(nn.Linear(2, 2))
model.append(nn.Sigmoid())
model.append(nn.Linear(2, 1))
model.append(nn.Sigmoid())

# The four rows of the XOR truth table (inputs) and the matching labels, both
# stored as float32 tensors with one row per sample.
X = torch.tensor([[1, 1], [1, 0], [0, 1], [0, 0]], dtype=torch.float)
y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float)

In [32]:
# Use the accelerator torch reports as current when one is available;
# otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator()
else:
    device = "cpu"

# Put the network and the XOR data on the same device before training.
model = model.to(device)
X = X.to(device)
y = y.to(device)

In [37]:
epochs = 3000
lr = 0.1

# The network ends in a sigmoid, so binary cross-entropy on probabilities
# (BCELoss) is the matching loss.
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Full-batch training: every step uses the whole truth table in X.
model.train()
for epoch in range(1, epochs + 1):
    pred = model(X)
    loss = loss_fn(pred, y)

    loss.backward()
    optimizer.step()
    # Clear gradients after the step so the next backward() starts from zero.
    optimizer.zero_grad()

model.eval()
with torch.no_grad():
    # Threshold the sigmoid output at 0.5 to obtain a hard 0/1 prediction.
    pred = model(X) >= 0.5
    # Fraction of correct predictions, taken as the mean of the comparison
    # mask so it follows the number of samples instead of a hard-coded 4.
    accuracy = (pred == y).float().mean()
    print(f"Accuracy {accuracy * 100}%")

Accuracy 100.0%


In [7]:
# `model` at this point is the XOR nn.Sequential, which has no `.layers`
# attribute; index the container directly to reach its first Linear layer.
print(model[0].weight, model[0].bias, sep="\n")

Parameter containing:
tensor([[ -5.2887,  -5.3866],
        [-11.6784, -11.6963]], device='mps:0', requires_grad=True)
Parameter containing:
tensor([-2.0605,  2.7910], device='mps:0', requires_grad=True)


# MNIST

In [129]:
from torch.utils.data import Dataset, DataLoader, Subset

from torchvision import datasets
from torchvision.transforms import ToTensor

device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else "cpu"

In [190]:
class Net(nn.Module):
    """Small fully connected classifier: 784 -> 16 -> 16 -> 10.

    The input is flattened, passed through two sigmoid-activated hidden layers
    of 16 units each, and projected to 10 outputs. No activation follows the
    last linear layer, so ``forward`` returns raw scores.
    """

    def __init__(self):
        super().__init__()

        # Layer widths from the flattened 28x28 input to the 10 outputs.
        widths = (28 * 28, 16, 16, 10)

        stack = [nn.Flatten(), nn.Linear(widths[0], widths[1])]
        # Each remaining linear layer is preceded by a sigmoid, which leaves
        # the final layer without an activation.
        for fan_in, fan_out in zip(widths[1:], widths[2:]):
            stack.append(nn.Sigmoid())
            stack.append(nn.Linear(fan_in, fan_out))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the 10 per-class scores for each sample in ``x``."""
        scores = self.layers(x)
        return scores

In [191]:
# MNIST training and test splits, stored under ./data (download=True is passed
# for both) with every image converted by ToTensor().
train_data, test_data = (
    datasets.MNIST(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

# Mini-batches of 64 for both splits. No shuffle argument is given, so both
# loaders use DataLoader's default ordering.
train_loader, test_loader = (
    DataLoader(split, batch_size=64) for split in (train_data, test_data)
)

In [192]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            loss that supports ``backward()``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. ``model`` and ``optimizer`` are updated in place.
    """
    model.train()

    for inputs, targets in dataloader:
        # Batches are moved to the module-level ``device`` before use.
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagate, apply the update, then clear the gradients so the
        # next batch starts from zero.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()


def test(dataloader, model):
    model.eval()

    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            
            pred = model(X).argmax(dim=1)
            correct += (pred == y).sum().item()
    
    accuracy = correct / len(dataloader.dataset)
    print(f"accuracy {accuracy * 100: .2f}%")


In [193]:
# Hyperparameters for the MNIST run. These rebind the `lr` and `epochs` names
# used earlier in the notebook.
epochs = 10
lr = 0.1

# CrossEntropyLoss is applied to the raw scores returned by Net, with the
# integer class labels from the loader as targets.
loss_fn = nn.CrossEntropyLoss()

model = Net()
model.to(device)
# NOTE(review): lr=0.1 is passed to Adadelta explicitly; confirm this is
# intended rather than carried over from the XOR section.
optimizer = torch.optim.Adadelta(params=model.parameters(), lr=lr)

# One pass over the training set per epoch, then report test accuracy.
for epoch in range(1, epochs + 1):
    train(dataloader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer)

    print(f"Epoch {epoch}")
    test(dataloader=test_loader, model=model)

Epoch 1
accuracy  29.36%
Epoch 2
accuracy  38.40%
Epoch 3
accuracy  58.96%
Epoch 4
accuracy  70.79%
Epoch 5
accuracy  77.52%
Epoch 6
accuracy  80.24%
Epoch 7
accuracy  82.51%
Epoch 8
accuracy  84.44%
Epoch 9
accuracy  85.96%
Epoch 10
accuracy  87.16%
