# Load path

In [None]:
import sys
from pathlib import Path

# Get the absolute path to the parent directory (MLP-FROM-SCRATCH)
root_path = Path.cwd().parent  
sys.path.insert(0, str(root_path))
from core import *

# XOR problem implementation

In [2]:
# Define XOR dataset (inputs and expected outputs)
## Dataset
X_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_train = np.array([[0], [1], [1], [0]])

# Create neural network

In [3]:
## Create model
model = NeuralNetwork()
model.add_layer(Linear(2, 4, init_type= 'xavier'))
model.add_layer(ReLULayer()) 
model.add_layer(Linear(4, 1, init_type= 'he'))
model.add_layer(SigmoidLayer())

# Configure stochastic gradient descent optimizer

In [4]:
## Optimizer
optimizer = SGD(learning_rate= 0.01)
epochs = 100000  # Total number of training iterations

# Start training process
## Train
model.train()

# Training loop

In [5]:
for epoch in range(epochs):
    # Reset gradients before each forward pass
    model.zero_grad()

    # Forward pass: compute predictions
    y_prediction = model.forward(X_train)
    # Calculate mean squared error loss
    loss = mse_loss(y_prediction, y_train)

    # Backward pass: compute gradients
    loss_grad = mse_loss_derivative(y_prediction, y_train)
    model.backward(loss_grad)

    # Update model parameters using optimizer
    model.update_params(optimizer)

    # Print progress every 200 epochs
    if (epoch + 1) % 200 == 0:
        print(f"Epoch=: {epoch + 1}, loss: {loss}")

Epoch=: 200, loss: 0.2447437905792743
Epoch=: 400, loss: 0.23438967099901464
Epoch=: 600, loss: 0.22697141810568788
Epoch=: 800, loss: 0.22136111184995746
Epoch=: 1000, loss: 0.21711541189053873
Epoch=: 1200, loss: 0.212625341635504
Epoch=: 1400, loss: 0.207922730235754
Epoch=: 1600, loss: 0.20305589930118412
Epoch=: 1800, loss: 0.198108101557405
Epoch=: 2000, loss: 0.1931803886568939
Epoch=: 2200, loss: 0.18825489706941273
Epoch=: 2400, loss: 0.18351290127736364
Epoch=: 2600, loss: 0.1789079259192085
Epoch=: 2800, loss: 0.17454885340792822
Epoch=: 3000, loss: 0.17046138498250124
Epoch=: 3200, loss: 0.1666339214797559
Epoch=: 3400, loss: 0.16309458853215078
Epoch=: 3600, loss: 0.15979799445383136
Epoch=: 3800, loss: 0.15676800572545452
Epoch=: 4000, loss: 0.15400353976234477
Epoch=: 4200, loss: 0.15142753691764074
Epoch=: 4400, loss: 0.14903527191773797
Epoch=: 4600, loss: 0.14683148137075483
Epoch=: 4800, loss: 0.1447613739203699
Epoch=: 5000, loss: 0.14278316790696197
Epoch=: 5200, l

# Evaluation

In [6]:
model.eval()  # Set model to evaluation mode (affects certain layers if implemented)

# Make final predictions on training data
y_prediction_final = model.forward(X_train)
# Print input, target and prediction for each sample
for i in range(4):
    print(f"Input: {X_train[i]}, Target: {y_train[i][0]:.0f}, Prediction: {y_prediction_final[i][0]:.3f}")

Input: [0 0], Target: 0, Prediction: 0.041
Input: [0 1], Target: 1, Prediction: 0.981
Input: [1 0], Target: 1, Prediction: 0.986
Input: [1 1], Target: 0, Prediction: 0.014


In [7]:
# Save model
filepath = root_path / 'models' / 'xor_model.pkl'
save_checkpoint(model, optimizer, filepath)