In [1]:
!unzip -q /content/my_library.zip -d /content/

# XOR Neural Network Example

This text demonstrates a simple neural network built from scratch to solve the XOR problem using NumPy. The network has a 2-2-1 architecture, meaning it has 2 input neurons, 2 neurons in a hidden layer, and 1 output neuron.

## Dataset

We use the classic XOR truth table as the dataset. The network should learn to output 1 when the inputs differ and 0 when they are the same.

## Network Components

Dense Layer: Fully connected layer handling linear transformations.

Activation Function (Sigmoid): Introduces non-linearity, mapping outputs between 0 and 1.

Loss Function (MSE): Mean Squared Error, measuring the difference between predicted and true outputs.

Optimizer (SGD): Stochastic Gradient Descent, updating weights based on computed gradients.

Sequential Model: Container that chains layers together, handling forward and backward passes.

## Training Process

The network trains for multiple epochs. Each epoch performs:

**Forward pass:** Compute the network’s prediction.

**Loss calculation:** Measure how far predictions are from true outputs.

**Backward pass:** Compute gradients via backpropagation.

**Weight update:** Adjust weights using the optimizer.

## Expected Output

The training prints the loss every 1000 epochs.

After training, predictions are shown for all XOR inputs.

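**Note:** In this configuration, the network may not successfully learn XOR. A 2-2-1 network with sigmoid activations can represent XOR in principle, but with only 2 hidden neurons training is very sensitive to the initial weights and often stalls. When that happens, predictions stay around 0.5 for all inputs and the loss stays near 0.25.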

## Classes and Functions

**Dense:** Fully connected layer.

**Sigmoid / Tanh:** Activation functions.

**MSE:** Mean Squared Error loss.

**SGD:** Optimizer using gradient descent.

**Sequential:** Handles forward and backward passes for all layers.

## Summary

This notebook demonstrates the complete workflow of building and training a neural network from scratch: forward pass → loss computation → backward pass → weight update. It highlights the limitations of small networks and the importance of proper architecture for solving non-linear problems like XOR.

learning rate:

# user choices:


1st layer i/ps: 2

1st layer o/ps: 2


---


2nd layer i/ps: 2

2nd layer o/ps: 1



---
activation function : sigmoid, for both layers.


---



**learning rate**: 0.5



---

no. of iterations : 10000




---


In [2]:
import sys
sys.path.append('/content/my_library')
import numpy as np
from layers import Dense
from activations import Sigmoid
from losses import MSE
from optimizers import SGD
from model import Sequential

# Hyperparameters for this run (the "user choices" listed above).
SEED = 42
LEARNING_RATE = 0.5
N_EPOCHS = 10000
LOG_EVERY = 1000

# Seed NumPy's global RNG so repeated runs start from the same state.
# NOTE(review): this only pins the initial weights if Dense draws them from
# np.random's global generator -- confirm against my_library/layers.py.
np.random.seed(SEED)

# XOR dataset: one row per input pair; the target is 1 only when the inputs differ.
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])

# Create tiny network: 2 inputs -> 2 hidden -> 1 output
layers = [
    Dense(2, 2, activation=Sigmoid()),
    Dense(2, 1, activation=Sigmoid())
]

model = Sequential(layers)
loss_fn = MSE()
optimizer = SGD(lr=LEARNING_RATE)

# Training loop (full batch: all four XOR rows are fed on every epoch)
for epoch in range(N_EPOCHS):
    # Forward
    out = model.forward(X)
    loss = loss_fn.forward(out, y)

    # Backward
    # loss_fn.backward() takes no arguments, so it presumably reuses values
    # cached by the forward() call just above -- keep the two calls paired.
    dA = loss_fn.backward()
    model.backward(dA)

    # Update weights (the optimizer is applied to each layer in turn)
    for layer in layers:
        optimizer.step(layer)

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, loss={loss}")

# Test predictions
pred = model.forward(X)
print("Predictions:\n", pred)


Epoch 0, loss=0.25000895005801566
Epoch 1000, loss=0.25000000002035444
Epoch 2000, loss=0.25000000002026423
Epoch 3000, loss=0.250000000020174
Epoch 4000, loss=0.2500000000200839
Epoch 5000, loss=0.250000000019994
Epoch 6000, loss=0.25000000001990424
Epoch 7000, loss=0.25000000001981454
Epoch 8000, loss=0.25000000001972505
Epoch 9000, loss=0.2500000000196356
Predictions:
 [[0.50000409]
 [0.49999935]
 [0.50000066]
 [0.49999591]]


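## Gradient check (Analytical vs Numerical):
The analytical gradient comes from backpropagation. The numerical gradient it is compared against uses the central-difference formula: $\partial L/\partial W \approx \dfrac{L(W + \varepsilon) - L(W - \varepsilon)}{2\varepsilon}$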



In [None]:
# TODO: add gradient check (compare analytical gradients with numerical ones)


# user choices2:


1st layer i/ps: 2

1st layer o/ps: 4


---


2nd layer i/ps: 4

2nd layer o/ps: 1




---
activation function: tanh for first layer and sigmoid for second layer.


---
Starting from 0.1, lowering for stability until it outputs the correct results.

**learning rate**: 0.075



---

no. of iterations : 10000




---




In [12]:
import sys
sys.path.append('/content/my_library')
import numpy as np
from layers import Dense
from activations import Sigmoid, Tanh
from losses import MSE
from optimizers import SGD
from model import Sequential

# Hyperparameters for this run (the "user choices2" listed above).
SEED = 42
LEARNING_RATE = 0.075  # lower learning rate for stability with Tanh
N_EPOCHS = 10000
LOG_EVERY = 1000

# Seed NumPy's global RNG so repeated runs start from the same state.
# NOTE(review): this only pins the initial weights if Dense draws them from
# np.random's global generator -- confirm against my_library/layers.py.
np.random.seed(SEED)

# XOR dataset: one row per input pair; the target is 1 only when the inputs differ.
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])

# Create network: 2 inputs -> 4 hidden -> 1 output
# Hidden layer uses Tanh, output layer uses Sigmoid
layers = [
    Dense(2, 4, activation=Tanh()),      # Hidden layer
    Dense(4, 1, activation=Sigmoid())    # Output layer
]

model = Sequential(layers)
loss_fn = MSE()
optimizer = SGD(lr=LEARNING_RATE)

# Training loop (full batch: all four XOR rows are fed on every epoch)
for epoch in range(N_EPOCHS):
    # Forward pass
    out = model.forward(X)
    loss = loss_fn.forward(out, y)

    # Backward pass
    # loss_fn.backward() takes no arguments, so it presumably reuses values
    # cached by the forward() call just above -- keep the two calls paired.
    dA = loss_fn.backward()
    model.backward(dA)

    # Update weights (the optimizer is applied to each layer in turn)
    for layer in layers:
        optimizer.step(layer)

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, loss={loss}")

# Test predictions
pred = model.forward(X)
print("\nPredictions:\n", pred)


Epoch 0, loss=0.250000001070065
Epoch 1000, loss=0.2500000008247674
Epoch 2000, loss=0.25000000082052476
Epoch 3000, loss=0.25000000081630586
Epoch 4000, loss=0.25000000081211066
Epoch 5000, loss=0.2500000008079389
Epoch 6000, loss=0.25000000080379037
Epoch 7000, loss=0.2500000007996649
Epoch 8000, loss=0.2500000007955622
Epoch 9000, loss=0.2500000007914822

Predictions:
 [[0.50001509]
 [0.49997948]
 [0.50002053]
 [0.49998492]]


# Using TensorFlow

In [None]:
#tensorflow for xor