# 00 - FFNN Simple Test: XOR
Simple Neural Network that is able to calculate XOR

#### Set up

In [43]:
import numpy as np

## Neural Network definition
input_size = 2 # Number of input features
hidden_layer_nodes = 4 # Number of nodes in the hidden layer
output_size = 1 # Number of output features

## Training Parameters
learning_rate = 0.01 # Value used to update weights
num_epochs = 1000 # Number of iterations for training

#### Define Training Data

In [3]:
np.random.seed(42)

X = np.array(
  [
    [0,0],
    [0,1],
    [1,0],
    [1,1]
  ]
)

Y = np.array(
  [
    [0],
    [1],
    [1],
    [0]
  ]
)

In [8]:
X[0]

array([0, 0])

In [9]:
Y[0]

array([0])

In [23]:
X.shape

(4, 2)

In [24]:
Y.shape

(4, 1)

#### Calculate weights
The `np.random.rand` method generates a matrix with random values. 
The matrix dimensions (shape) is defined by the parameters `input_shape` and `hidden_layer_nodes`
```
X = input size = 2 (number of rows)
Y = hidden_layer_nodes = 4 (number of columns)
```

In [28]:
# Hidden Layer Weights
W1 = np.random.rand(input_size, hidden_layer_nodes)

# Output Layer Weights
W2 = np.random.rand(hidden_layer_nodes, output_size)

In [30]:
W1

array([[0.19967378, 0.51423444, 0.59241457, 0.04645041],
       [0.60754485, 0.17052412, 0.06505159, 0.94888554]])

In [31]:
W1.shape

(2, 4)

In [32]:
W2

array([[0.96563203],
       [0.80839735],
       [0.30461377],
       [0.09767211]])

In [33]:
W2.shape

(4, 1)

#### Calculate bias

In [38]:
# Why is a matrix of zeros? Can be any random number?
B1 = np.zeros((1, hidden_layer_nodes))
B2 = np.zeros((1, output_size))

In [35]:
B1

array([[0., 0., 0., 0.]])

In [39]:
B2

array([[0.]])

#### Define Activation Funtions

In [40]:
## Feedforward function

def relu(x):
  return np.maximum(0, x)

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

## Backpropagation function
def relu_derivative(x):
  return np.where(x > 0, 1, 0).astype(float)

def sigmoid_derivative(x):
  return sigmoid(x) * (1 - sigmoid(x))

#### Training Loop
- About Mean Squared Error https://www.geeksforgeeks.org/mean-squared-error/

In [46]:
# Logger
log_data = []

def save_log_data(epoch: int, loss: float, W1: np.ndarray, W2: np.ndarray, B1: np.ndarray, B2: np.ndarray):
  item = {
    "epoch": epoch,
    "loss": loss,
    "W1": W1.tolist(),
    "W2": W2.tolist(),
    "B1": B1.tolist(),
    "B2": B2.tolist()
  }

  log_data.append(item)

In [None]:
# # Auxiliar methods

# def hidden_layer(X: np.ndarray, W1: np.ndarray, B1: np.ndarray) -> np.ndarray:
#   """
#   X  -> is the input data. It contains all the records and features. Expected matrix dimesion is (4, 2)
#   W1 -> is the weight data. It is a matrix that contains all the weights for all the nodes in the hidden layer. Expected matrix dimenssion is (2,4)
#   B1 -> is the bias data. Expected matrix dimension is (1, 4)

#   Calculation process
#   1 - Linear transformation
#       z = (X * W1) + B1

#   2 - Activation function
#       a = relu(z)
#   """

#   # 1 - Calculate the dot product of the input data and the weights
#   #     Input data dimesion is (4.2)
#   #     Weights dimesion is (2,4)
#   #     Resulting dimension is (4,4)
#   dot_product_result = np.dot(X, W1)

#   # 2 - Add the bias to the dot product result
#   #     Bias dimension is (1,4)
#   #     Resulting dimension is (4,4)
#   dot_product_result_bias = dot_product_result + B1

#   # 3 - Apply the activation function ReLU
#   #     Resulting dimension is (4,4)
#   activated_result = relu(dot_product_result_bias)

#   return activated_result


# def output_layer(A1: np.ndarray, W2: np.ndarray, B2: np.ndarray) -> np.ndarray:
#   """
#   A1 -> result of the hidden layer. Expected matrix dimesion is (4, 4)
#   W2 -> is the weight data. It is a matrix that contains all the weights for all the nodes in the output layer. Expected matrix dimenssion is (4, 1)
#   B1 -> is the bias data. Expected matrix dimension is (1, 1)

#   Calculation process
#   1 - Linear transformation
#       z = (X * W1) + B1

#   2 - Activation function
#       a = relu(z)
#   """

#   # 1 - Calculate the dot product of the input data and the weights
#   #     Input data dimesion is (4.2)
#   #     Weights dimesion is (2,4)
#   #     Resulting dimension is (4,4)
#   dot_product_result = np.dot(A1, W2)

#   # 2 - Add the bias to the dot product result
#   #     Bias dimension is (1,4)
#   #     Resulting dimension is (4,4)
#   dot_product_result_bias = dot_product_result + B2

#   # 3 - Apply the activation function ReLU
#   #     Resulting dimension is (4,4)
#   activated_result = sigmoid(dot_product_result_bias)

#   return activated_result


In [None]:
for epoch in range(num_epochs):

  # Forward Propagation
  Z1 = np.dot(X, W1) + B1   # Input to hidden layer
  A1 = relu(Z1)             # Activation of hidden layer

  Z2 = np.dot(A1, W2) + B2  # Input to output layer
  A2 = sigmoid(Z2)          # Activation of output layer


  

  # Calculate loss
  loss = np.mean((Y - A2) ** 2) # Mean Squared Error

  # Backpropagation
  dA2 = A2 - Y
  dZ2 = dA2 * sigmoid_derivative(Z2)
  dW2 = np.dot(A1.T, dZ2)
  dB2 = np.sum(dZ2, axis=0, keepdims=True)

  dA1 = np.dot(dZ2, W2.T)
  dZ1 = dA1 * relu_derivative(Z1)
  dW1 = np.dot(X.T, dZ1)
  dB1 = np.sum(dZ1, axis=0, keepdims=True)

  # Update weights and biases
  W2 -= learning_rate * dW2
  B2 -= learning_rate * dB2
  W1 -= learning_rate * dW1
  B1 -= learning_rate * dB1

  # Print loss every 10 epochs
  if epoch % 10 == 0:
    #save_log_data(epoch, loss, W1, W2, B1, B2)
    print(f"Epoch: {epoch}, Loss: {loss}")

In [49]:
# Logger
for item in log_data:
  print(f"Epoch: {item['epoch']}, Loss: {item['loss']}")
  print()

Epoch: 0, Loss: 0.22227211772624472

Epoch: 10, Loss: 0.22175984860688952

Epoch: 20, Loss: 0.22123937896581808

Epoch: 30, Loss: 0.22077822623300458

Epoch: 40, Loss: 0.22025100256428104

Epoch: 50, Loss: 0.21975958870336593

Epoch: 60, Loss: 0.21923390537562565

Epoch: 70, Loss: 0.2186768655535238

Epoch: 80, Loss: 0.21820915742654934

Epoch: 90, Loss: 0.2176697110892714

Epoch: 100, Loss: 0.21719396370890848

Epoch: 110, Loss: 0.21666020362606087

Epoch: 120, Loss: 0.2160480277907648

Epoch: 130, Loss: 0.21557946514572365

Epoch: 140, Loss: 0.21496437369729632

Epoch: 150, Loss: 0.21449945815383842

Epoch: 160, Loss: 0.2138841254645089

Epoch: 170, Loss: 0.2134195761654128

Epoch: 180, Loss: 0.21276996173909152

Epoch: 190, Loss: 0.21227589780635162

Epoch: 200, Loss: 0.2117241164889248

Epoch: 210, Loss: 0.21107772385548565

Epoch: 220, Loss: 0.2105751100096449

Epoch: 230, Loss: 0.20992736074071972

Epoch: 240, Loss: 0.20942929998096516

Epoch: 250, Loss: 0.20882194993918782

Epoc

In [45]:
print(f"Final Loss: {loss:.4f}")
print("Final Predictions")
print(np.round(A2))

Final Loss: 0.2223
Final Predictions
[[0.]
 [1.]
 [1.]
 [0.]]
