# A minimal neural network 

In [4]:
import numpy as np

class FeedforwardNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights with random values
        self.weights1 = np.random.randn(self.input_size, self.hidden_size)
        self.weights2 = np.random.randn(self.hidden_size, self.output_size)

    def forward(self, X):
        # Calculate the output of the network given an input X
        self.hidden_layer_output = np.maximum(0, np.dot(X, self.weights1))
        self.output_layer_output = np.dot(self.hidden_layer_output, self.weights2)
        self.output_layer_activation = self.sigmoid(self.output_layer_output)
        return self.output_layer_activation

    def sigmoid(self, x):
        # Sigmoid activation function
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # Derivative of the sigmoid function
        return x * (1 - x)

    def fit(self, X, y, learning_rate, epochs):
        for epoch in range(epochs):
            # Forward propagation
            output = self.forward(X)

            # Backpropagation
            error = y - output
            output_delta = error * self.sigmoid_derivative(output)
            hidden_delta = np.dot(output_delta, self.weights2.T) * (self.hidden_layer_output > 0)

            # Update weights
            self.weights2 += learning_rate * np.dot(self.hidden_layer_output.T, output_delta)
            self.weights1 += learning_rate * np.dot(X.T, hidden_delta)

            # Print the mean squared error every 100 epochs
            if epoch % 100 == 0:
                mse = np.mean(np.square(error))
                print(f"Epoch {epoch}: Mean Squared Error = {mse:.4f}")


# Example usage
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [0, 1], [1, 0]])
y = np.array([[0], [1], [1], [0], [1], [1]])

# Create a feedforward network with 2 input units, 2 hidden units, and 1 output unit
network = FeedforwardNetwork(input_size=2, hidden_size=2, output_size=1)

# Train the network
network.fit(X, y, learning_rate=0.1, epochs=2000)

# Make predictions
predictions = network.forward(X)
print(predictions)


Epoch 0: Mean Squared Error = 0.2298
Epoch 100: Mean Squared Error = 0.1689
Epoch 200: Mean Squared Error = 0.1677
Epoch 300: Mean Squared Error = 0.1672
Epoch 400: Mean Squared Error = 0.1670
Epoch 500: Mean Squared Error = 0.1670
Epoch 600: Mean Squared Error = 0.1669
Epoch 700: Mean Squared Error = 0.1669
Epoch 800: Mean Squared Error = 0.1669
Epoch 900: Mean Squared Error = 0.1668
Epoch 1000: Mean Squared Error = 0.1668
Epoch 1100: Mean Squared Error = 0.1668
Epoch 1200: Mean Squared Error = 0.1668
Epoch 1300: Mean Squared Error = 0.1668
Epoch 1400: Mean Squared Error = 0.1667
Epoch 1500: Mean Squared Error = 0.1668
Epoch 1600: Mean Squared Error = 0.1667
Epoch 1700: Mean Squared Error = 0.1667
Epoch 1800: Mean Squared Error = 0.1667
Epoch 1900: Mean Squared Error = 0.1667
[[0.5       ]
 [0.5       ]
 [0.98531316]
 [0.5       ]
 [0.5       ]
 [0.98531316]]


# Remark

In a feedforward neural network, the weight update process in the backpropagation algorithm involves adjusting the weights to minimize the error between the predicted output and the target output. The weight update equations are as follows:


# Update the weights connecting the hidden layer to the output layer
weights2 = weights2 + learning_rate * hidden_layer_output.T.dot(output_delta)

# Update the weights connecting the input layer to the hidden layer
weights1 = weights1 + learning_rate * input_data.T.dot(hidden_delta)


Here, `weights2` represents the connections between the hidden layer and the output layer, and `weights1` represents the connections between the input layer and the hidden layer. The learning_rate determines the step size of the weight update.

To update `weights2`, we calculate the change or delta in the weights based on the error in the output layer and the output of the hidden layer. This change is then added to the existing `weights2`.

To update `weights1`, we calculate the change or delta in the weights based on the error in the hidden layer, the input data, and the output delta from the output layer. This change is then added to the existing `weights1`.

The dot product (`dot()`) between matrices is used to perform the matrix multiplication necessary for weight updates. This allows the neural network to adjust its weights in the direction that reduces the error and improves its performance over time.

