In [1]:
import numpy as np

# Define the sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Define the neural network class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        # Initialize weights and biases
        self.weights_input_hidden = np.random.rand(input_size, hidden_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.rand(hidden_size, output_size)
        self.bias_output = np.zeros((1, output_size))
        
        self.learning_rate = learning_rate

    def forward(self, inputs):
        # Forward propagation
        self.hidden_input = np.dot(inputs, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigmoid(self.hidden_input)
        self.output = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        return sigmoid(self.output)

    def backward(self, inputs, targets):
        # Backward propagation using the chain rule

        # Calculate the error and delta for the output layer
        output_error = targets - self.output
        output_delta = output_error * sigmoid_derivative(self.output)

        # Update weights and biases for the output layer
        self.weights_hidden_output += self.learning_rate * np.dot(self.hidden_output.T, output_delta)
        self.bias_output += self.learning_rate * np.sum(output_delta, axis=0, keepdims=True)

        # Calculate the error and delta for the hidden layer
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * sigmoid_derivative(self.hidden_output)

        # Update weights and biases for the hidden layer
        self.weights_input_hidden += self.learning_rate * np.dot(inputs.T, hidden_delta)
        self.bias_hidden += self.learning_rate * np.sum(hidden_delta, axis=0, keepdims=True)

    def train(self, inputs, targets, epochs):
        for epoch in range(epochs):
            # Forward pass
            predictions = self.forward(inputs)

            # Backward pass
            self.backward(inputs, targets)

            # Print the mean squared error for every 1000 epochs
            if epoch % 1000 == 0:
                mse = np.mean((targets - predictions) ** 2)
                print(f"Epoch {epoch}, Mean Squared Error: {mse}")

# Example usage:
# Assuming you have input data `X` and target data `y`
# Make sure to normalize your input data before training
# (e.g., dividing by the maximum value or using z-score normalization)

# Define input and target data
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize and train the neural network
input_size = 2
hidden_size = 4
output_size = 1
learning_rate = 0.1

nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)
nn.train(X, y, epochs=10000)

# Test the trained network
predictions = nn.forward(X)
print("Predictions:")
print(predictions)


Epoch 0, Mean Squared Error: 0.3921615954725725
Epoch 1000, Mean Squared Error: nan


  hidden_delta = hidden_error * sigmoid_derivative(self.hidden_output)


Epoch 2000, Mean Squared Error: nan
Epoch 3000, Mean Squared Error: nan
Epoch 4000, Mean Squared Error: nan
Epoch 5000, Mean Squared Error: nan
Epoch 6000, Mean Squared Error: nan
Epoch 7000, Mean Squared Error: nan
Epoch 8000, Mean Squared Error: nan
Epoch 9000, Mean Squared Error: nan
Predictions:
[[nan]
 [nan]
 [nan]
 [nan]]
