In [10]:
import numpy as np

# Define the sigmoid activation function and its derivative
def sigmoid(x):
    """Logistic function ``1 / (1 + e**(-x))``.

    ``x`` may be a scalar or a NumPy array; for arrays the function is
    applied element-wise and the result has the same shape as ``x``.
    """
    decay = np.exp(-x)
    return 1 / (decay + 1)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid expressed through the sigmoid's own
    output: if ``x = sigmoid(z)`` then ``d sigmoid / dz = x * (1 - x)``.
    So ``x`` must already be an activation (a sigmoid output), not the
    pre-activation value ``z``.
    """
    complement = 1 - x
    return complement * x

# XOR truth table: one row per input pair, in the order 00, 01, 10, 11
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

# Labels as a (4, 1) column: XOR of the two input columns -> 0, 1, 1, 0
y = X[:, :1] ^ X[:, 1:]

# Fix NumPy's global seed so every run draws the same starting parameters
np.random.seed(1)

# Layer sizes: 2 inputs -> 5 hidden units -> 1 output
input_neurons, hidden_neurons, output_neurons = 2, 5, 1

# Every parameter starts as a uniform draw from [0, 1), so the initial
# values are all non-negative (mean 0.5, not 0).
# The draws happen in this order: hidden bias, output bias, input->hidden
# weights, hidden->output weights. Changing the order changes which random
# numbers each array receives.
bias_hidden = np.random.uniform(low=0.0, high=1.0, size=(1, hidden_neurons))
bias_output = np.random.uniform(low=0.0, high=1.0, size=(1, output_neurons))

# Input layer -> hidden layer
weights_input_hidden = np.random.uniform(
    low=0.0, high=1.0, size=(input_neurons, hidden_neurons)
)

# Hidden layer -> output layer
weights_hidden_output = np.random.uniform(
    low=0.0, high=1.0, size=(hidden_neurons, output_neurons)
)

# Hyperparameters for full-batch training
epochs = 10000
learning_rate = 0.1

# The whole XOR table is pushed through the network on every epoch
input_layer = X

for epoch in range(epochs):
    # ---- Forward pass: input -> hidden -> output, sigmoid at both layers
    hidden_layer_input = input_layer.dot(weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = hidden_layer_output.dot(weights_hidden_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)

    # ---- Backward pass
    # Residual is target minus prediction, which is why the parameter
    # updates below are *added* rather than subtracted.
    error = y - output_layer_output

    # Output-layer delta: residual scaled by the sigmoid slope at the output
    d_output = sigmoid_derivative(output_layer_output) * error

    # Hidden-layer delta: output delta sent back through the (not yet
    # updated) hidden->output weights, scaled by the hidden sigmoid slope
    error_hidden_layer = np.dot(d_output, weights_hidden_output.T)
    d_hidden_layer = sigmoid_derivative(hidden_layer_output) * error_hidden_layer

    # ---- Parameter updates (in place), summed over the four samples
    bias_output += learning_rate * d_output.sum(axis=0)
    bias_hidden += learning_rate * d_hidden_layer.sum(axis=0)

    weights_hidden_output += learning_rate * np.dot(hidden_layer_output.T, d_output)
    weights_input_hidden += learning_rate * np.dot(input_layer.T, d_hidden_layer)

# Inference: one more forward pass over the four XOR inputs, using the
# parameters left behind by training
hidden_layer_input = X.dot(weights_input_hidden) + bias_hidden
hidden_layer_output = sigmoid(hidden_layer_input)

output_layer_input = hidden_layer_output.dot(weights_hidden_output) + bias_output
output_layer_output = sigmoid(output_layer_input)

# One row per input pair (00, 01, 10, 11); values near 0/1 mean XOR was learned
print("Output after training:", output_layer_output, sep="\n")


Output after training:
[[0.04576893]
 [0.95279293]
 [0.94579544]
 [0.0559509 ]]


# 3 layers: XOR network with two hidden layers (5 and 4 units) and one output layer

In [8]:
import numpy as np

# Define the sigmoid activation function and its derivative
def sigmoid(x):
    """Logistic function ``1 / (1 + e**(-x))``.

    ``x`` may be a scalar or a NumPy array; for arrays the function is
    applied element-wise and the result has the same shape as ``x``.
    """
    decay = np.exp(-x)
    return 1 / (decay + 1)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid expressed through the sigmoid's own
    output: if ``x = sigmoid(z)`` then ``d sigmoid / dz = x * (1 - x)``.
    So ``x`` must already be an activation (a sigmoid output), not the
    pre-activation value ``z``.
    """
    complement = 1 - x
    return complement * x

# XOR truth table: one row per input pair, in the order 00, 01, 10, 11
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

# Labels as a (4, 1) column: XOR of the two input columns -> 0, 1, 1, 0
y = X[:, :1] ^ X[:, 1:]

# Fix NumPy's global seed so every run draws the same starting parameters
np.random.seed(1)

# Layer sizes: 2 inputs -> 5 hidden units -> 4 hidden units -> 1 output
input_neurons, hidden_neurons1, hidden_neurons2, output_neurons = 2, 5, 4, 1

# Every parameter starts as a uniform draw from [0, 1), so the initial
# values are all non-negative (mean 0.5, not 0).
# The draws happen in this order: the three biases first, then the three
# weight matrices from input side to output side. Changing the order
# changes which random numbers each array receives.
bias_hidden1 = np.random.uniform(low=0.0, high=1.0, size=(1, hidden_neurons1))
bias_hidden2 = np.random.uniform(low=0.0, high=1.0, size=(1, hidden_neurons2))
bias_output = np.random.uniform(low=0.0, high=1.0, size=(1, output_neurons))

# Input layer -> first hidden layer
weights_input_hidden1 = np.random.uniform(
    low=0.0, high=1.0, size=(input_neurons, hidden_neurons1)
)

# First hidden layer -> second hidden layer
weights_hidden1_hidden2 = np.random.uniform(
    low=0.0, high=1.0, size=(hidden_neurons1, hidden_neurons2)
)

# Second hidden layer -> output layer
weights_hidden2_output = np.random.uniform(
    low=0.0, high=1.0, size=(hidden_neurons2, output_neurons)
)

# Hyperparameters for full-batch training
epochs = 100000
learning_rate = 0.1

# The whole XOR table is pushed through the network on every epoch
input_layer = X

for epoch in range(epochs):
    # ---- Forward pass: input -> hidden 1 -> hidden 2 -> output,
    #      with a sigmoid after every layer
    hidden_layer_input1 = input_layer.dot(weights_input_hidden1) + bias_hidden1
    hidden_layer_output1 = sigmoid(hidden_layer_input1)

    hidden_layer_input2 = hidden_layer_output1.dot(weights_hidden1_hidden2) + bias_hidden2
    hidden_layer_output2 = sigmoid(hidden_layer_input2)

    output_layer_input = hidden_layer_output2.dot(weights_hidden2_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)

    # ---- Backward pass
    # Residual is target minus prediction, which is why the parameter
    # updates below are *added* rather than subtracted.
    error = y - output_layer_output

    # Output-layer delta: residual scaled by the sigmoid slope at the output
    d_output = sigmoid_derivative(output_layer_output) * error

    # Each hidden delta is the next layer's delta sent back through the
    # (not yet updated) connecting weights, scaled by the local sigmoid slope
    error_hidden_layer2 = np.dot(d_output, weights_hidden2_output.T)
    d_hidden_layer2 = sigmoid_derivative(hidden_layer_output2) * error_hidden_layer2

    error_hidden_layer1 = np.dot(d_hidden_layer2, weights_hidden1_hidden2.T)
    d_hidden_layer1 = sigmoid_derivative(hidden_layer_output1) * error_hidden_layer1

    # ---- Parameter updates (in place), summed over the four samples
    bias_output += learning_rate * d_output.sum(axis=0)
    bias_hidden2 += learning_rate * d_hidden_layer2.sum(axis=0)
    bias_hidden1 += learning_rate * d_hidden_layer1.sum(axis=0)

    weights_hidden2_output += learning_rate * np.dot(hidden_layer_output2.T, d_output)
    weights_hidden1_hidden2 += learning_rate * np.dot(hidden_layer_output1.T, d_hidden_layer2)
    weights_input_hidden1 += learning_rate * np.dot(input_layer.T, d_hidden_layer1)

# Inference: one more forward pass over the four XOR inputs, using the
# parameters left behind by training
hidden_layer_input1 = X.dot(weights_input_hidden1) + bias_hidden1
hidden_layer_output1 = sigmoid(hidden_layer_input1)

hidden_layer_input2 = hidden_layer_output1.dot(weights_hidden1_hidden2) + bias_hidden2
hidden_layer_output2 = sigmoid(hidden_layer_input2)

output_layer_input = hidden_layer_output2.dot(weights_hidden2_output) + bias_output
output_layer_output = sigmoid(output_layer_input)

# One row per input pair (00, 01, 10, 11); values near 0/1 mean XOR was learned
print("Output after training:", output_layer_output, sep="\n")


Output after training:
[[0.00442333]
 [0.99172646]
 [0.99181792]
 [0.0102492 ]]
