<a href="https://colab.research.google.com/github/Ying3272/Intro-to-Astro2022/blob/master/XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [86]:
import numpy as np


# XOR truth table. Each row is (a, b, 1); the constant third column lets the
# first layer learn its bias as an ordinary weight.
X = np.array([[a, b, 1] for a in (0, 1) for b in (0, 1)])
# Target column: a XOR b for the rows of X, in the same order.
y = np.array([[a ^ b] for a in (0, 1) for b in (0, 1)])

# Random starting weights: w1 maps the 3 input columns to 2 hidden units,
# w2 maps the 2 hidden units plus one bias column to the single output.
w1, w2 = np.random.randn(3, 2), np.random.randn(3, 1)

In [87]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated element-wise.

    Only exp(-|x|) is ever computed, so inputs of large magnitude saturate to
    0 or 1 without triggering NumPy's overflow RuntimeWarning. The direct form
    overflows inside exp(-x) for strongly negative x.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an array with the shape of x.
    """
    decay = np.exp(-np.abs(x))  # always within [0, 1], so it cannot overflow
    # x >= 0 -> 1 / (1 + e^-x);  x < 0 -> e^x / (1 + e^x). Same function, both stable.
    result = np.where(np.asarray(x) >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps the 0-d array produced for scalar input and
    # returns n-d arrays as they are.
    return result[()]

def sigmoid_derivative(x):
    """Return x * (1 - x), element-wise.

    This is the sigmoid's slope expressed through the sigmoid's own output:
    pass in a value that has already been activated. The function does not
    apply the sigmoid itself.
    """
    complement = 1 - x
    return complement * x


# Mean Squared Error
def mse(y_true, y_pred):
    """Average of the squared element-wise differences between y_true and y_pred.

    The mean is taken over every element, so the result is a single number.
    """
    residual = y_true - y_pred
    return np.mean(residual ** 2)

# XOR network function
def xor_net(inputs, w1, w2):
    """Forward pass through the two-layer sigmoid network.

    inputs is multiplied by w1 directly (no bias column is added here), the
    result is passed through the sigmoid, a column of ones is appended to the
    hidden activations, and that augmented matrix is multiplied by w2 and
    passed through the sigmoid again.

    Returns
    -------
    (output, hidden_with_bias)
        The network output and the hidden activations including the appended
        ones column.
    """
    hidden_activations = sigmoid(np.dot(inputs, w1))

    # One extra unit fixed at 1 per sample: its weight in w2 is the output bias.
    n_samples = hidden_activations.shape[0]
    ones_column = np.ones((n_samples, 1))
    hidden_with_bias = np.hstack([hidden_activations, ones_column])

    output = sigmoid(np.dot(hidden_with_bias, w2))
    return output, hidden_with_bias

# Gradient of MSE with respect to weights
def compute_gradient(inputs, targets, w1, w2):
    """Backpropagate the mean-squared error through the network.

    Runs a forward pass with xor_net, then applies the chain rule through the
    output sigmoid, w2, and the hidden sigmoid.

    Parameters
    ----------
    inputs : array passed to xor_net as the network input
    targets : array of desired outputs, same shape as the network output
    w1, w2 : current first- and second-layer weights

    Returns
    -------
    (d_w1, d_w2)
        Gradients with the same shapes as w1 and w2.
    """
    outputs, hidden_outputs = xor_net(inputs, w1, w2)

    # d(MSE)/d(output). mse() averages over every element of the error array,
    # so divide by the total element count. With a single output column this
    # equals the number of rows.
    d_mse = 2 * (outputs - targets) / np.size(targets)

    # Error signal at the output pre-activation; both gradients reuse it.
    output_delta = d_mse * sigmoid_derivative(outputs)

    # Compute the gradient for w2
    d_w2 = np.dot(hidden_outputs.T, output_delta)

    # Compute the gradient for w1. The last row of w2 and the last column of
    # hidden_outputs belong to the constant bias unit, which has no incoming
    # weights, so both are sliced off.
    d_hidden = np.dot(output_delta, w2.T[:, :-1])
    d_w1 = np.dot(inputs.T, d_hidden * sigmoid_derivative(hidden_outputs[:, :-1]))

    return d_w1, d_w2

def count_misclassified(y_true, y_pred):
    """Count the entries whose rounded prediction differs from the target.

    Predictions are rounded with np.round before the comparison.
    """
    is_wrong = np.round(y_pred) != y_true
    return np.sum(is_wrong)


def train_nn(X, y, w1, w2, epochs, learning_rate):
    """Train the network with full-batch gradient descent and report progress.

    Parameters
    ----------
    X, y : training inputs and targets, forwarded to compute_gradient / xor_net
    w1, w2 : starting weights; they are copied, so the arrays passed in are
        never modified
    epochs : number of gradient steps
    learning_rate : step size applied to each gradient

    Returns
    -------
    (w1, w2)
        The trained weights as new float arrays.

    Every 100th epoch the current MSE and misclassification count are printed.
    """
    # Work on float copies: the in-place updates below would otherwise change
    # the caller's arrays, and would fail outright on integer-typed weights.
    w1 = np.array(w1, dtype=float)
    w2 = np.array(w2, dtype=float)

    for epoch in range(epochs):
        dw1, dw2 = compute_gradient(X, y, w1, w2)
        w1 -= learning_rate * dw1
        w2 -= learning_rate * dw2

        # Monitor values at integer multiples of 100 epochs
        completed = epoch + 1
        if completed % 100 == 0:
            predictions, _ = xor_net(X, w1, w2)
            current_mse = mse(y, predictions)
            misclassified = count_misclassified(y, predictions)
            print(f"Epoch {completed}: MSE = {current_mse:.4f}, Misclassified = {misclassified}")

    return w1, w2




# Train for 10000 epochs with a learning rate of 1; the returned weights
# replace the initial ones.
w1, w2 = train_nn(
    X,
    y,
    w1,
    w2,
    epochs=10000,
    learning_rate=1,
)

# Test
predictions, _ = xor_net(X, w1, w2)
print(predictions.round())

Epoch 100: MSE = 0.2474, Misclassified = 2
Epoch 200: MSE = 0.2368, Misclassified = 2
Epoch 300: MSE = 0.2161, Misclassified = 1
Epoch 400: MSE = 0.1937, Misclassified = 1
Epoch 500: MSE = 0.1654, Misclassified = 1
Epoch 600: MSE = 0.0926, Misclassified = 0
Epoch 700: MSE = 0.0376, Misclassified = 0
Epoch 800: MSE = 0.0201, Misclassified = 0
Epoch 900: MSE = 0.0130, Misclassified = 0
Epoch 1000: MSE = 0.0095, Misclassified = 0
Epoch 1100: MSE = 0.0073, Misclassified = 0
Epoch 1200: MSE = 0.0060, Misclassified = 0
Epoch 1300: MSE = 0.0050, Misclassified = 0
Epoch 1400: MSE = 0.0043, Misclassified = 0
Epoch 1500: MSE = 0.0037, Misclassified = 0
Epoch 1600: MSE = 0.0033, Misclassified = 0
Epoch 1700: MSE = 0.0030, Misclassified = 0
Epoch 1800: MSE = 0.0027, Misclassified = 0
Epoch 1900: MSE = 0.0025, Misclassified = 0
Epoch 2000: MSE = 0.0023, Misclassified = 0
Epoch 2100: MSE = 0.0021, Misclassified = 0
Epoch 2200: MSE = 0.0020, Misclassified = 0
Epoch 2300: MSE = 0.0018, Misclassified =

In [93]:
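# Accuracy: average classification accuracy over repeated training runs, each starting from fresh random weights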

def train_nn(X, y, epochs, learning_rate):
    """Train the network from freshly drawn random weights and return them.

    Once this cell runs, this four-argument definition replaces the earlier
    train_nn that took the starting weights as arguments and printed progress.
    This version draws its own weights and prints nothing.

    Parameters
    ----------
    X, y : training inputs and targets, forwarded to compute_gradient
    epochs : number of full-batch gradient steps
    learning_rate : step size applied to each gradient

    Returns
    -------
    (w1, w2)
        The trained weights, with shapes (3, 2) and (3, 1).
    """
    w1 = np.random.randn(3, 2)
    w2 = np.random.randn(3, 1)

    for _ in range(epochs):
        dw1, dw2 = compute_gradient(X, y, w1, w2)
        w1 -= learning_rate * dw1
        w2 -= learning_rate * dw2

    return w1, w2

def compute_accuracy(y_true, y_pred):
    """Fraction of predictions that equal the target after rounding with np.round."""
    return np.mean(np.round(y_pred) == y_true)


total_accuracy = 0

# Number of runs
num_runs = 100

for _ in range(num_runs):
    # Train the neural network (each call starts from new random weights)
    w1, w2 = train_nn(X, y, epochs=10000, learning_rate=0.5)

    # Test
    predictions, _ = xor_net(X, w1, w2)

    # Compute accuracy for this run (a fraction between 0 and 1)
    accuracy = compute_accuracy(y, predictions)
    total_accuracy += accuracy

# Compute average accuracy over all runs
average_accuracy = total_accuracy / num_runs
print(f"Average Accuracy over {num_runs} runs: {average_accuracy * 100:.2f}%")








Average Accuracy over 100 runs: 89.00%
