In [5]:
import numpy as np

# Sigmoid activation function
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    The naive formula overflows in ``np.exp(-x)`` for large negative ``x``
    (RuntimeWarning, or an error under ``np.errstate(over="raise")``).
    This version only ever exponentiates ``-|x|``, which lies in (0, 1],
    so it cannot overflow.

    Args:
        x: Scalar, sequence, or NumPy array of real values.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``, with every value in [0, 1].
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]: safe to exponentiate
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the algebraically equal
    # form e^x / (1 + e^x) is used so the exponent stays non-positive.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

# Softmax function (numerically stabilised)
def softmax(x, axis=1):
    """Return the softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores.
        axis: Axis along which the outputs sum to 1. Defaults to 1, i.e.
            each row of a 2-D array is normalised independently. Pass
            ``axis=-1`` (or ``0``) for a 1-D vector.

    Returns:
        Array of the same shape as ``x`` whose entries are non-negative
        and sum to 1 along ``axis``.
    """
    # Subtracting the per-slice maximum leaves the result unchanged
    # mathematically but keeps every exponent <= 0, avoiding overflow.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

# XOR truth table: each row of X is one (x1, x2) input pair.
X = np.array(
    [
        [1, 0],
        [0, 1],
        [0, 0],
        [1, 1],
    ]
)
# Targets are the XOR of the two input columns -> [1, 1, 0, 0].
y = X[:, 0] ^ X[:, 1]

# First-layer parameters (values taken from the notes).
# beta^{[1]} is stored as a single 1x2 row; alpha^{[1]} is a length-2 bias.
beta1 = np.array(
    [
        [1, 1],
    ]
)
alpha1 = np.array([0, -1])

# Second-layer parameters: gamma weights the two hidden activations and
# nu is the bias. The code below applies a sigmoid (not a softmax) to
# this layer's output.
gamma = np.array([1.1, -1])
nu = np.array([-0.3])

# First (hidden) layer: a^{[1]} = sigmoid(X . beta1^T + alpha1)
def forward_first_layer(X, beta1, alpha1):
    """Return the sigmoid activations of the first layer.

    Args:
        X: Input array whose last dimension matches the last dimension
            of ``beta1``.
        beta1: Weight array; its transpose is multiplied with ``X``.
        alpha1: Bias, added to the product using NumPy broadcasting.

    Returns:
        ``sigmoid(np.dot(X, beta1.T) + alpha1)``.

    Note:
        With the XOR parameters in this file (``beta1`` of shape (1, 2),
        ``alpha1`` of shape (2,)) the product has a single column and the
        bias addition broadcasts it to two columns, so both hidden units
        share the same weighted sum and differ only in their bias.
    """
    weighted_inputs = np.dot(X, beta1.T)
    return sigmoid(weighted_inputs + alpha1)

# Second layer: linear score z^{[2]} = a1 . gamma + nu (no activation here)
def forward_second_layer(a1, gamma, nu):
    """Return the second layer's pre-activation values.

    Args:
        a1: Activations produced by the first layer.
        gamma: Weights applied to ``a1`` via ``np.dot``.
        nu: Bias added to the weighted sum.

    Returns:
        ``np.dot(a1, gamma) + nu``. No softmax or sigmoid is applied
        inside this function; the caller applies a sigmoid afterwards.
    """
    weighted_sum = np.dot(a1, gamma)
    return weighted_sum + nu

# Run the full forward pass on the four XOR inputs.
# Hidden layer -> linear output score -> sigmoid probability of class 1.
a1 = forward_first_layer(X, beta1, alpha1)
z2 = forward_second_layer(a1, gamma, nu)
# NOTE: despite its name, `softmax_output` holds sigmoid probabilities.
softmax_output = sigmoid(z2)

# Class 1 is predicted wherever the probability exceeds 0.5.
predictions = np.greater(softmax_output, 0.5).astype(int)

print("Final Output after sigmoid transformation:", softmax_output)
print("Final predictions (class 1 probability > 0.5):", predictions)


Final Output after sigmoid transformation: [0.50104111 0.50104111 0.49526479 0.48445956]
Final predictions (class 1 probability > 0.5): [1 1 0 0]
