In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/xor.csv


In [32]:
import numpy as np
import pandas as pd

# Read the XOR dataset and shuffle its rows with a fixed seed, so the
# positional split below does not depend on the file's original row order.
data = (
    pd.read_csv('/kaggle/input/xor.csv')
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Features are the two input columns; labels are reshaped into a column
# vector so they line up with the network's (n_samples, 1) output.
X = data[['X1', 'X2']].values
y = data['label'].values.reshape(-1, 1)

# First 80% of the shuffled rows are used for training, the rest for testing.
split_idx = int(len(data) * 0.8)
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

# Activation functions (the sigmoid input is clipped for numerical stability)
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The input is clipped to [-50, 50] before exponentiation so that
    np.exp is never asked to evaluate a very large argument.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``x``, with values in [0, 1].
    """
    clipped = np.clip(x, -50, 50)
    return 1 / (1 + np.exp(-clipped))

def sigmoid_derivative(x):
    """Sigmoid derivative expressed in terms of the sigmoid's *output*.

    ``x`` is expected to already be an activation s = sigmoid(z) (the
    training loop passes the hidden-layer activations), in which case the
    result s * (1 - s) equals d sigmoid / dz. Passing a raw pre-activation
    here would not yield the derivative.

    Parameters
    ----------
    x : scalar or array-like
        Sigmoid activation value(s).

    Returns
    -------
    ``x * (1 - x)``, element-wise.
    """
    complement = 1 - x
    return x * complement

# Network architecture: 2 inputs -> 4 hidden units -> 1 output unit.
input_size, hidden_size, output_size = 2, 4, 1

# Optimisation hyper-parameters.
learning_rate = 0.5   # gradient-descent step size
epochs = 50000        # upper bound on the number of training iterations
epsilon = 1e-8        # small constant added inside log() to avoid log(0)
l2_lambda = 0.001     # L2 regularization strength

# Biases start at zero (one row so they broadcast across the batch).
bias_hidden = np.zeros((1, hidden_size))
bias_output = np.zeros((1, output_size))

# Seed NumPy's global RNG so the initial weights are reproducible, then draw
# standard-normal weights scaled by sqrt(2 / fan_in). The input->hidden
# matrix is drawn first; keep this order to get the same random values.
np.random.seed(42)
weights_input_hidden = np.sqrt(2. / input_size) * np.random.randn(input_size, hidden_size)
weights_hidden_output = np.sqrt(2. / hidden_size) * np.random.randn(hidden_size, output_size)

# Full-batch gradient-descent training loop with early stopping.
# Every epoch runs one forward pass over all of X_train, computes the
# L2-regularized binary cross-entropy, and takes one gradient step.
# Early stopping monitors the *training* loss: training ends once it has not
# strictly improved for `patience` consecutive epochs. The parameters are not
# rolled back to those of the best epoch when that happens.
best_loss = np.inf
patience = 500
no_improvement = 0

for epoch in range(epochs):
    # Forward propagation: input -> sigmoid hidden layer -> sigmoid output
    hidden_input = np.dot(X_train, weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    
    output_input = np.dot(hidden_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_input)
    
    # Regularized loss calculation: mean binary cross-entropy (epsilon keeps
    # the log arguments away from zero) plus an L2 penalty on both weight
    # matrices, scaled by the number of training samples. Biases are not
    # penalized.
    data_loss = -np.mean(y_train * np.log(predicted_output + epsilon) + 
                (1 - y_train) * np.log(1 - predicted_output + epsilon))
    reg_loss = 0.5 * l2_lambda * (np.sum(weights_input_hidden**2) + 
               np.sum(weights_hidden_output**2)) / len(X_train)
    total_loss = data_loss + reg_loss
    
    # Early stopping check: only a strict decrease resets the counter
    if total_loss < best_loss:
        best_loss = total_loss
        no_improvement = 0
    else:
        no_improvement += 1
        
    # The break happens before this epoch's parameter update is applied
    if no_improvement >= patience:
        print(f"Early stopping at epoch {epoch}")
        break
    
    # Backpropagation with regularization.
    # (prediction - target) / N is the gradient of the mean cross-entropy
    # with respect to the output pre-activation (ignoring epsilon).
    d_error = (predicted_output - y_train) / len(X_train)
    
    # Output layer gradients (the L2 term matches the derivative of reg_loss)
    d_weights_hidden_output = np.dot(hidden_output.T, d_error) + l2_lambda * weights_hidden_output / len(X_train)
    d_bias_output = np.sum(d_error, axis=0, keepdims=True)
    
    # Hidden layer gradients: propagate the error back through the
    # not-yet-updated output weights; sigmoid_derivative is given the hidden
    # activations, not the pre-activations.
    d_hidden = np.dot(d_error, weights_hidden_output.T) * sigmoid_derivative(hidden_output)
    d_weights_input_hidden = np.dot(X_train.T, d_hidden) + l2_lambda * weights_input_hidden / len(X_train)
    d_bias_hidden = np.sum(d_hidden, axis=0, keepdims=True)
    
    # Update parameters in place (all gradients above were computed from the
    # pre-update values)
    weights_hidden_output -= learning_rate * d_weights_hidden_output
    bias_output -= learning_rate * d_bias_output
    weights_input_hidden -= learning_rate * d_weights_input_hidden
    bias_hidden -= learning_rate * d_bias_hidden

    # Progress log every 1000 epochs; the reported loss is the one computed
    # before this epoch's update.
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss:.4f}')

# Evaluation function
def evaluate(X, y_true):
    """Return the classification accuracy of the current network on a dataset.

    Runs a forward pass with the module-level parameters
    (weights_input_hidden, bias_hidden, weights_hidden_output, bias_output),
    thresholds the sigmoid output at 0.5 and compares the resulting 0/1
    predictions with the labels.

    Parameters
    ----------
    X : array-like
        Input features, one row per sample.
    y_true : array-like
        Ground-truth labels, one per prediction. May be a column vector of
        shape (n_samples, 1) or a flat vector of shape (n_samples,).

    Returns
    -------
    float
        Fraction of predictions that equal their label.

    Raises
    ------
    ValueError
        If ``y_true`` does not contain exactly one label per prediction.
    """
    hidden_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    output_input = np.dot(hidden_output, weights_hidden_output) + bias_output
    predictions = (sigmoid(output_input) > 0.5).astype(int)

    # Align the labels with the prediction array before comparing. Without
    # this, a 1-D y_true of shape (n,) would broadcast against the (n, 1)
    # predictions into an (n, n) comparison and silently yield a wrong score.
    labels = np.asarray(y_true).reshape(predictions.shape)

    accuracy = np.mean(predictions == labels)
    return accuracy

# Score the trained network on both partitions (training first, then test).
train_accuracy, test_accuracy = evaluate(X_train, y_train), evaluate(X_test, y_test)

# Report both accuracies as percentages; the leading newline separates the
# summary from the training log printed above it.
print(
    f"\nTraining Accuracy: {train_accuracy * 100:.2f}%",
    f"Test Accuracy: {test_accuracy * 100:.2f}%",
    sep="\n",
)

Epoch 0, Loss: 0.7256
Epoch 1000, Loss: 0.6864
Epoch 2000, Loss: 0.5383
Epoch 3000, Loss: 0.2103
Epoch 4000, Loss: 0.1397
Epoch 5000, Loss: 0.1117
Epoch 6000, Loss: 0.0965
Epoch 7000, Loss: 0.0869
Epoch 8000, Loss: 0.0801
Epoch 9000, Loss: 0.0751
Epoch 10000, Loss: 0.0711
Epoch 11000, Loss: 0.0679
Epoch 12000, Loss: 0.0653
Epoch 13000, Loss: 0.0630
Epoch 14000, Loss: 0.0611
Epoch 15000, Loss: 0.0594
Epoch 16000, Loss: 0.0579
Epoch 17000, Loss: 0.0566
Epoch 18000, Loss: 0.0554
Epoch 19000, Loss: 0.0543
Epoch 20000, Loss: 0.0533
Epoch 21000, Loss: 0.0524
Epoch 22000, Loss: 0.0516
Epoch 23000, Loss: 0.0508
Epoch 24000, Loss: 0.0501
Epoch 25000, Loss: 0.0495
Epoch 26000, Loss: 0.0489
Epoch 27000, Loss: 0.0483
Epoch 28000, Loss: 0.0478
Epoch 29000, Loss: 0.0473
Epoch 30000, Loss: 0.0468
Epoch 31000, Loss: 0.0464
Epoch 32000, Loss: 0.0459
Epoch 33000, Loss: 0.0455
Epoch 34000, Loss: 0.0451
Epoch 35000, Loss: 0.0448
Epoch 36000, Loss: 0.0444
Epoch 37000, Loss: 0.0441
Epoch 38000, Loss: 0.0438