In [40]:
import numpy as np
import pandas as pd

# Read the XOR dataset and shuffle its rows with a fixed seed so that the
# train/test partition below is the same on every run.
data = (
    pd.read_csv('/kaggle/input/xor.csv')
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Feature matrix (columns X1, X2) and labels as a column vector of shape (n, 1).
X = data.loc[:, ['X1', 'X2']].to_numpy()
y = data['label'].to_numpy().reshape(-1, 1)

# Hold out the last 20% of the shuffled rows for testing.
split_idx = int(0.8 * len(data))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

# Define the MLP class
class MLP:
    """Two-layer perceptron with sigmoid hidden and output units.

    The network is trained with full-batch gradient descent on the mean
    binary cross-entropy plus an L2 penalty on both weight matrices.

    ``forward`` caches its intermediate activations on the instance;
    ``backward`` and ``compute_loss`` read that cache, so they must be called
    after ``forward`` on the same batch.

    Labels may be passed either as an ``(n, output_size)`` array or, for
    convenience, as a flat 1-D array; flat labels are reshaped to match the
    predictions so that NumPy broadcasting cannot silently build an
    ``(n, n)`` matrix.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.5, l2_lambda=0.001, seed=42):
        """Create the network and initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            learning_rate: Step size used by ``update_parameters``.
            l2_lambda: L2 regularisation strength.
            seed: Seed passed to ``np.random.seed`` before drawing the
                weights. NOTE: this reseeds NumPy's *global* RNG. Pass
                ``None`` to leave the global RNG untouched.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.l2_lambda = l2_lambda

        # Seed for reproducible weights (default keeps the historic seed 42).
        if seed is not None:
            np.random.seed(seed)

        # He-style scaling: standard normal draws times sqrt(2 / fan_in).
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.bias_output = np.zeros((1, output_size))

    def _align_labels(self, y):
        """Return ``y`` as an array; 1-D labels are reshaped like the predictions.

        Requires ``forward`` to have been called. Raises ``ValueError`` (from
        ``reshape``) if a 1-D ``y`` has a different number of elements than
        the cached predictions.
        """
        y = np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(self.predicted_output.shape)
        return y

    def sigmoid(self, x):
        """Element-wise logistic function; input is clipped to [-50, 50] to avoid overflow in ``exp``."""
        return 1 / (1 + np.exp(-np.clip(x, -50, 50)))

    def sigmoid_derivative(self, x):
        """Sigmoid derivative expressed in terms of the sigmoid *output* ``x`` (not the pre-activation)."""
        return x * (1 - x)

    def forward(self, X):
        """Run a forward pass on ``X`` and cache the activations.

        Args:
            X: Array of shape ``(n, input_size)``.

        Returns:
            Predicted probabilities of shape ``(n, output_size)``.
        """
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)

        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_input)
        return self.predicted_output

    def backward(self, X, y):
        """Compute gradients of the regularised loss for the last forward pass.

        Args:
            X: The batch that was passed to the preceding ``forward`` call.
            y: Labels, shape ``(n, output_size)`` or flat 1-D.

        Returns:
            Tuple ``(d_weights_input_hidden, d_bias_hidden,
            d_weights_hidden_output, d_bias_output)``.
        """
        m = len(X)  # Number of samples
        y = self._align_labels(y)

        # For a sigmoid output with cross-entropy loss, the gradient w.r.t. the
        # output pre-activation is (prediction - target); dividing by m
        # averages over the batch.
        d_error = (self.predicted_output - y) / m

        # Output-layer gradients; the L2 term matches the penalty in compute_loss.
        d_weights_hidden_output = np.dot(self.hidden_output.T, d_error) + \
                                  self.l2_lambda * self.weights_hidden_output / m
        d_bias_output = np.sum(d_error, axis=0, keepdims=True)

        # Back-propagate through the output weights and the hidden sigmoid.
        d_hidden = np.dot(d_error, self.weights_hidden_output.T) * self.sigmoid_derivative(self.hidden_output)

        # Hidden-layer gradients.
        d_weights_input_hidden = np.dot(X.T, d_hidden) + \
                                 self.l2_lambda * self.weights_input_hidden / m
        d_bias_hidden = np.sum(d_hidden, axis=0, keepdims=True)

        return d_weights_input_hidden, d_bias_hidden, d_weights_hidden_output, d_bias_output

    def update_parameters(self, d_weights_input_hidden, d_bias_hidden, d_weights_hidden_output, d_bias_output):
        """Apply one in-place gradient-descent step scaled by ``learning_rate``."""
        self.weights_input_hidden -= self.learning_rate * d_weights_input_hidden
        self.bias_hidden -= self.learning_rate * d_bias_hidden
        self.weights_hidden_output -= self.learning_rate * d_weights_hidden_output
        self.bias_output -= self.learning_rate * d_bias_output

    def compute_loss(self, y_true):
        """Return mean binary cross-entropy plus L2 penalty for the cached predictions.

        Args:
            y_true: Labels for the batch last passed to ``forward``; shape
                ``(n, output_size)`` or flat 1-D.
        """
        y_true = self._align_labels(y_true)
        epsilon = 1e-8  # keeps log() finite when a prediction saturates at 0 or 1
        data_loss = -np.mean(y_true * np.log(self.predicted_output + epsilon) + \
                    (1 - y_true) * np.log(1 - self.predicted_output + epsilon))
        reg_loss = 0.5 * self.l2_lambda * (np.sum(self.weights_input_hidden**2) + \
                   np.sum(self.weights_hidden_output**2)) / len(y_true)
        total_loss = data_loss + reg_loss
        return total_loss

    def evaluate(self, X, y_true):
        """Return classification accuracy on ``X`` using a 0.5 decision threshold.

        Note: this calls ``forward`` and therefore overwrites the cached
        activations.

        Args:
            X: Array of shape ``(n, input_size)``.
            y_true: Labels, shape ``(n, output_size)`` or flat 1-D.
        """
        predictions = self.forward(X)
        predictions = (predictions > 0.5).astype(int)
        y_true = self._align_labels(y_true)
        accuracy = np.mean(predictions == y_true)
        return accuracy

# Build the 2-4-1 network used for the XOR task.
mlp = MLP(input_size=2, hidden_size=4, output_size=1, learning_rate=0.5, l2_lambda=0.001)

# Full-batch training. Stop early once the training loss has failed to set a
# new minimum for `patience` consecutive epochs.
epochs = 50000
patience = 500
best_loss = np.inf
no_improvement = 0

for epoch in range(epochs):
    # The loss is measured on the parameters as they stand *before* this
    # epoch's update.
    mlp.forward(X_train)
    loss = mlp.compute_loss(y_train)

    # Track how long it has been since the loss last improved.
    improved = loss < best_loss
    if improved:
        best_loss = loss
    no_improvement = 0 if improved else no_improvement + 1

    if no_improvement >= patience:
        print(f"Early stopping at epoch {epoch}")
        break

    # One gradient-descent step on the whole training set.
    gradients = mlp.backward(X_train, y_train)
    mlp.update_parameters(*gradients)

    # Progress report every 1000 epochs.
    if not epoch % 1000:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

# Final accuracy on the training split and on the held-out split.
train_accuracy = mlp.evaluate(X_train, y_train)
test_accuracy = mlp.evaluate(X_test, y_test)

print(f"\nTraining Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 0, Loss: 0.7256
Epoch 1000, Loss: 0.6864
Epoch 2000, Loss: 0.5383
Epoch 3000, Loss: 0.2103
Epoch 4000, Loss: 0.1397
Epoch 5000, Loss: 0.1117
Epoch 6000, Loss: 0.0965
Epoch 7000, Loss: 0.0869
Epoch 8000, Loss: 0.0801
Epoch 9000, Loss: 0.0751
Epoch 10000, Loss: 0.0711
Epoch 11000, Loss: 0.0679
Epoch 12000, Loss: 0.0653
Epoch 13000, Loss: 0.0630
Epoch 14000, Loss: 0.0611
Epoch 15000, Loss: 0.0594
Epoch 16000, Loss: 0.0579
Epoch 17000, Loss: 0.0566
Epoch 18000, Loss: 0.0554
Epoch 19000, Loss: 0.0543
Epoch 20000, Loss: 0.0533
Epoch 21000, Loss: 0.0524
Epoch 22000, Loss: 0.0516
Epoch 23000, Loss: 0.0508
Epoch 24000, Loss: 0.0501
Epoch 25000, Loss: 0.0495
Epoch 26000, Loss: 0.0489
Epoch 27000, Loss: 0.0483
Epoch 28000, Loss: 0.0478
Epoch 29000, Loss: 0.0473
Epoch 30000, Loss: 0.0468
Epoch 31000, Loss: 0.0464
Epoch 32000, Loss: 0.0459
Epoch 33000, Loss: 0.0455
Epoch 34000, Loss: 0.0451
Epoch 35000, Loss: 0.0448
Epoch 36000, Loss: 0.0444
Epoch 37000, Loss: 0.0441
Epoch 38000, Loss: 0.0438