In [2]:
import numpy as np

class SimpleNeuralNetwork:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Parameters are stored as ``W1`` (input_size, hidden_size), ``b1``
    (1, hidden_size), ``W2`` (hidden_size, output_size) and ``b2``
    (1, output_size). ``forward`` caches its intermediate activations on the
    instance, and ``backward`` consumes them to perform one in-place
    gradient-descent step.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 seed=None, init_scale=0.01):
        """Create the network with small random weights and zero biases.

        Args:
            input_size: Number of features per input row.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            seed: Optional seed for the weight initialisation. When ``None``
                (the default) the weights are drawn from NumPy's global
                generator, so ``np.random.seed(...)`` set by the caller is
                still honoured. When given, a private generator is used and
                the global random state is left untouched.
            init_scale: Factor applied to the standard-normal draws.
        """
        # `np.random` and `RandomState` both expose `randn`, so either can
        # serve as the source of the initial weights.
        rng = np.random if seed is None else np.random.RandomState(seed)

        self.W1 = rng.randn(input_size, hidden_size) * init_scale
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size) * init_scale
        self.b2 = np.zeros((1, output_size))

    def relu(self, x):
        """Return the element-wise maximum of ``x`` and 0."""
        return np.maximum(0, x)

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array ``x``.

        The per-row maximum is subtracted before exponentiating so that
        ``np.exp`` cannot overflow; this does not change the result.
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        Caches ``z1``, ``a1``, ``z2`` and ``out`` on the instance; ``backward``
        reads ``z1``, ``a1`` and ``out``.

        Args:
            X: Array of shape (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size) whose rows sum to 1.
        """
        # First layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.relu(self.z1)

        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.out = self.softmax(self.z2)

        return self.out

    def backward(self, X, y, learning_rate=0.01):
        """Apply one gradient-descent update to all weights and biases.

        Must be called after ``forward(X)`` with the same ``X``, because it
        reuses the activations cached by that call.

        Args:
            X: The inputs passed to the preceding ``forward`` call,
                shape (n_samples, input_size).
            y: Targets with the same shape as the network output, e.g.
                one-hot encoded labels.
            learning_rate: Step size of the update.
        """
        # Number of samples
        m = X.shape[0]

        # `out - y` is the gradient of the softmax cross-entropy loss with
        # respect to the logits when every row of `y` sums to 1.
        dZ2 = self.out - y
        dW2 = np.dot(self.a1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Back-propagate through the hidden layer; the boolean mask is the
        # ReLU derivative (1 where the pre-activation was positive, else 0).
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.z1 > 0)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update weights and biases in place (all gradients were computed
        # above from the pre-update parameters).
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

In [3]:
# Configuration for the toy experiment.
SEED = 42
N_PER_CLASS = 100
N_CLASSES = 2
N_EPOCHS = 1000
LEARNING_RATE = 0.01
LOG_EVERY = 100
LOG_EPS = 1e-12  # floor for probabilities so np.log never receives an exact 0

# Seed NumPy's global generator so that both the synthetic data and the
# network's initial weights -- and therefore the printed losses -- are the
# same on every run. (Output recorded from an earlier, unseeded run will not
# match these numbers.)
np.random.seed(SEED)

# Simple dataset: two clusters of points
X = np.vstack([
    np.random.randn(N_PER_CLASS, 2) + np.array([2, 2]),    # Class 0
    np.random.randn(N_PER_CLASS, 2) + np.array([-2, -2]),  # Class 1
])
y = np.hstack([np.zeros(N_PER_CLASS), np.ones(N_PER_CLASS)]).astype(int)

# The one-hot targets and the row index do not change between epochs, so
# build them once instead of on every iteration.
y_onehot = np.eye(N_CLASSES)[y]
row_idx = np.arange(len(y))

# Create and train network
nn = SimpleNeuralNetwork(input_size=2, hidden_size=3, output_size=N_CLASSES)

# Training loop
for epoch in range(N_EPOCHS):
    # Forward pass
    pred = nn.forward(X)

    # Mean cross-entropy: negative log-probability assigned to the true class.
    # Clipping guards against log(0) if a probability underflows.
    true_class_prob = np.clip(pred[row_idx, y], LOG_EPS, None)
    loss = -np.mean(np.log(true_class_prob))

    # Backward pass and parameter update
    nn.backward(X, y_onehot, learning_rate=LEARNING_RATE)

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 0.6930
Epoch 100, Loss: 0.6919
Epoch 200, Loss: 0.6825
Epoch 300, Loss: 0.6184
Epoch 400, Loss: 0.4471
Epoch 500, Loss: 0.3054
Epoch 600, Loss: 0.1947
Epoch 700, Loss: 0.1125
Epoch 800, Loss: 0.0696
Epoch 900, Loss: 0.0488


In [12]:
# Show the learned parameters of the trained network, layer by layer.
for label, value in (
    ("W1:", nn.W1),
    ("b1:", nn.b1),
    ("W2:", nn.W2),
    ("b2:", nn.b2),
):
    print(label, value)

W1: [[-0.68148114  0.66099103  0.41560136]
 [-0.64816408  0.75636262  0.5073244 ]]
b1: [[0.29751054 0.51102386 0.33072266]]
W2: [[-0.68974308  0.70510598]
 [ 0.80325263 -0.79058361]
 [ 0.51936748 -0.51913441]]
b2: [[-0.44384601  0.44384601]]
