In [1]:
import numpy as np


# Define activation functions and their derivatives
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def softmax_derivative(x):
    exp_x = np.exp(x - np)
    
# Cross-entropy loss
def cross_entropy_loss(y_true, y_pred):
    n_samples = y_true.shape[0]
    log_p = - np.log(y_pred[range(n_samples), y_true.argmax(axis=1)])
    loss = np.sum(log_p) / n_samples
    return loss

# Cross-entropy is the COST function. The derivate is according to one-hot encoded labels...
def cross_entropy_derivative(y_true, y_pred):
    return y_pred - y_true

# Initialize the neural network
class NeuralNetwork:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        self.params = {
            "W1": np.random.randn(input_size, hidden_size1) * 0.01,
            "b1": np.zeros((1, hidden_size1)),
            "W2": np.random.randn(hidden_size1, hidden_size2) * 0.01,
            "b2": np.zeros((1, hidden_size2)),
            "W3": np.random.randn(hidden_size2, output_size) * 0.01,
            "b3": np.zeros((1, output_size))
        }

    def forward(self, X):
        params = self.params

        # First layer
        params['Z1'] = np.dot(X, params['W1']) + params['b1']
        # params['A1'] = relu(params['Z1'])
        params['A1'] = softmax(params['Z1'])

        # Second layer
        params['Z2'] = np.dot(params['A1'], params['W2']) + params['b2']
        # params['A2'] = relu(params['Z2'])
        params['A2'] = softmax(params['Z2'])

        # Output layer
        params['Z3'] = np.dot(params['A2'], params['W3']) + params['b3']
        params['A3'] = softmax(params['Z3'])

        return params['A3']

    def backward(self, X, y_true, learning_rate):
        params = self.params
        m = y_true.shape[0]

        # Output layer gradients
        dZ3 = cross_entropy_derivative(y_true, params['A3'])    
        dW3 = np.dot(params['A2'].T, dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m

        # Second layer gradients
        dA2 = np.dot(dZ3, params['W3'].T)
        # dZ2 = dA2 * relu_derivative(params['Z2'])
        dZ2 = dA2 * cross_entropy_derivative(params['Z2'])
        dW2 = np.dot(params['A1'].T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # First layer gradients
        dA1 = np.dot(dZ2, params['W2'].T)
        # dZ1 = dA1 * relu_derivative(params['Z1'])
        dZ1 = dA1 * cross_entropy_derivative(params['Z1'])
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update parameters
        params['W1'] -= learning_rate * dW1
        params['b1'] -= learning_rate * db1
        params['W2'] -= learning_rate * dW2
        params['b2'] -= learning_rate * db2
        params['W3'] -= learning_rate * dW3
        params['b3'] -= learning_rate * db3

    def train(self, X, y_true, epochs, learning_rate):
        for epoch in range(epochs):
            # Forward pass
            y_pred = self.forward(X)

            # Compute loss
            loss = cross_entropy_loss(y_true, y_pred)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

            # Backward pass
            self.backward(X, y_true, learning_rate)

# One-hot encoding function
def one_hot_encode(y, num_classes):
    one_hot = np.zeros((y.size, num_classes))
    one_hot[np.arange(y.size), y] = 1
    return one_hot

# Sample data
np.random.seed(42)
X_train = np.random.randn(100, 3)  # 100 samples, 3 features
y_train = np.random.randint(0, 2, 100)  # 100 samples, binary classification
y_train_one_hot = one_hot_encode(y_train, 2)

# Create and train the neural network
nn = NeuralNetwork(input_size=3, hidden_size1=5, hidden_size2=4, output_size=2)
nn.train(X_train, y_train_one_hot, epochs=1000, learning_rate=0.01)

Epoch 1/1000, Loss: 0.6928


TypeError: cross_entropy_derivative() missing 1 required positional argument: 'y_pred'