In [1]:
!pip install numpy
import numpy as np

class NeuralNetwork:
    """Two-layer fully connected classifier (ReLU hidden layer, softmax output).

    The network is trained with full-batch gradient descent on the
    cross-entropy loss. Inputs are expected as 2-D arrays with one sample per
    row; targets are one-hot rows with ``output_size`` columns.
    """

    # Lower bound applied to probabilities before taking the log so that a
    # saturated softmax (exact 0.0) cannot turn the loss into nan/inf.
    _LOG_EPS = 1e-12

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network and initialise its parameters.

        Args:
            input_size: Number of features per sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output classes.
            seed: Optional seed for reproducible weight initialisation. When
                ``None`` (the default) weights are drawn from NumPy's global
                random state, exactly as before.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # A private RandomState keeps seeded construction from disturbing the
        # global generator; without a seed we fall back to the global one.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Small random weights break symmetry; biases start at zero.
        self.W1 = rng.randn(self.input_size, self.hidden_size) * 0.01
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = rng.randn(self.hidden_size, self.output_size) * 0.01
        self.b2 = np.zeros((1, self.output_size))

    def relu(self, Z):
        """Return the element-wise rectified linear activation of ``Z``."""
        return np.maximum(0, Z)

    def softmax(self, Z):
        """Return row-wise softmax probabilities of ``Z``.

        The row maximum is subtracted before exponentiating so large logits
        do not overflow.
        """
        exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and cache the intermediate activations.

        Stores ``Z1``, ``A1``, ``Z2`` and ``A2`` on the instance because
        ``backward`` reads them.

        Args:
            X: Array with one sample per row and ``input_size`` columns.

        Returns:
            The softmax output ``A2``, one row of class probabilities per
            sample.
        """
        # Input to hidden layer
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)

        # Hidden to output layer
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.softmax(self.Z2)

        return self.A2

    def compute_loss(self, Y, Y_hat):
        """Return the mean cross-entropy between ``Y`` and ``Y_hat``.

        Args:
            Y: One-hot targets, one row per sample.
            Y_hat: Predicted probabilities with the same shape as ``Y``.

        Returns:
            The cross-entropy summed over classes and averaged over rows.
        """
        Y = np.asarray(Y)
        m = Y.shape[0]
        # Clip before the log: 0 * log(0) would otherwise evaluate to nan.
        safe_probs = np.clip(Y_hat, self._LOG_EPS, 1.0)
        loss = -np.sum(Y * np.log(safe_probs)) / m
        return loss

    def backward(self, X, Y):
        """Compute parameter gradients for the most recent forward pass.

        Must be called after ``forward`` on the same ``X``, since it relies
        on the cached ``Z1``, ``A1`` and ``A2``.

        Args:
            X: The inputs that were passed to ``forward``.
            Y: One-hot targets matching ``X`` row for row.

        Returns:
            Tuple ``(dW1, db1, dW2, db2)`` with the same shapes as the
            corresponding parameters.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        m = X.shape[0]

        # Output layer error: softmax combined with cross-entropy reduces to
        # (prediction - target).
        dZ2 = self.A2 - Y
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Hidden layer error: gradient only flows where ReLU was active.
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.Z1 > 0)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        return dW1, db1, dW2, db2

    def update_parameters(self, dW1, db1, dW2, db2, learning_rate):
        """Apply one in-place gradient-descent step to all parameters."""
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, Y, learning_rate, epochs, log_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: Training inputs, one sample per row.
            Y: One-hot targets matching ``X`` row for row.
            learning_rate: Step size for each parameter update.
            epochs: Number of full passes over ``X``.
            log_every: Print the loss every this many epochs (starting at
                epoch 0). Pass ``0`` or ``None`` to silence logging.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        for epoch in range(epochs):
            # Forward propagation
            Y_hat = self.forward(X)

            # Compute loss (measured before this epoch's update)
            loss = self.compute_loss(Y, Y_hat)

            # Backpropagation
            dW1, db1, dW2, db2 = self.backward(X, Y)

            # Update parameters
            self.update_parameters(dW1, db1, dW2, db2, learning_rate)

            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        Y_hat = self.forward(X)
        return np.argmax(Y_hat, axis=1)

# Example usage
if __name__ == "__main__":
    # XOR demo: enumerate every pair of binary inputs in the order
    # (0,0), (0,1), (1,0), (1,1).
    X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

    # One-hot targets picked from the rows of a 2x2 identity matrix:
    # class 1 when exactly one input bit is set, class 0 otherwise.
    Y = np.eye(2, dtype=int)[[0, 1, 1, 0]]

    # Build the model with layer sizes taken from the data, then fit it.
    nn = NeuralNetwork(
        input_size=X.shape[1],
        hidden_size=4,
        output_size=Y.shape[1],
    )
    nn.train(X, Y, learning_rate=0.1, epochs=1000)

    # Report the predicted class index for each of the four inputs.
    predictions = nn.predict(X)
    print("Predictions:", predictions)

Epoch 0, Loss: 0.6931347472385577
Epoch 100, Loss: 0.6928933435451863
Epoch 200, Loss: 0.6901817136839058
Epoch 300, Loss: 0.6636907260672227
Epoch 400, Loss: 0.5624112577204465
Epoch 500, Loss: 0.5011293006112542
Epoch 600, Loss: 0.486807040325826
Epoch 700, Loss: 0.4829406845634029
Epoch 800, Loss: 0.481135488399676
Epoch 900, Loss: 0.4802403679532597
Predictions: [0 1 0 0]
