In [1]:
import numpy as np

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook form calls ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) once ``x`` is a large negative number.  Here the
    exponential is only ever taken of a non-positive value, so it stays
    within [0, 1].

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same
    # function, rewritten so the numerator never blows up.
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)

def sigmoid_derivative(x):
    """Slope of the sigmoid, written in terms of the sigmoid's output.

    ``x`` is the activation value ``s = sigmoid(z)`` (not the pre-activation
    ``z``); the result is ``s * (1 - s)``.

    Args:
        x: Scalar or array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, element-wise.
    """
    complement = 1 - x
    return x * complement

class SimpleNeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Samples are stored row-wise: ``X`` has shape ``(n_samples, input_size)``
    and predictions have shape ``(n_samples, output_size)``.

    ``forward`` caches its intermediate values (``z1``, ``a1``, ``z2``,
    ``a2``) on the instance and ``backward`` reads ``a1`` from that cache, so
    ``backward`` must be preceded by a ``forward`` call on the same ``X``.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Draw standard-normal weights and create zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Optional seed for a private ``RandomState``; two networks
                built with the same seed start from identical weights.  With
                the default ``None`` the weights come from NumPy's global
                random stream.
        """
        # The np.random module and RandomState both expose randn(), so the
        # unseeded path consumes the global stream in the same order as
        # calling np.random.randn directly.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = rng.randn(input_size, hidden_size)
        self.W2 = rng.randn(hidden_size, output_size)
        self.b1 = np.zeros((1, hidden_size))
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run the network on ``X`` and cache the per-layer values.

        Args:
            X: Array of shape ``(n_samples, input_size)``.

        Returns:
            The output activations ``a2``, shape ``(n_samples, output_size)``.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output):
        """Compute batch-averaged gradients for all four parameters.

        Uses the hidden activations cached by the last ``forward`` call.

        Args:
            X: Inputs, shape ``(n_samples, input_size)``.
            y: Targets, same shape as ``output``.
            output: Network predictions for ``X`` (the value ``forward``
                returned).

        Returns:
            Tuple ``(dW1, db1, dW2, db2)``; each entry has the shape of the
            parameter it belongs to.
        """
        m = X.shape[0]

        # Output layer.  ``output - y`` is the derivative of binary
        # cross-entropy with respect to z2 when the output unit is a sigmoid.
        dz2 = output - y
        dW2 = (1 / m) * np.dot(self.a1.T, dz2)
        db2 = (1 / m) * np.sum(dz2, axis=0, keepdims=True)

        # Hidden layer: push the delta back through W2, then through the
        # hidden sigmoid (its derivative is evaluated from the activation a1).
        dz1 = np.dot(dz2, self.W2.T) * sigmoid_derivative(self.a1)
        dW1 = (1 / m) * np.dot(X.T, dz1)
        db1 = (1 / m) * np.sum(dz1, axis=0, keepdims=True)

        return dW1, db1, dW2, db2

    def train(self, X, y, learning_rate, epochs):
        """Run full-batch gradient descent, updating the parameters in place.

        Args:
            X: Inputs, shape ``(n_samples, input_size)``.
            y: Targets, shape ``(n_samples, output_size)``.
            learning_rate: Step size applied to every gradient.
            epochs: Number of forward/backward/update iterations.
        """
        for _ in range(epochs):
            output = self.forward(X)
            dW1, db1, dW2, db2 = self.backward(X, y, output)

            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2

# Example usage
# Seed NumPy's global generator first: SimpleNeuralNetwork draws its initial
# weights from np.random, so without this every Restart & Run All trains a
# different network and prints different predictions.
SEED = 42
np.random.seed(SEED)

# Targets are the XOR of the first two input columns; the third column is a
# constant 1.
X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
y = np.array([[0], [1], [1], [0]])

nn = SimpleNeuralNetwork(3, 4, 1)
nn.train(X, y, learning_rate=0.1, epochs=10000)

# Test the trained network
print(nn.forward(X))

[[0.01312286]
 [0.9860211 ]
 [0.98626052]
 [0.01759284]]


In [2]:
import torch

# Two leaf tensors; requires_grad_() switches on gradient tracking in place
x = torch.tensor([2.0]).requires_grad_()
y = torch.tensor([3.0]).requires_grad_()

# Build the graph: z = x^2 + y^3
x_squared = x ** 2
y_cubed = y ** 3
z = x_squared + y_cubed

# Backpropagate from z; the partial derivatives land in x.grad and y.grad
z.backward()

# Show the gradients
print(f"dz/dx: {x.grad}")  # expected 4.0  (2 * x   at x = 2)
print(f"dz/dy: {y.grad}")  # expected 27.0 (3 * y^2 at y = 3)

dz/dx: tensor([4.])
dz/dy: tensor([27.])


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [1]:
import numpy as np

class SimpleNeuralNetwork:
    """2-2-1 sigmoid network with hard-coded starting parameters.

    Samples are stored column-wise: ``X`` has shape ``(2, n_samples)`` and the
    output has shape ``(1, n_samples)``.  ``forward`` caches ``z1``, ``a1``,
    ``z2`` and ``a2`` on the instance.
    """

    def __init__(self):
        """Set the fixed initial weights and biases."""
        self.W1 = np.array([[0.15, 0.20],  # 2x2 weight matrix for hidden layer
                           [0.25, 0.30]])
        self.b1 = np.array([[0.35],        # 2x1 bias vector for hidden layer
                           [0.35]])
        self.W2 = np.array([[0.40, 0.45]]) # 1x2 weight matrix for output layer
        self.b2 = np.array([[0.60]])       # 1x1 bias for output layer

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return ``x * (1 - x)``: the sigmoid slope given a sigmoid *output* ``x``."""
        return x * (1 - x)

    def forward(self, X):
        """Run the network on ``X`` and cache the per-layer values.

        Args:
            X: Inputs, shape ``(2, n_samples)``.

        Returns:
            The output activations ``a2``, shape ``(1, n_samples)``.
        """
        # Hidden layer
        self.z1 = np.dot(self.W1, X) + self.b1
        self.a1 = self.sigmoid(self.z1)

        # Output layer
        self.z2 = np.dot(self.W2, self.a1) + self.b2
        self.a2 = self.sigmoid(self.z2)

        return self.a2

    def backward(self, X, y, learning_rate=0.1):
        """Take one gradient-descent step on ``(X, y)`` and report the loss.

        The forward pass is recomputed here, so the gradients always belong to
        this ``X`` and the current weights; the method can be called on its
        own or repeatedly in a loop without a separate ``forward`` call.

        Args:
            X: Inputs, shape ``(2, n_samples)``.
            y: Targets, shape ``(1, n_samples)``.
            learning_rate: Step size applied to every gradient.

        Returns:
            ``sum((y - a2) ** 2) / (2 * m)`` where ``a2`` is the prediction
            made with the weights as they were *before* this update.
        """
        # Refresh the cached activations rather than trusting whatever an
        # earlier forward() call (possibly on other data, or with older
        # weights) left on the instance.
        self.forward(X)

        m = X.shape[1]  # number of examples (columns)

        # Gradients for the output layer (layer 2).
        # NOTE(review): ``a2 - y`` is the z2-gradient of binary cross-entropy,
        # whereas the value returned below is half the mean squared error
        # (whose z2-gradient carries an extra a2 * (1 - a2) factor).  Confirm
        # which loss is intended before relying on the reported number.
        dZ2 = self.a2 - y
        dW2 = (1/m) * np.dot(dZ2, self.a1.T)
        db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)

        # Gradients for the hidden layer (layer 1); W2 is read here before it
        # is updated below.
        dZ1 = np.multiply(np.dot(self.W2.T, dZ2), self.sigmoid_derivative(self.a1))
        dW1 = (1/m) * np.dot(dZ1, X.T)
        db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

        # Update parameters in place
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

        # self.a2 still holds the pre-update prediction at this point.
        return np.sum((y - self.a2) ** 2) / (2 * m)

# Example usage
if __name__ == "__main__":
    # A single training example stored as a column: two features, one target
    X = np.array([[0.05], [0.10]])
    y = np.array([[0.01]])

    # Network starts from its fixed built-in weights
    nn = SimpleNeuralNetwork()

    # Prediction before any weight update
    output = nn.forward(X)
    print("Initial prediction:", output)

    # One gradient-descent step; the value returned is the loss of the
    # prediction made with the weights as they were before the update
    loss = nn.backward(X, y)
    print("Loss after one step:", loss)

Initial prediction: [[0.75136507]]
Loss after one step: 0.274811083176155
