In [1]:
import numpy as np

In [72]:
class ReLUNode:
    def __init__(self, input_size):
        self.weights = np.random.rand(input_size) * 0.01
        self.bias = np.random.random() * 0.01
        self.inputs = None
        self.output = None

    def forward(self, inputs):
        self.inputs = x
        self.output = np.maximum(0.0, inputs.dot(self.weights) + self.bias)
        return self.output

    def backward(self, d_output):
        d_output = d_output if self.output > 0 else 0.0
        d_weights = self.inputs * d_output
        d_inputs = self.weights * d_output
        d_bias = d_output
        return d_inputs, d_weights, d_bias

In [5]:
class ReLULayer:
    def __init__(self, input_size, output_size):
        self.W = np.random.randn(input_size, output_size) * np.sqrt(2 / input_size) # weight initialization
        self.b = np.zeros((1, output_size)) # bias initialization
        self.X = None
        self.Z = None
        self.A = None

    def forward(self, X):
        self.X = X
        self.Z = np.dot(X, self.W) + self.b
        self.A = np.maximum(0, self.Z)
        return self.A

    def backward(self, dA, learning_rate):
        dZ = np.multiply(dA, np.int64(self.A > 0))
        dW = np.dot(self.X.T, dZ)
        db = np.sum(dZ, axis=0, keepdims=True)
        dX = np.dot(dZ, self.W.T)

        self.W -= learning_rate * dW
        self.b -= learning_rate * db

        return dX

In [8]:
class SoftmaxLayer:
    def __init__(self, input_size, output_size):
        self.W = np.random.randn(input_size, output_size) * np.sqrt(2 / input_size) # weight initialization
        self.b = np.zeros((1, output_size)) # bias initialization

    def forward(self, X):
        self.X = X
        self.Z = np.dot(X, self.W) + self.b
        print(self.Z)
        exp_Z = np.exp(self.Z)
        self.A = exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
        return self.A

    def backward(self, dA, learning_rate):
        batch_size = self.X.shape[0]
        dZ = self.A - dA
        dW = np.dot(self.X.T, dZ) / batch_size
        db = np.sum(dZ, axis=0, keepdims=True) / batch_size
        dX = np.dot(dZ, self.W.T)

        self.W -= learning_rate * dW
        self.b -= learning_rate * db

        return dX

In [9]:
# Generate some dummy data
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, size=(100, 1))

# Create a ReLU layer with 10 inputs and 5 outputs
relu_layer = ReLULayer(10, 5)

# Create a Softmax layer with 5 inputs and 2 outputs (for binary classification)
softmax_layer = SoftmaxLayer(5, 2)

# Train the network
learning_rate = 0.01
for i in range(1000):
    # Forward pass through first layer
    A1 = relu_layer.forward(X)

    # Forward pass through second layer
    y_pred = softmax_layer.forward(A1)

    # Compute loss and gradient of loss with respect to prediction
    loss = -np.mean(y * np.log(y_pred + 1e-8) + (1 - y) * np.log(1 - y_pred + 1e-8))
    dA2 = (y_pred - y) / (y_pred * (1 - y_pred) + 1e-8)

    # Backward pass through second layer and update weights
    dA1 = softmax_layer.backward(dA2, learning_rate)

    # Backward pass through first layer and update weights
    relu_layer.backward(dA1, learning_rate)

    if i % 100 == 0:
        print(f"Loss at iteration {i}: {loss:.4f}")

[[ 8.77576502e-01 -2.28054919e+00]
 [-2.30522561e-01  1.84058542e+00]
 [ 1.73860079e+00 -2.49847458e-01]
 [-1.10399992e-01  2.02838545e-01]
 [ 1.57081405e-01 -8.06125412e-01]
 [-6.46061492e-01  1.14923360e+00]
 [ 8.19132195e-01 -1.27756971e+00]
 [ 4.05747716e-03  1.92960463e+00]
 [ 8.55886802e-01 -1.64307840e+00]
 [ 1.38983329e+00  9.30999232e-02]
 [ 8.79054558e-01 -8.47504756e-01]
 [ 2.36087753e+00  2.20885650e+00]
 [-1.31706557e-01 -6.59739321e-02]
 [ 6.33252559e-01 -1.68927778e+00]
 [ 6.71342404e-01  5.45589490e-01]
 [ 1.62361666e+00  7.49000153e-01]
 [-4.61041274e-01  3.89372056e-01]
 [ 1.13799967e+00 -9.13221667e-01]
 [ 4.63743735e-01 -8.61648148e-01]
 [ 9.23350430e-01  9.13547769e-01]
 [ 9.09077336e-01  1.55741710e-01]
 [ 2.20151084e-01  2.95553210e+00]
 [-5.14762024e-01  1.95762533e+00]
 [ 2.86048074e-02 -2.05770296e-01]
 [ 1.40256607e+00 -2.00779169e+00]
 [ 1.05758417e-01  1.04956962e-01]
 [ 9.88815433e-01 -4.74042551e-01]
 [ 1.03839198e+00  2.56161375e-01]
 [ 3.81892228e-01 -1

  exp_Z = np.exp(self.Z)
  self.A = exp_Z / np.sum(exp_Z, axis=1, keepdims=True)


[[nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]

In [3]:
# Generate some dummy data
X = np.random.randn(100, 10)
y = np.random.randn(100, 1)

# Create a ReLU layer with 10 inputs and 5 outputs
layer1 = ReLULayer(10, 5)

# Create a ReLU layer with 5 inputs and 1 output (for regression)
layer2 = ReLULayer(5, 1)

# Train the network
learning_rate = 0.01
for i in range(1000):
    # Forward pass through first layer
    A1 = layer1.forward(X)

    # Forward pass through second layer
    y_pred = layer2.forward(A1)

    # Compute loss and gradient of loss with respect to prediction
    loss = np.mean((y - y_pred)**2)
    dA2 = y_pred - y

    # Backward pass through second layer and update weights
    dA1 = layer2.backward(dA2, learning_rate)

    # Backward pass through first layer and update weights
    layer1.backward(dA1, learning_rate)

    if i % 100 == 0:
        print(f"Loss at iteration {i}: {loss:.4f}")


Loss at iteration 0: 1.4413
Loss at iteration 100: 0.8468
Loss at iteration 200: 0.8452
Loss at iteration 300: 0.8441
Loss at iteration 400: 0.8437
Loss at iteration 500: 0.8435
Loss at iteration 600: 0.8434
Loss at iteration 700: 0.8433
Loss at iteration 800: 0.8432
Loss at iteration 900: 0.8432


In [47]:
# Create a ReLU layer with input size 3 and output size 4
relu_layer = ReLULayer(3, 4)

# Forward pass
x = np.array([[1, 2, 3]])
output = relu_layer.forward(x)
print(f"output {output}")  # Example output: [[0. 0.0322328 0. 0.]]

# Backward pass
d_output_ = np.array([[1, 1, 1, 1]])
d_inputs_ = relu_layer.backward(d_output_, learning_rate=0.1)
print(f"d_inputs: {d_inputs_}")  # Example output: [[0. 0.1 0. 0.1]]

weights: [[0.0096709  0.00907707 0.00155327 0.00841888]
 [0.00180067 0.00720393 0.00078848 0.00415811]
 [0.00034607 0.00165436 0.00610345 0.00070931]]
bias: [[0.00793937 0.0034374  0.00716166 0.00890719]]
output [[0.02224983 0.03188541 0.02860224 0.02777022]]


IndexError: boolean index did not match indexed array along dimension 1; dimension is 4 but corresponding boolean dimension is 3

In [1]:
import numpy as np

np.maximum(0, np.array([1, 2, 34]))

array([ 1,  2, 34])

In [16]:
import numpy as np

# Define the ReLU activation function
def relu(x):
    return np.maximum(x, 0)

# Define the softmax activation function
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

# Define the categorical cross-entropy loss function
def categorical_cross_entropy_loss(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    num_examples = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred)) / num_examples
    return loss

# Define a neural network with one hidden layer
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y, learning_rate):
        num_examples = X.shape[0]
        dZ2 = self.A2 - y
        dW2 = np.dot(self.A1.T, dZ2) / num_examples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / num_examples
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.Z1 > 0)
        dW1 = np.dot(X.T, dZ1) / num_examples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / num_examples
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

# Generate some example data
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, size=(100, 3))
learning_rate = 0.02

# Create a neural network with one hidden layer
nn = NeuralNetwork(10, 20, 3)

# Train the neural network
for i in range(1000):
    y_pred = nn.forward(X)
    loss = categorical_cross_entropy_loss(y, y_pred)
    nn.backward(X, y, learning_rate)
    if i % 100 == 0:
        print("Iteration %d: loss = %.4f" % (i, loss))

Iteration 0: loss = 1.6919
Iteration 100: loss = 1.6914
Iteration 200: loss = 1.6912
Iteration 300: loss = 1.6909
Iteration 400: loss = 1.6900
Iteration 500: loss = 1.6882
Iteration 600: loss = 1.6869
Iteration 700: loss = 1.6834
Iteration 800: loss = 1.6821
Iteration 900: loss = 1.6841


In [17]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Fisher Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Convert the labels to one-hot encoded vectors
y_one_hot = np.zeros((y.size, y.max()+1))
y_one_hot[np.arange(y.size), y] = 1

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Define the ReLU activation function
def relu(x):
    return np.maximum(x, 0)

# Define the softmax activation function
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

# Define the categorical cross-entropy loss function
def categorical_cross_entropy_loss(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    num_examples = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred)) / num_examples
    return loss

# Define a neural network with one hidden layer
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y, learning_rate):
        num_examples = X.shape[0]
        dZ2 = self.A2 - y
        dW2 = np.dot(self.A1.T, dZ2) / num_examples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / num_examples
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.Z1 > 0)
        dW1 = np.dot(X.T, dZ1) / num_examples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / num_examples
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

# Create a neural network with one hidden layer
nn = NeuralNetwork(4, 10, 3)

# Train the neural network
learning_rate = 0.1
num_iterations = 1000

for i in range(num_iterations):
    # Perform a forward pass through the network
    y_pred_train = nn.forward(X_train)

    # Compute the loss and accuracy on the training set
    loss_train = categorical_cross_entropy_loss(y_train, y_pred_train)
    accuracy_train = np.mean(np.argmax(y_train, axis=1) == np.argmax(y_pred_train, axis=1))

    # Perform a backward pass through the network
    nn.backward(X_train, y_train, learning_rate)

    # Perform a forward pass through the network on the test set
    y_pred_test = nn.forward(X_test)

    # Compute the loss and accuracy on the test set
    loss_test = categorical_cross_entropy_loss(y_test, y_pred_test)
    accuracy_test = np.mean(np.argmax(y_test, axis=1) == np.argmax(y_pred_test, axis=1))

    # Print the loss and accuracy on the training and test sets
    if i % 100 == 0:
        print(f"Iteration {i}: Training Loss = {loss_train:.4f}, Training Accuracy = {accuracy_train:.4f}, Test Loss = {loss_test:.4f}, Test Accuracy = {accuracy_test:.4f}")

Iteration 0: Training Loss = 1.0986, Training Accuracy = 0.3333, Test Loss = 1.0982, Test Accuracy = 0.3000
Iteration 100: Training Loss = 0.3419, Training Accuracy = 0.9750, Test Loss = 0.3398, Test Accuracy = 0.9667
Iteration 200: Training Loss = 0.3282, Training Accuracy = 0.8083, Test Loss = 0.3394, Test Accuracy = 0.8333
Iteration 300: Training Loss = 0.2110, Training Accuracy = 0.9167, Test Loss = 0.2479, Test Accuracy = 0.8667
Iteration 400: Training Loss = 0.1254, Training Accuracy = 0.9583, Test Loss = 0.1793, Test Accuracy = 0.8667
Iteration 500: Training Loss = 0.1433, Training Accuracy = 0.9417, Test Loss = 0.2261, Test Accuracy = 0.8667
Iteration 600: Training Loss = 0.0766, Training Accuracy = 0.9833, Test Loss = 0.0932, Test Accuracy = 1.0000
Iteration 700: Training Loss = 0.0713, Training Accuracy = 0.9833, Test Loss = 0.0868, Test Accuracy = 1.0000
Iteration 800: Training Loss = 0.0685, Training Accuracy = 0.9833, Test Loss = 0.0828, Test Accuracy = 1.0000
Iteration 90

In [24]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Fisher Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Convert the labels to one-hot encoded vectors
y_one_hot = np.zeros((y.size, y.max()+1))
y_one_hot[np.arange(y.size), y] = 1

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Define the ReLU activation function
def relu(x):
    return np.maximum(x, 0)

# Define the softmax activation function
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

# Define the categorical cross-entropy loss function
def categorical_cross_entropy_loss(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    num_examples = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred)) / num_examples
    return loss

# Define a neural network with one hidden layer
class NeuralNetwork:
    def __init__(self, input_size, hidden_size_0, hidden_size_1, output_size):
        # ReLU 0
        self.W0 = np.random.randn(input_size, hidden_size_0) * 0.01
        self.b0 = np.zeros((1, hidden_size_0))

        # ReLU 1
        self.W1 = np.random.randn(hidden_size_0, hidden_size_1) * 0.01
        self.b1 = np.zeros((1, hidden_size_1))

        # Softmax 2
        self.W2 = np.random.randn(hidden_size_1, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # ReLU 0
        self.Z0 = np.dot(X, self.W0) + self.b0
        self.A0 = relu(self.Z0)

        # ReLU 1
        self.Z1 = np.dot(self.A0, self.W1) + self.b1
        self.A1 = relu(self.Z1)

        # Softmax 2
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y, learning_rate):
        num_examples = X.shape[0]

        dZ2 = self.A2 - y
        dW2 = np.dot(self.A1.T, dZ2) / num_examples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / num_examples
        dA1 = np.dot(dZ2, self.W2.T)

        dZ1 = dA1 * (self.Z1 > 0)
        dW1 = np.dot(self.A0.T, dZ1) / num_examples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / num_examples
        dA0 = np.dot(dZ1, self.W1.T)

        dZ0 = dA0 * (self.Z0 > 0)
        dW0 = np.dot(X.T, dZ0) / num_examples
        db0 = np.sum(dZ0, axis=0, keepdims=True) / num_examples

        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W0 -= learning_rate * dW0
        self.b0 -= learning_rate * db0

# Create a neural network
nn = NeuralNetwork(4, 6, 4, 3)

# Train the neural network
learning_rate = 0.1
num_iterations = 10_000

for i in range(num_iterations):
    # Perform a forward pass through the network
    y_pred_train = nn.forward(X_train)

    # Compute the loss and accuracy on the training set
    loss_train = categorical_cross_entropy_loss(y_train, y_pred_train)
    accuracy_train = np.mean(np.argmax(y_train, axis=1) == np.argmax(y_pred_train, axis=1))

    # Perform a backward pass through the network
    nn.backward(X_train, y_train, learning_rate)

    # Perform a forward pass through the network on the test set
    y_pred_test = nn.forward(X_test)

    # Compute the loss and accuracy on the test set
    loss_test = categorical_cross_entropy_loss(y_test, y_pred_test)
    accuracy_test = np.mean(np.argmax(y_test, axis=1) == np.argmax(y_pred_test, axis=1))

    # Print the loss and accuracy on the training and test sets
    if i % 100 == 0:
        print(f"Iteration {i}: Training Loss = {loss_train:.4f}, Training Accuracy = {accuracy_train:.4f}, Test Loss = {loss_test:.4f}, Test Accuracy = {accuracy_test:.4f}")

Iteration 0: Training Loss = 1.0986, Training Accuracy = 0.4000, Test Loss = 1.0987, Test Accuracy = 0.3000
Iteration 100: Training Loss = 1.0984, Training Accuracy = 0.3417, Test Loss = 1.1004, Test Accuracy = 0.3000
Iteration 200: Training Loss = 1.0984, Training Accuracy = 0.3417, Test Loss = 1.1005, Test Accuracy = 0.3000
Iteration 300: Training Loss = 1.0983, Training Accuracy = 0.3417, Test Loss = 1.1004, Test Accuracy = 0.3000
Iteration 400: Training Loss = 1.0980, Training Accuracy = 0.3417, Test Loss = 1.1001, Test Accuracy = 0.3000
Iteration 500: Training Loss = 1.0739, Training Accuracy = 0.5417, Test Loss = 1.0708, Test Accuracy = 0.6333
Iteration 600: Training Loss = 0.5373, Training Accuracy = 0.7167, Test Loss = 0.5211, Test Accuracy = 0.7667
Iteration 700: Training Loss = 0.4062, Training Accuracy = 0.9000, Test Loss = 0.3915, Test Accuracy = 0.8000
Iteration 800: Training Loss = 0.3100, Training Accuracy = 0.9500, Test Loss = 0.2996, Test Accuracy = 0.9667
Iteration 90