In [None]:
import numpy as np

In [None]:
class Layer:
    def __init__(self, input_size, output_size, activation='relu'):
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation

        # Initialize weights and biases
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(2. / input_size)  # He initialization
        self.biases = np.zeros((output_size, 1))

        # Store activation function
        self.activation_function = self.get_activation_function(activation)
        self.activation_derivative = self.get_activation_derivative(activation)

    def get_activation_function(self, activation):
        if activation == 'relu':
            return lambda x: np.maximum(0, x)
        elif activation == 'sigmoid':
            return lambda x: 1 / (1 + np.exp(-x))
        elif activation == 'tanh':
            return lambda x: np.tanh(x)
        else:
            return lambda x: x  # Linear activation

    def get_activation_derivative(self, activation):
        if activation == 'relu':
            return lambda x: (x > 0).astype(float)
        elif activation == 'sigmoid':
            return lambda x: self.activation_function(x) * (1 - self.activation_function(x))
        elif activation == 'tanh':
            return lambda x: 1 - np.tanh(x)**2
        else:
            return lambda x: np.ones_like(x)  # Derivative for linear is 1

    def forward(self, input_data):
        self.input_data = input_data
        self.z = np.dot(self.weights, input_data) + self.biases
        self.a = self.activation_function(self.z)
        return self.a

    def backward(self, dA, learning_rate):
        m = self.input_data.shape[1]  # Batch size
        dZ = dA * self.activation_derivative(self.z)  # Elementwise multiplication of derivative
        dW = np.dot(dZ, self.input_data.T) / m
        db = np.sum(dZ, axis=1, keepdims=True) / m
        dA_prev = np.dot(self.weights.T, dZ)

        # Update weights and biases using SGD
        self.weights -= learning_rate * dW
        self.biases -= learning_rate * db

        return dA_prev

In [None]:
input_size = 3
output_size = 2
layer = Layer(input_size, output_size, activation='sigmoid')

input_vector = np.array([1, 2, 3]).reshape(input_size, 1)
output_vector = layer.forward(input_vector)
print(output_vector)

[[0.7652793 ]
 [0.56028969]]


In [None]:
class NeuralNetwork:
    def __init__(self, input_size):
        self.layers = []
        self.input_size = input_size

    def add_layer(self, layer):
        self.layers.append(layer)

    def forward(self, X):
        input_data = X
        for layer in self.layers:
            input_data = layer.forward(input_data)
        return input_data

    def backward(self, Y, learning_rate):
        m = Y.shape[1]
        dA = self.layers[-1].a - Y  # Loss derivative with respect to output
        for layer in reversed(self.layers):
            dA = layer.backward(dA, learning_rate)

    def compute_loss(self, predictions, Y):
        m = Y.shape[1]
        return np.sum((predictions - Y) ** 2) / (2 * m)

    def train(self, X, Y, epochs, learning_rate):
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)

            # Compute loss
            loss = self.compute_loss(output, Y)

            # Backward pass
            self.backward(Y, learning_rate)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}/{epochs} complete, Loss: {loss}")


In [None]:
nn = NeuralNetwork(input_size=2)
nn.add_layer(Layer(input_size=2, output_size=10, activation='relu'))  # First hidden layer
nn.add_layer(Layer(input_size=10, output_size=10, activation='relu'))  # Second hidden layer
nn.add_layer(Layer(input_size=10, output_size=1, activation='linear'))  # Output layer

x = np.random.rand()
y = np.random.rand()

input_vector = np.array([x, y]).reshape(2, 1)
output_vector = nn.forward(input_vector)
print(output_vector)

[[0.37613905]]


In [None]:
# Define the input-output function y = sin(2πxy) + 2xy²
def target_function(x, y):
    return np.sin(2 * np.pi * x * y) + 2 * x * y ** 2

# Generate random training data
np.random.seed(42)
X = np.random.rand(2, 1000)  # 1000 samples, x and y between 0 and 1
Y = target_function(X[0], X[1]).reshape(1, 1000)  # Target output

X_train = X[:, :800]
Y_train = Y[:, :800]

X_test = X[:, 800:]
Y_test = Y[:, 800:]

print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(2, 800)
(1, 800)
(2, 200)
(1, 200)


In [None]:
nn = NeuralNetwork(input_size=2)
nn.add_layer(Layer(input_size=2, output_size=10, activation='relu'))  # First hidden layer
nn.add_layer(Layer(input_size=10, output_size=10, activation='relu'))  # Second hidden layer
nn.add_layer(Layer(input_size=10, output_size=1, activation='linear'))  # Output layer

# Train the network
nn.train(X, Y, epochs=1000, learning_rate=0.01)

Epoch 0/1000 complete, Loss: 0.19598861028521372
Epoch 100/1000 complete, Loss: 0.1254776894918218
Epoch 200/1000 complete, Loss: 0.10598490966523468
Epoch 300/1000 complete, Loss: 0.09682446242006398
Epoch 400/1000 complete, Loss: 0.09122907623764602
Epoch 500/1000 complete, Loss: 0.08725785805291585
Epoch 600/1000 complete, Loss: 0.08408067261336181
Epoch 700/1000 complete, Loss: 0.08133948207692242
Epoch 800/1000 complete, Loss: 0.07899308889905757
Epoch 900/1000 complete, Loss: 0.07698303896375883


In [None]:
# Make predictions on the test data using the trained neural network
Y_pred = nn.forward(X_test)

# Compute the Mean Squared Error (MSE) between true values and predicted values
mse = np.mean((Y_pred - Y_test) ** 2)
print(f"Mean Squared Error on Test Data: {mse}")

Mean Squared Error on Test Data: 0.1582241322293819


In [None]:
class Layer:
    def __init__(self, input_size, output_size, activation='relu'):
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation

        # Initialize weights and biases
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(2. / input_size)
        self.biases = np.zeros((output_size, 1))

        # Total number of parameters
        self.n_params = output_size * input_size + output_size

        # Store activation function
        self.activation_function = self.get_activation_function(activation)
        self.activation_derivative = self.get_activation_derivative(activation)

    def get_activation_function(self, activation):
        if activation == 'relu':
            return lambda x: np.maximum(0, x)
        elif activation == 'sigmoid':
            return lambda x: 1 / (1 + np.exp(-x))
        elif activation == 'tanh':
            return lambda x: np.tanh(x)
        else:
            return lambda x: x

    def get_activation_derivative(self, activation):
        if activation == 'relu':
            return lambda x: (x > 0).astype(float)
        elif activation == 'sigmoid':
            return lambda x: self.activation_function(x) * (1 - self.activation_function(x))
        elif activation == 'tanh':
            return lambda x: 1 - np.tanh(x)**2
        else:
            return lambda x: np.ones_like(x)

    def forward(self, input_data, param_index=None):
        self.input_data = input_data
        batch_size = input_data.shape[1]

        # Initialize derivatives
        w_size = self.output_size * self.input_size
        if param_index is not None:
            # Create derivative matrices
            dw = np.zeros_like(self.weights)
            db = np.zeros_like(self.biases)

            if param_index < w_size:
                # This is a weight parameter
                i, j = param_index // self.input_size, param_index % self.input_size
                dw[i, j] = 1.0
            else:
                # This is a bias parameter
                bias_idx = param_index - w_size
                db[bias_idx] = 1.0

        # Forward pass
        z = np.dot(self.weights, input_data) + self.biases
        a = self.activation_function(z)

        if param_index is not None:
            # Compute derivative of z
            dz = np.dot(dw, input_data) + db
            # Compute derivative of activation
            da = self.activation_derivative(z) * dz
            return a, da
        return a, None

    def update_parameters(self, learning_rate, gradients):
        w_size = self.output_size * self.input_size

        # Reshape gradients for weights and biases
        dW = gradients[:w_size].reshape(self.output_size, self.input_size)
        db = gradients[w_size:].reshape(self.output_size, 1)

        # Update parameters
        self.weights -= learning_rate * dW
        self.biases -= learning_rate * db

In [None]:
class NeuralNetwork:
    def __init__(self, input_size):
        self.layers = []
        self.input_size = input_size

    def add_layer(self, layer):
        self.layers.append(layer)

    def forward(self, X):
        input_data = X
        for layer in self.layers:
            input_data, _ = layer.forward(input_data)
        return input_data

    def get_total_params(self):
        return sum(layer.n_params for layer in self.layers)

    def compute_gradients(self, X, Y):
        n_total_params = self.get_total_params()
        gradients = []

        # For each parameter in the network
        param_count = 0
        for layer_idx, layer in enumerate(self.layers):
            for param_idx in range(layer.n_params):
                # Forward pass with derivative with respect to this parameter
                input_data = X
                param_derivative = None

                for i, current_layer in enumerate(self.layers):
                    if i == layer_idx:
                        input_data, param_derivative = current_layer.forward(input_data, param_idx)
                    else:
                        input_data, _ = current_layer.forward(input_data)

                # Compute loss derivative
                m = Y.shape[1]
                output_derivative = (input_data - Y) / m

                # Compute gradient for this parameter
                if param_derivative is not None:
                    grad = np.sum(output_derivative * param_derivative)
                    gradients.append(grad)

                param_count += 1

        return np.array(gradients)

    def train_step(self, X, Y, learning_rate):
        # Compute gradients for all parameters
        gradients = self.compute_gradients(X, Y)

        # Update parameters in each layer
        param_start = 0
        for layer in self.layers:
            param_end = param_start + layer.n_params
            layer.update_parameters(learning_rate, gradients[param_start:param_end])
            param_start = param_end

    def compute_loss(self, predictions, Y):
        m = Y.shape[1]
        return np.sum((predictions - Y) ** 2) / (2 * m)

    def train(self, X, Y, epochs, learning_rate):
        for epoch in range(epochs):
            # Perform training step
            self.train_step(X, Y, learning_rate)

            # Compute loss
            output = self.forward(X)
            loss = self.compute_loss(output, Y)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}/{epochs} complete, Loss: {loss}")


In [None]:
# Initialize neural network
nn = NeuralNetwork(input_size=2)
nn.add_layer(Layer(input_size=2, output_size=10, activation='relu'))  # First hidden layer
nn.add_layer(Layer(input_size=10, output_size=10, activation='relu'))  # Second hidden layer
nn.add_layer(Layer(input_size=10, output_size=1, activation='linear'))  # Output layer

# Train the network
nn.train(X, Y, epochs=1000, learning_rate=0.01)

Epoch 0/1000 complete, Loss: 0.4125869786199614
Epoch 100/1000 complete, Loss: 0.08828936666874818
Epoch 200/1000 complete, Loss: 0.08416290359280922
Epoch 300/1000 complete, Loss: 0.08188915488313034
Epoch 400/1000 complete, Loss: 0.08062927155411227
Epoch 500/1000 complete, Loss: 0.07989338381246565
Epoch 600/1000 complete, Loss: 0.07943426453093261
Epoch 700/1000 complete, Loss: 0.079135729132181
Epoch 800/1000 complete, Loss: 0.07892120120191445
Epoch 900/1000 complete, Loss: 0.0787612458397065


In [None]:
# Make predictions on the test data using the trained neural network
Y_pred = nn.forward(X_test)

# Compute the Mean Squared Error (MSE) between true values and predicted values
mse = np.mean((Y_pred - Y_test) ** 2)
print(f"Mean Squared Error on Test Data: {mse}")

Mean Squared Error on Test Data: 0.16170410037842894


In [232]:
class Layer:
    def __init__(self, input_size, output_size, activation='relu'):
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation

        # Initialize weights and biases
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(2. / input_size)  # He initialization
        self.biases = np.zeros((output_size, 1))

        # Store activation function
        self.activation_function = self.get_activation_function(activation)
        self.activation_derivative = self.get_activation_derivative(activation)

    def get_activation_function(self, activation):
        if activation == 'relu':
            return lambda x: np.maximum(0, x)
        elif activation == 'sigmoid':
            return lambda x: 1 / (1 + np.exp(-x))
        elif activation == 'tanh':
            return lambda x: np.tanh(x)
        else:
            return lambda x: x  # Linear activation

    def get_activation_derivative(self, activation):
        if activation == 'relu':
            return lambda x: (x > 0).astype(float)
        elif activation == 'sigmoid':
            return lambda x: self.activation_function(x) * (1 - self.activation_function(x))
        elif activation == 'tanh':
            return lambda x: 1 - np.tanh(x)**2
        else:
            return lambda x: np.ones_like(x)  # Derivative for linear is 1

    def forward(self, input_data):
        self.input_data = input_data
        self.z = np.dot(self.weights, input_data) + self.biases
        self.a = self.activation_function(self.z)
        return self.a

    def backward(self, dA, learning_rate):
        m = self.input_data.shape[1]  # Batch size
        self.dZ = dA * self.activation_derivative(self.z)  # Elementwise multiplication of derivative
        self.dW = np.dot(self.dZ, self.input_data.T) / m
        self.db = np.sum(self.dZ, axis=1, keepdims=True) / m
        dA_prev = np.dot(self.weights.T, self.dZ)

        print("dW:", self.dW)
        print("db:", self.db)

        # Update weights and biases using SGD
        self.weights -= learning_rate * self.dW
        self.biases -= learning_rate * self.db

        return dA_prev

class NeuralNetwork:
    def __init__(self, input_size):
        self.layers = []
        self.input_size = input_size

    def add_layer(self, layer):
        self.layers.append(layer)

    def forward(self, X):
        input_data = X
        for layer in self.layers:
            input_data = layer.forward(input_data)
        return input_data

    def backward(self, Y, learning_rate):
        m = Y.shape[1]
        dA = self.layers[-1].a - Y  # Loss derivative with respect to output
        for layer in reversed(self.layers):
            dA = layer.backward(dA, learning_rate)

    def compute_loss(self, predictions, Y):
        m = Y.shape[1]
        return np.sum((predictions - Y) ** 2) / (2 * m)

def target_function(x, y):
    return np.sin(2 * np.pi * x * y) + 2 * x * y ** 2

nn = NeuralNetwork(input_size=2)
nn.add_layer(Layer(input_size=2, output_size=10, activation='relu'))  # First hidden layer
nn.add_layer(Layer(input_size=10, output_size=10, activation='relu'))  # Second hidden layer
nn.add_layer(Layer(input_size=10, output_size=1, activation='linear'))  # Output layer

x = np.random.rand()
y = np.random.rand()

input_vector = np.array([x, y]).reshape(2, 1)
output_vector = nn.forward(input_vector)
nn.backward(output_vector, learning_rate=0.01)