In [2]:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)

# Generate dataset
def generate_data(n_samples=10000, x_min=0, x_max=10):
    X = np.random.uniform(x_min, x_max, (n_samples, 3))
    y1 = 2 * X[:, 0] + 3 * X[:, 1] + X[:, 2] + 1
    y2 = 3 * X[:, 0] + 2 * X[:, 1] + X[:, 2] ** 2 + 2

    y = np.column_stack((y1, y2))

    # Normalize inputs and outputs
    X_min, X_max = X.min(axis=0), X.max(axis=0)
    y_min, y_max = y.min(axis=0), y.max(axis=0)

    X = (X - X_min) / (X_max - X_min)
    y = (y - y_min) / (y_max - y_min)

    return X, y, X_min, X_max, y_min, y_max

# Define activation functions and their derivatives
def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def leaky_relu_derivative(x, alpha=0.01):
    return np.where(x > 0, 1, alpha)

def linear(x):
    return x

def linear_derivative(x):
    return np.ones_like(x)

activation_functions = {
    'relu': (leaky_relu, leaky_relu_derivative),
    'linear': (linear, linear_derivative)
}

class NeuralNetwork:
    def __init__(self, layers, activations):
        self.layers = layers
        self.activations = activations
        self.weights = []
        self.biases = []
        self.initialize_weights()

    def initialize_weights(self):
        for i in range(len(self.layers) - 1):
            weight = np.random.randn(self.layers[i], self.layers[i + 1]) * np.sqrt(2 / self.layers[i])  # He Initialization
            bias = np.zeros((1, self.layers[i + 1]))
            self.weights.append(weight)
            self.biases.append(bias)

    def feedforward(self, X):
        self.layer_outputs = [X]
        for i in range(len(self.weights)):
            activation, _ = activation_functions[self.activations[i]]
            X = activation(np.dot(X, self.weights[i]) + self.biases[i])
            self.layer_outputs.append(X)
        return X

    def backpropagation(self, X, y, learning_rate):
        output_error = y - self.layer_outputs[-1]
        _, derivative = activation_functions[self.activations[-1]]
        deltas = [output_error * derivative(self.layer_outputs[-1])]

        for i in reversed(range(len(deltas), len(self.weights))):
            _, derivative = activation_functions[self.activations[i]]
            delta = deltas[-1].dot(self.weights[i].T) * derivative(self.layer_outputs[i])
            deltas.append(delta)
        deltas.reverse()

        max_grad_norm = 5  # Gradient clipping threshold
        for i in range(len(self.weights)):
            np.clip(deltas[i], -max_grad_norm, max_grad_norm, out=deltas[i])  # Clip gradients
            self.weights[i] += self.layer_outputs[i].T.dot(deltas[i]) * learning_rate
            self.biases[i] += np.sum(deltas[i], axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            self.feedforward(X)
            self.backpropagation(X, y, learning_rate)
            if epoch % 100 == 0 or epoch == epochs - 1:
                mse = np.mean(np.square(y - self.layer_outputs[-1]))
                print(f'Epoch {epoch + 1}/{epochs}, Error: {mse:.6f}')

    def predict(self, X):
        return self.feedforward(X)

# Generate dataset
X, y, X_min, X_max, y_min, y_max = generate_data()

# Define network structure
input_size = 3
output_size = 2
hidden_layers = [10, 15, 10]
layers = [input_size] + hidden_layers + [output_size]
activations = ['relu', 'relu', 'relu', 'linear']

# Initialize neural network
nn = NeuralNetwork(layers, activations)

# Train the neural network
nn.train(X, y, epochs=1000, learning_rate=0.00001)

# Predict value for new input
x_test = np.array([[4, 5, 6]])
x_test_norm = (x_test - X_min) / (X_max - X_min)  # Normalize input

y_test_pred_norm = nn.predict(x_test_norm)
y_test_pred = y_test_pred_norm * (y_max - y_min) + y_min  # Denormalize prediction

# Exact function values
y_exact = np.array([
    2 * x_test[0, 0] + 3 * x_test[0, 1] + x_test[0, 2] + 1,
    3 * x_test[0, 0] + 2 * x_test[0, 1] + x_test[0, 2] ** 2 + 2
])

print(f'Prediction for x_test={x_test}: {y_test_pred[0]}, Exact value: {y_exact}')


Epoch 1/1000, Error: 0.206158
Epoch 101/1000, Error: 0.006617
Epoch 201/1000, Error: 0.004046
Epoch 301/1000, Error: 0.003227
Epoch 401/1000, Error: 0.002779
Epoch 501/1000, Error: 0.002466
Epoch 601/1000, Error: 0.002214
Epoch 701/1000, Error: 0.002004
Epoch 801/1000, Error: 0.001823
Epoch 901/1000, Error: 0.001660
Epoch 1000/1000, Error: 0.001516
Prediction for x_test=[[4 5 6]]: [28.45633997 67.69589597], Exact value: [30 60]
