In [None]:
import numpy as np
import random as rnd

# Data generation
def func(x1, x2):
    """Rosenbrock function: (1 - x1)^2 + 100 * (x2 - x1^2)^2."""
    return (1 - x1) ** 2 + 100 * (x2 - x1 ** 2) ** 2


def _draw_points(count):
    """Return `count` [x1, x2] pairs, each coordinate drawn uniformly from [-1, 1]."""
    return [[rnd.uniform(-1, 1), rnd.uniform(-1, 1)] for _ in range(count)]


# Training points are drawn before test points, so the RNG call order is fixed.
train_points = _draw_points(200)
test_points = _draw_points(100)

# Targets are evaluated on the plain Python floats, then packed into columns.
Y_test = [func(p1, p2) for p1, p2 in test_points]

X = np.array(train_points)
Y = np.array([func(p1, p2) for p1, p2 in train_points]).reshape(-1, 1)
T1 = np.array(test_points)
Y_T = np.array(Y_test).reshape(-1, 1)

# Activation functions
def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and `z`."""
    rectified = np.maximum(0, z)
    return rectified

def relu_derivative(z):
    """Return 1 where `z` is strictly positive and 0 elsewhere (0 at z == 0)."""
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

def absolute(z):
    """Element-wise absolute value of `z`."""
    return np.absolute(z)

# Forward propagation: ReLU on the hidden layer and ReLU on the output layer
def forward_propagation(X):
    """Run one forward pass through the two-layer network.

    Reads the module-level parameters `weights_input_to_hidden`,
    `biases_hidden`, `weights_hidden_to_output` and `biases_output`.

    Args:
        X: input accepted by `np.dot` against the input-to-hidden weights.

    Returns:
        Tuple `(z, yhat)`: the hidden-layer activations and the network output.
    """
    hidden_pre_activation = np.dot(X, weights_input_to_hidden) + biases_hidden
    hidden_activation = relu(hidden_pre_activation)
    output_pre_activation = (
        np.dot(hidden_activation, weights_hidden_to_output) + biases_output
    )
    # NOTE(review): the output is passed through relu, not through the
    # `absolute` helper that earlier comments mentioned - confirm which
    # activation is intended.
    prediction = relu(output_pre_activation)
    return hidden_activation, prediction

#clip gradients
def clip_gradients(dw_hidden, db_hidden, dw_output, db_output, clip_value=1.0):
    """Clip every gradient entry to [-clip_value, clip_value], in place.

    The four arrays are modified directly (via `out=`); nothing is returned.
    """
    lower, upper = -clip_value, clip_value
    for gradient in (dw_hidden, db_hidden, dw_output, db_output):
        np.clip(gradient, lower, upper, out=gradient)


# Backpropagation with gradient calculations
def backpropagation(X, Y, z, yhat):
    """Compute parameter gradients from the prediction error `yhat - Y`.

    Reads the module-level `weights_hidden_to_output`.

    Args:
        X: network inputs used in the forward pass.
        Y: target values.
        z: hidden-layer activations returned by the forward pass.
        yhat: network output returned by the forward pass.

    Returns:
        Tuple `(dw_hidden, db_hidden, dw_output, db_output)`.
    """
    # The raw error is used as the output-layer delta; no derivative of the
    # output activation is applied here.
    output_delta = yhat - Y
    # relu_derivative is evaluated on the activations `z`; presumably these
    # are ReLU outputs, in which case z > 0 exactly where the pre-activation
    # was positive.
    hidden_mask = relu_derivative(z)
    hidden_delta = output_delta.dot(weights_hidden_to_output.T) * hidden_mask

    # Gradients are summed (not averaged) over axis 0.
    dw_output = z.T.dot(output_delta)
    db_output = output_delta.sum(axis=0, keepdims=True)
    dw_hidden = X.T.dot(hidden_delta)
    db_hidden = hidden_delta.sum(axis=0, keepdims=True)
    return dw_hidden, db_hidden, dw_output, db_output



# Update parameters with gradient clipping
def update_parameters(dw_hidden, db_hidden, dw_output, db_output, learning_rate):
    """Apply one gradient-descent step to the module-level parameters.

    The gradients are first clipped in place by `clip_gradients` (default
    clip value), then each parameter is decreased by
    `learning_rate * gradient`.
    """
    global weights_input_to_hidden, biases_hidden, weights_hidden_to_output, biases_output
    clip_gradients(dw_hidden, db_hidden, dw_output, db_output)

    hidden_weight_step = learning_rate * dw_hidden
    hidden_bias_step = learning_rate * db_hidden
    output_weight_step = learning_rate * dw_output
    output_bias_step = learning_rate * db_output

    weights_input_to_hidden -= hidden_weight_step
    biases_hidden -= hidden_bias_step
    weights_hidden_to_output -= output_weight_step
    biases_output -= output_bias_step

# Training function
def train(X, Y, epochs, learning_rate, epoch_print):
    """Train the network with full-batch gradient descent.

    Args:
        X: training inputs.
        Y: training targets.
        epochs: number of passes to run.
        learning_rate: step size, held constant for the whole run.
        epoch_print: the loss is printed whenever `epoch % epoch_print == 0`.
    """
    for epoch in range(epochs):
        z, yhat = forward_propagation(X)
        # Mean squared error, measured before this epoch's parameter update.
        residual = yhat - Y
        loss = np.mean(residual ** 2)

        gradients = backpropagation(X, Y, z, yhat)
        update_parameters(*gradients, learning_rate)

        if epoch % epoch_print != 0:
            continue
        print(f"Loss at epoch {epoch}: {loss}")

# Prediction function
def predict(X, Y_true=None):
    """Run the network on `X` and print its MSE and MAE against the targets.

    Args:
        X: inputs passed to `forward_propagation`.
        Y_true: targets to score against, one per row of `X`. When omitted,
            the module-level `Y_T` is used, as before.

    Raises:
        ValueError: if the number of targets differs from the number of
            predictions. Without this check NumPy may broadcast the two
            arrays against each other and report a meaningless error.
    """
    _, yhat = forward_propagation(X)
    targets = np.asarray(Y_T if Y_true is None else Y_true)
    if targets.size != yhat.size:
        raise ValueError(
            f"got {yhat.size} predictions but {targets.size} targets"
        )
    # Align the target layout with the predictions so that a flat target
    # vector is not broadcast against the column of predictions.
    error = yhat - targets.reshape(yhat.shape)
    mse = np.mean(error ** 2)
    print(f"Mean Squared Error: {mse}")
    mae = np.mean(np.abs(error))
    print(f"Mean Absolute Error: {mae}")

# Initialize parameters
input_size, hidden_size, output_size = 2, 70, 1

# Weights: standard-normal draws scaled by sqrt(1 / fan_in).
# Biases: unscaled standard-normal draws.
# The order of the four randn calls is kept: both weight matrices, then both biases.
weights_input_to_hidden = np.sqrt(1 / input_size) * np.random.randn(input_size, hidden_size)
weights_hidden_to_output = np.sqrt(1 / hidden_size) * np.random.randn(hidden_size, output_size)
biases_hidden = np.random.randn(1, hidden_size)
biases_output = np.random.randn(1, output_size)

# Training and Prediction
epochs = 25001
learning_rate = 0.0008
epoch_print = 500
train(X, Y, epochs=epochs, learning_rate=learning_rate, epoch_print=epoch_print)
predict(T1)

# Network output at the single point (-1, -1).
corner_prediction = forward_propagation([-1, -1])[1]
print(corner_prediction)

Loss at epoch 0: 6813.198923800566
Loss at epoch 500: 5227.560374596914
Loss at epoch 1000: 3307.0279562029878
Loss at epoch 1500: 2497.3077752902627
Loss at epoch 2000: 1937.0424157439243
Loss at epoch 2500: 1471.0927818997475
Loss at epoch 3000: 1110.2182296118324
Loss at epoch 3500: 835.3470685208082
Loss at epoch 4000: 611.198178453714
Loss at epoch 4500: 414.1910153612584
Loss at epoch 5000: 270.56313500211115
Loss at epoch 5500: 179.62750004725953
Loss at epoch 6000: 117.64071976368625
Loss at epoch 6500: 78.8757559017861
Loss at epoch 7000: 52.457045067054985
Loss at epoch 7500: 34.63452614618048
Loss at epoch 8000: 22.47687123300391
Loss at epoch 8500: 14.455519475418054
Loss at epoch 9000: 9.648347216702637
Loss at epoch 9500: 7.382303499898655
Loss at epoch 10000: 6.099470626721984
Loss at epoch 10500: 5.481470212410995
Loss at epoch 11000: 4.906118897966373
Loss at epoch 11500: 4.364433520270035
Loss at epoch 12000: 4.154656421786957
Loss at epoch 12500: 4.12115353173036
Los