# Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split
import math

# Load the Dataset and Split It into Training and Test Sets

In [None]:
# Load data from an Excel file (enable exactly one of the lines below).
# The sheet is read without a header row and converted to a float array:
# `.values` keeps the sheet's dtype, so an integer-typed sheet would have its
# normalized values silently truncated by the in-place assignment below.
# data = pd.read_excel('Temperature Dataset.xlsx', header=None).to_numpy(dtype=float)
# data = pd.read_excel('ECG Datasets.xlsx', header=None).to_numpy(dtype=float)
data = pd.read_excel('Lorenz Dataset.xlsx', header=None).to_numpy(dtype=float)

num_data = data.shape[0]

# Normalize the first four columns (three inputs + one target) by dividing
# each column by its own maximum value.
# NOTE(review): the maximum is taken over the whole dataset (train + test rows)
# and this assumes every column maximum is non-zero -- confirm for new datasets.
for ii in range(4):
    data[:, ii] = data[:, ii] / np.max(data[:, ii])

# Split the dataset into a training and testing set.
# A shuffled split is available via scikit-learn instead:
# X_train, X_test,y_train, y_test = train_test_split(data[:num_data, :3], data[:num_data,3], test_size=0.25, random_state=42)

# Contiguous (unshuffled) split: the first 70% of the rows are used for
# training and the remaining rows for testing.
split_ratio_train = 0.7

split_line_number = int(num_data * split_ratio_train)

# Columns 0-2 are the inputs, column 3 is the regression target.
X_train = data[:split_line_number, :3]
y_train = data[:split_line_number, 3]

# Held-out rows with inputs and target together (first four columns).
other_data = data[split_line_number:, :4]

X_test = data[split_line_number:, :3]
y_test = data[split_line_number:, 3]

# Activation Function

In [None]:
# Name of the activation used by default in every hidden layer.
ACTIVATION_FUNC = 'leaky_relu'
# Slope applied to non-positive inputs by the leaky ReLU.
alpha = 0.01


def activation_function(x,fun_name=ACTIVATION_FUNC):
    """Apply the activation `fun_name` element-wise to `x`.

    Parameters
    ----------
    x : scalar or numpy array of pre-activations.
    fun_name : one of 'relu', 'logsig', 'tansig', 'leaky_relu'.

    Returns
    -------
    Array (or scalar) with the same shape as `x`.

    Raises
    ------
    ValueError
        If `fun_name` is not a supported activation (previously this
        silently returned None).
    """
    if fun_name == 'relu':
        return np.maximum(0, x)
    elif fun_name == 'logsig':
        # Logistic sigmoid: 1 / (1 + e^-x)
        return 1 / (1 + np.exp(-1 * x))
    elif fun_name == 'tansig':
        # Hyperbolic-tangent sigmoid: 2 / (1 + e^-2x) - 1
        return 2 / (1 + np.exp(-2 * x)) - 1
    elif fun_name == 'leaky_relu':
        return np.where(x > 0, x, alpha * x)
    raise ValueError("Unknown activation function: {!r}".format(fun_name))


def activation_function_derivative(x,fun_name=ACTIVATION_FUNC):
    """Element-wise derivative of the activation `fun_name` evaluated at `x`.

    Every branch returns an array with the same shape as `x`, so callers can
    index or build a diagonal matrix from it in the same way regardless of
    the chosen activation.

    Raises
    ------
    ValueError
        If `fun_name` is not a supported activation.
    """
    if fun_name == 'relu':
        return np.where(x > 0, 1, 0)
    elif fun_name == 'logsig':
        # f'(x) = f(x) * (1 - f(x)). The activation name is passed explicitly:
        # relying on the default would evaluate ACTIVATION_FUNC instead.
        sig_x = activation_function(x, 'logsig')
        return sig_x * (1 - sig_x)
    elif fun_name == 'tansig':
        # f'(x) = 1 - f(x)^2, again with the explicit activation name.
        tansig_x = activation_function(x, 'tansig')
        return 1 - tansig_x**2
    elif fun_name == 'leaky_relu':
        return np.where(x > 0, 1, alpha)
    raise ValueError("Unknown activation function: {!r}".format(fun_name))


# Initialize Parameters

In [None]:
# ---- Network architecture ---------------------------------------------------
# The input width follows the training matrix; three hidden layers feed a
# single output unit (the output stays linear because this is a regression).
input_neurons = X_train.shape[1]
l1_neurons = 3
l2_neurons = 100
l3_neurons = 20
output_neurons = 1

# ---- Weights and biases: uniform random values in (-1, 1) --------------------
# The seed is fixed so a fresh run draws the same initial parameters. The draw
# order (w1..w4, then b1..b4) is part of that reproducibility.
np.random.seed(1)
w1 = np.random.random((input_neurons, l1_neurons)) * 2 - 1
w2 = np.random.random((l1_neurons, l2_neurons)) * 2 - 1
w3 = np.random.random((l2_neurons, l3_neurons)) * 2 - 1
w4 = np.random.random((l3_neurons, output_neurons)) * 2 - 1

b1 = np.random.random(l1_neurons) * 2 - 1
b2 = np.random.random(l2_neurons) * 2 - 1
b3 = np.random.random(l3_neurons) * 2 - 1
b4 = np.random.random(output_neurons) * 2 - 1

# ---- Adam state --------------------------------------------------------------
# For every parameter: v* is the running average of the gradient, s* the
# running average of the squared gradient, and *_hat their bias-corrected
# versions. All start at zero. Each name gets its own independent array.
sw1, vw1, vw_hat1, sw_hat1 = (np.zeros_like(w1) for _ in range(4))
sw2, vw2, vw_hat2, sw_hat2 = (np.zeros_like(w2) for _ in range(4))
sw3, vw3, vw_hat3, sw_hat3 = (np.zeros_like(w3) for _ in range(4))
sw4, vw4, vw_hat4, sw_hat4 = (np.zeros_like(w4) for _ in range(4))

# Bias state is stored as row vectors of shape (1, n).
sb1, vb1, vb_hat1, sb_hat1 = (np.zeros((1, l1_neurons)) for _ in range(4))
sb2, vb2, vb_hat2, sb_hat2 = (np.zeros((1, l2_neurons)) for _ in range(4))
sb3, vb3, vb_hat3, sb_hat3 = (np.zeros((1, l3_neurons)) for _ in range(4))
sb4, vb4, vb_hat4, sb_hat4 = (np.zeros((1, output_neurons)) for _ in range(4))

# Adam hyper-parameters: denominator guard and the two decay rates.
epsilon = 1e-8
beta1 = 0.9
beta2 = 0.999

# ---- Training schedule -------------------------------------------------------
learning_rate = 0.002
epochs = 500  # each epoch walks the training set one sample at a time

# Per-epoch mean squared error on the training and test sets.
mse_train = np.zeros(epochs)
mse_test = np.zeros(epochs)

# Train and Test

In [None]:
# Training the MLP for regression with per-sample (online) Adam updates.
# After every epoch the network is evaluated on the test set and the training
# fit plus the training-MSE history are plotted.


def _forward(x):
    """Feed `x` (one row per sample) through the network with the current weights.

    Returns all pre-activations and activations up to the linear output net4,
    because backpropagation needs the intermediate values.
    """
    net1 = np.dot(x, w1) + b1        # net1 = x * w1 + b1
    o1 = activation_function(net1)   # o1 = f(net1)
    net2 = np.dot(o1, w2) + b2       # net2 = o1 * w2 + b2
    o2 = activation_function(net2)   # o2 = f(net2)
    net3 = np.dot(o2, w3) + b3       # net3 = o2 * w3 + b3
    o3 = activation_function(net3)   # o3 = f(net3)
    net4 = np.dot(o3, w4) + b4       # net4 = o3 * w4 + b4 (linear output)
    return net1, o1, net2, o2, net3, o3, net4


def _adam_update(param, grad, v, s, step):
    """Apply one Adam step to `param`.

    `v` / `s` are the running averages of the gradient and squared gradient,
    `step` is the 1-based count of updates performed so far (used for bias
    correction). Returns (new_param, v, s, v_hat, s_hat).
    """
    v = beta1 * v + (1 - beta1) * grad
    s = beta2 * s + (1 - beta2) * (grad ** 2)
    v_hat = v / (1 - beta1 ** step)
    s_hat = s / (1 - beta2 ** step)
    param = param - (learning_rate / (np.sqrt(s_hat) + epsilon)) * v_hat
    return param, v, s, v_hat, s_hat


n_train = len(X_train)

for epoch in range(1,epochs+1):

    # Shuffle the training data in each epoch
#     shuffle_indices = np.arange(len(X_train))
#     np.random.shuffle(shuffle_indices)
#     X_train = X_train[shuffle_indices]
#     y_train = y_train[shuffle_indices]

    total_error = 0.0
    error_data_train = np.zeros(n_train)
    output_data_train = np.zeros(n_train)
    for i in range(n_train):

        # Adam's bias correction is defined per parameter update. Parameters
        # are updated once per sample, so the time step is the running update
        # count, not the epoch number.
        adam_step = (epoch - 1) * n_train + i + 1

        #-------------------------------- Feed Forward -------------------------------------
        input_layer = X_train[i:i+1]   # keep the sample as a (1, n_inputs) row
        net1, o1, net2, o2, net3, o3, net4 = _forward(input_layer)
        o4 = net4                      # linear activation for regression
        # o4 is a (1, 1) array; store it as a plain scalar.
        output_data_train[i] = o4.item()

        #-------------------------------- Backpropagation -----------------------------------
        output_layer_error = y_train[i:i+1] - o4   # e = y - o4, shape (1, 1)

        # Activation derivatives of the hidden layers as 1-D vectors.
        fprim_net3 = np.asarray(activation_function_derivative(net3))[0]
        fprim_net2 = np.asarray(activation_function_derivative(net2))[0]
        fprim_net1 = np.asarray(activation_function_derivative(net1))[0]

        # do4/dnet_k for each hidden layer, computed once from the weights as
        # they are BEFORE this sample's update. Multiplying element-wise by
        # f'(net) is the same as right-multiplying by diag(f'(net)).
        delta3 = w4.T * fprim_net3                    # (1, l3): w4 * f'(net3)
        delta2 = np.dot(delta3, w3.T) * fprim_net2    # (1, l2): ... * w3 * f'(net2)
        delta1 = np.dot(delta2, w2.T) * fprim_net1    # (1, l1): ... * w2 * f'(net1)

        # dE/dparam = dE/de * de/do4 * do4/dparam = e * -1 * do4/dparam.
        # None of the gradients is pre-scaled by the learning rate; the step
        # size is applied exactly once inside the Adam update.
        neg_error = -1 * output_layer_error
        dw4 = np.dot(o3.T, neg_error)
        db4 = neg_error
        dw3 = neg_error * np.dot(o2.T, delta3)
        db3 = neg_error * delta3
        dw2 = neg_error * np.dot(o1.T, delta2)
        db2 = neg_error * delta2
        dw1 = neg_error * np.dot(input_layer.T, delta1)
        db1 = neg_error * delta1

        # Adam updates, output layer first.
        w4, vw4, sw4, vw_hat4, sw_hat4 = _adam_update(w4, dw4, vw4, sw4, adam_step)
        b4, vb4, sb4, vb_hat4, sb_hat4 = _adam_update(b4, db4, vb4, sb4, adam_step)
        w3, vw3, sw3, vw_hat3, sw_hat3 = _adam_update(w3, dw3, vw3, sw3, adam_step)
        b3, vb3, sb3, vb_hat3, sb_hat3 = _adam_update(b3, db3, vb3, sb3, adam_step)
        w2, vw2, sw2, vw_hat2, sw_hat2 = _adam_update(w2, dw2, vw2, sw2, adam_step)
        b2, vb2, sb2, vb_hat2, sb_hat2 = _adam_update(b2, db2, vb2, sb2, adam_step)
        w1, vw1, sw1, vw_hat1, sw_hat1 = _adam_update(w1, dw1, vw1, sw1, adam_step)
        b1, vb1, sb1, vb_hat1, sb_hat1 = _adam_update(b1, db1, vb1, sb1, adam_step)

        sample_error = output_layer_error.item()
        error_data_train[i] = sample_error
        total_error += abs(sample_error)

    # Training MSE of this epoch, measured on the outputs produced while the
    # weights were being updated.
    mse_train[epoch-1] = np.mean(error_data_train ** 2)

    # Testing the trained MLP for regression: one batched forward pass over the
    # whole test set with the weights reached at the end of this epoch.
    *_, net4_test = _forward(X_test)
    output_data_test = net4_test[:, 0]
    error_data_test = y_test - output_data_test

    mse_test[epoch-1] = np.mean(error_data_test ** 2)

    # Plotting the training data and output
    plt.figure(figsize=(20, 8))
    plt.subplot(2, 2, 1)
    plt.plot(y_train)
    plt.plot(output_data_train, 'r', linewidth=0.5)
    plt.xlabel('Train Data')
    plt.ylabel('Output')
    plt.legend(['Actual', 'Predicted'])

    # Plotting the training MSE for epochs 1..epoch, including the one just finished
    plt.subplot(2, 2, 2)
    plt.semilogy(np.arange(1, epoch + 1), mse_train[:epoch])
    plt.xlabel('Epoch')
    plt.ylabel('MSE Train')

    print('Epoch: {} \t'.format(epoch))
    print('total_error: ',total_error)
    print('MSE_train: {:.4f}'.format(mse_train[epoch-1]))

    plt.tight_layout()
    plt.show()

    print("\n\033[1;m" + "*" * 125)


print("****************************** Training completed *******************************")


In [None]:
# After the training loop `epoch` holds the 1-based number of the last epoch,
# so results live at indices 0 .. epoch-1 of the MSE arrays. The value is
# copied instead of decremented in place: mutating `epoch` made this cell
# shift its window every time it was re-run and left the final epoch out of
# the curves.
epochs_run = epoch
epoch_axis = np.arange(1, epochs_run + 1)


def _plot_fit_and_mse(actual, predicted, mse_history, data_label, mse_label):
    """Draw actual vs. predicted outputs next to the per-epoch MSE curve (log scale)."""
    plt.figure(figsize=(20, 8))

    # Left panel: target values and network outputs over the samples.
    plt.subplot(2, 2, 1)
    plt.plot(actual)
    plt.plot(predicted, 'r', linewidth=0.5)
    plt.xlabel(data_label)
    plt.ylabel('Output')
    plt.legend(['Actual', 'Predicted'])

    # Right panel: MSE of every completed epoch.
    plt.subplot(2, 2, 2)
    plt.semilogy(epoch_axis, mse_history[:epochs_run])
    plt.xlabel('Epoch')
    plt.ylabel(mse_label)


# Plotting the training data, output and MSE
_plot_fit_and_mse(y_train, output_data_train, mse_train, 'Train Data', 'MSE Train')

# Plotting the test data, output and MSE
_plot_fit_and_mse(y_test, output_data_test, mse_test, 'Test Data', 'MSE Test')


# MSE of the last completed epoch
print('MSE_train: ',mse_train[epochs_run - 1])
print('MSE_test: ',mse_test[epochs_run - 1])

In [None]:
def _scatter_with_fit(figure_id, actual, predicted, heading):
    """Scatter `predicted` against `actual` in figure `figure_id` and overlay
    the degree-1 least-squares fit of predicted on actual.

    Returns the fitted (slope, intercept).
    """
    plt.figure(figure_id)
    slope, intercept = np.polyfit(actual, predicted, 1)
    plt.scatter(actual, predicted, facecolors='none', edgecolors='#104E8B')
    plt.plot(actual, slope * actual + intercept, 'r')
    plt.title(heading)
    return slope, intercept


# Predicted-vs-actual regression plots: figure 2 for the training set,
# figure 3 for the test set.
m_train, b_train = _scatter_with_fit(2, y_train, output_data_train, 'Regression Train')
m_test, b_test = _scatter_with_fit(3, y_test, output_data_test, 'Regression Test')

plt.tight_layout()
plt.show()

# The last entry of each history array is the MSE of the final epoch.
mse_train_result = mse_train[-1]
mse_test_result = mse_test[-1]

print("Final MSE on Train Data:", mse_train_result)
print("Final MSE on Test Data:", mse_test_result)