In [1]:
from MLP import *
from dataManipulation import *
from activationFunctions import *
import MLP_bias

In [34]:
import numpy as np
from activationFunctions import *
from dataManipulation import * 

class Perceptron(object):
    def __init__(self,  num_inputs: int, num_neurons: list, num_outputs: int, func_activation: list, eta = 0.01) -> None:
        """ 
        Initializes the multi-layer perceptron with the specified architecture.

        Args:
        - num_inputs (int): Number of input features.
        - num_neurons (list of int): Number of neurons in each layer.
        - num_output (int): Number of output features.
        - func_activation (list): List of activation functions for each layer.

        """

        # Set the size of the network
        n_layers = [num_inputs] + num_neurons + [num_outputs]
        self.num_layers = len(n_layers)
        self.layers = [0]*self.num_layers
        self.num_neurons = n_layers
        

        # Define all of the layers
        input = num_inputs
        for i, neuron in enumerate(n_layers):
            self.layers[i] = Layer(input, neuron, func_activation[i], eta)
            input = neuron

    def forward(self, input: np.ndarray) -> np.ndarray:
        """
        Performs the forward pass through the neural network.

        Args:
        - input (array): The input data for the forward pass.

        Returns:
        - output (array): The output prediction of the neural network.

        """
        input = input.reshape(-1,1)
        # Iterate through all neurons
        for i in range(self.num_layers):
            input = self.layers[i].forward(input)

        self.output = input
        return self.output
    
    def backward(self, error: np.ndarray) -> list:
        """
        Performs the backward pass to update the network's weights.

        Args:
        - error (array): The error in the prediction.

        Returns:
        - gradients_prom (list of float): List of average gradients for each layer.

        """
        #Initializing an array for the mean of the gradients in every layer
        gradients = [0] * self.num_layers

        # Backpropagation for output layer
        gradients[-1] = self.layers[-1].backward(-np.sum(error), 1, True)
        
        # Backpropagation for all the other layers
        for i in reversed(range(self.num_layers - 1)):
            gradients[i] = self.layers[i].backward(self.layers[i + 1].weights, gradients[i+1])

        return gradients

    def train_batch(self, data, epochs=50, iterations=10):
        """
        Train the neural network using mini-batch gradient descent.

        Args:
        - data (array): Input data with the last value as the target column.
        - epochs (int): Number of training epochs.
        - iterations (int): Number of mini-batch iterations per epoch.

        Returns:
        - gradients (list): List of gradients computed during training.
        - gradient_epochs (list): List of average gradients for each epoch.
        - instant_energy_train (list): List of instant energy values during training.
        - instant_average_energy_train (list): List of average energy values during training.
        - instant_average_energy_test (list): List of average energy values during testing.

        """
        # Split data into training and testing sets
        train, test, _ = train_test_val(data, (75, 25, 0))

        # Initialize counters and auxiliary variables
        counter = 0
        data_len = len(train)
        gradients = [0] * iterations * epochs;   gradient_epochs = [0] * iterations
        instant_energy_train = [0] * epochs * iterations;  instant_average_energy_train = [0] * iterations
        instant_average_energy_test = [0] * epochs * iterations

        
        # Split the training data into input and target
        train_x, train_y = train[:, 0:-1], train[:, -1]
        test_x, test_y = test[:, 0:-1], test[:, -1]

        for iter in range(iterations):

            for epoch in range(epochs):
                # Forward pass for the entire training set
                y_pred = np.array([self.forward(x) for x in train_x])
                error = train_y - y_pred

                # Find the index with the largest error
                idx = np.argmax(error)

                # Compute instant energy and gradients for the mini-batch
                instant_energy_train[counter] = np.mean(error**2) / 2
                self.forward(np.array([train_x[idx, :]]))
                gradients[counter] = self.backward(error[idx])

                # Test error
                error_test = test_y - np.array([self.forward(x) for x in test_x])
                instant_average_energy_test[counter] = np.mean(error_test**2) / 2

                # Early stopping if test error is below a threshold
                if instant_average_energy_test[counter] < 0.02:
                    print("Paroooo")
                    counter += epochs-epoch-1
                    break
                elif instant_average_energy_test[counter] > instant_energy_train[counter] +0.5 :
                    # If the testing error is considerably higher than the training one, resample
                    print("Resampleooo")
                    train_x, train_y = train[:, 0:-1], train[:, -1]
                    test_x, test_y = test[:, 0:-1], test[:, -1]
                counter += 1

            # Compute average gradient and average energy for the current iteration
            #gradient_epochs[iter] = np.mean(gradients[iter * epochs:iter * epochs + epoch +1], axis=0)
            instant_average_energy_train[iter] = np.mean(instant_energy_train[iter * epochs:iter * epochs + epoch+1])

        return gradients, gradient_epochs, instant_energy_train, instant_average_energy_train, instant_average_energy_test


class Layer():
    def __init__(self, num_inputs: int, num_neurons:int, activation: object, eta:float) -> None:
        """
        Initializes a neural network layer.

        Args:
        - n_inputs (int): Number of input features.
        - n_neurons (int): Number of neurons in the layer.
        - activation (Activation): Activation function for the layer.

        """

        # Initialize weights randomly (you can uncomment the random initialization below)
        #self.weights = np.random.randn(num_neurons, num_inputs) * 2 - 1
        self.weights = np.ones((num_neurons, num_inputs))

        # Initialize bias randomly (you can uncomment the random initialization below)
        #self.bias = np.random.randn(num_neurons, 1) * 2 - 1
        self.bias = np.ones((num_neurons, 1))
        self.weights = np.hstack((self.weights, self.bias))
        
        # Set Activation function
        self.activation = activation  # Create an instance of the provided activation function
        # Learning rate (you can adjust this)
        self.eta = eta

    def forward(self, input: np.ndarray) -> np.ndarray:
        """
        Performs the forward pass through the layer.

        Args:
        - input (array): The input stimuli for the layer.

        """

        # Save the input input for later use in backward pass
        self.input = input

        # Compute the initial linear combination of input and weights
        self.field = np.matmul(self.weights, np.vstack((self.input,1)))

        # Apply the activation function to the linear combination
        self.output = self.activation.forward(self.field)
        return self.output
    
    def backward(self, weights_prev, local_gradient_prev, first = False):
        """
        Performs backpropagation for the layer.

        Args:
        - weights_prev (array): Weights from the next layer.
        - local_gradient_prev (array): Local gradient from the next layer.

        Returns:
        - local_gradient (array): Local gradient for this layer.

        """

        phi_prime = self.activation.backward(self.field)  # Compute the derivative of the activation function

        # Compute local gradient for this layer using chain rule and weights from the next layer
        local_gradient = np.multiply(phi_prime, np.dot(weights_prev[:,0:-1].T if not first else weights_prev, local_gradient_prev))

        # Compute weight update using the local gradient and input stimuli
        delta = np.dot(local_gradient, np.vstack((self.input,1)).T) # Weight and bias change
        assert delta.shape == self.weights.shape

        # Update weights using the learning rate and calculated delta
        weights_new = self.weights + self.eta * delta
        self.weights = weights_new

        return local_gradient

In [35]:
import scipy.io
mat = scipy.io.loadmat('datosIA.mat')

data = np.column_stack([mat["X"], mat["OD"], mat["S"]])
data = normalize_min_max(data)

# X = [X, OD], Y = [S]
train, _, valid = train_test_val(data, (80,0,20))

In [36]:
n_neurons = [3]
n_activation = [linear(0.3), sigmoid(1.5), tanh(0.3)]

MLP = Perceptron(2,n_neurons, 1, n_activation,0.3)
grad, grad_epoc, inst_ener_train, inst_aver_ener_train, inst_aver_ener_test = MLP.train_batch(train,50,5)

In [37]:
print(np.mean(inst_aver_ener_test))
np.array([MLP.forward(x) for x in valid[:,0:-1]])

0.39522597750728033


array([[[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [[1.]],

       [

In [39]:
MLP.layers[1].weights

array([[ 42.07537083,  42.07537083, 136.28736133],
       [ 42.07537083,  42.07537083, 136.28736133],
       [ 42.07537083,  42.07537083, 136.28736133]])

In [None]:
mat = scipy.io.loadmat('datosIA.mat')

data = np.column_stack([mat["X"], mat["OD"], mat["S"]])
data = normalize_min_max(data)

# X = [X, OD], Y = [S]
train, _, valid = train_test_val(data, (80,0,20))