In [1]:
import numpy as np

In [2]:
class Multilayer_Perceptron:
    """Fully connected feed-forward network trained with mini-batch backpropagation.

    `layer_arch` gives the number of nodes in every layer starting from the
    second one (i.e. the input layer is excluded). The counts do not include
    the bias node: if ``layer_arch[layer] == 2`` then that layer holds
    2 + 1 (bias) = 3 nodes. The output layer has no bias node.

    Parameters
    ----------
    dataset : sequence of sequences
        One row of features per instance. A constant bias feature ``1`` is
        appended to every row and the result is stored in ``self.dataset``.
    target : sequence
        Expected output for each instance, indexed like `dataset`.
    layer_arch : sequence of int
        Node count (without bias) of each hidden layer followed by the
        output layer.
    activation_function : callable
        Scalar activation, applied element by element. Defaults to the
        logistic sigmoid.
    gradient_function : callable
        Derivative of the activation expressed in terms of the activation's
        *output* value. Defaults to ``x * (1 - x)``, matching the sigmoid.

    Attributes set while training
    -----------------------------
    weights : list of ndarray, one ``(n_in + 1, n_out)`` matrix per layer.
    mini_batch, mini_target : the mini-batch currently being processed.
    outs, deltas : per-instance layer outputs / error terms of that mini-batch.
    """

    def __init__(self, dataset, target, layer_arch, activation_function = (lambda x: 1 / (1 + np.exp(-x))), 
                 gradient_function = (lambda x: x*(1-x))):
        # Append the bias column. `list(instance)` makes sure the bias is
        # concatenated even when a row is a NumPy array (where `row + [1]`
        # would add 1 to every feature instead).
        self.dataset = [list(instance) + [1] for instance in dataset]
        self.layer_arch = layer_arch
        self.target = target
        self.activation_function = activation_function
        self.gradient_function = gradient_function

    @staticmethod
    def _apply(function, values):
        """Apply a scalar `function` to every element of `values`; return an ndarray."""
        return np.array([function(value) for value in values])

    def _forward(self, instance):
        """Propagate one bias-augmented `instance` through the network.

        Returns a list ``[input, layer_1, ..., output]``. Every hidden layer
        carries a trailing bias node fixed at 1; the output layer does not.
        """
        n_layers = len(self.layer_arch)
        layer_outs = [instance]
        for layer_index in range(n_layers):
            weight = self.weights[layer_index].T
            activation = self._apply(self.activation_function, weight @ layer_outs[-1])
            if layer_index < n_layers - 1:
                activation = np.append(activation, 1)
            layer_outs.append(activation)
        return layer_outs

    def update_outs(self):
        """Run the forward and backward pass for the current mini-batch.

        Reads ``self.mini_batch``, ``self.mini_target`` and ``self.weights``;
        rebuilds ``self.outs`` (layer outputs) and ``self.deltas`` (error
        terms) with one entry per instance. Weights are not modified here.
        """
        n_layers = len(self.layer_arch)
        self.outs = []
        self.deltas = []
        for instance_index in range(len(self.mini_batch)):
            layer_outs = self._forward(self.mini_batch[instance_index])

            # Output layer: (target - output) scaled by the activation slope.
            layer_deltas = [None] * n_layers
            layer_deltas[-1] = (self.mini_target[instance_index] - layer_outs[-1]) \
                * self._apply(self.gradient_function, layer_outs[-1])

            # Walk backwards through the hidden layers.
            for layer_inv_index in range(1, n_layers):
                delta = layer_deltas[-layer_inv_index]
                if layer_inv_index > 1:
                    # Hidden deltas carry an entry for the bias node, which
                    # has no incoming weights, so drop it before propagating.
                    delta = delta[:-1]
                out = layer_outs[-(layer_inv_index + 1)]
                layer_deltas[-(layer_inv_index + 1)] = \
                    self._apply(self.gradient_function, out) * (self.weights[-layer_inv_index] @ delta)

            self.outs.append(layer_outs)
            self.deltas.append(layer_deltas)

    def update_weights(self, momentum = 1, learning_rate = 0.5):
        """Apply the weight corrections of every instance in the mini-batch.

        For each instance and layer: ``W = momentum * W + learning_rate * outer(out, delta)``.
        Note that `momentum` multiplies the current weights (once per
        instance); it is a weight-scaling factor rather than a classical
        velocity term, and the default of 1 leaves the weights unscaled.
        """
        n_layers = len(self.layer_arch)
        for instance_index in range(len(self.mini_batch)):
            for layer_inv_index in range(1, n_layers + 1):
                delta = self.deltas[instance_index][-layer_inv_index]
                if layer_inv_index > 1:
                    delta = delta[:-1]  # skip the bias node's delta
                out = self.outs[instance_index][-(layer_inv_index + 1)]
                increment = np.outer(out, delta)
                self.weights[-layer_inv_index] = \
                    momentum * self.weights[-layer_inv_index] + learning_rate * increment

    def fit(self, epochs=100, batch_size=32, learning_rate=0.5, momentum=1, print_training_predictions=False):
        """Train the network with shuffled mini-batches.

        Weights are re-initialised uniformly in [-1, 1) on every call.

        Parameters
        ----------
        epochs : number of passes over the dataset.
        batch_size : instances per mini-batch (the last one may be smaller).
        learning_rate, momentum : forwarded to `update_weights`.
        print_training_predictions : if True, print ``features, first output``
            for every training instance, in dataset order, using the final
            weights.

        Raises
        ------
        ValueError
            If `batch_size` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        num_instances = len(self.dataset)
        counter = 0

        self.weights = [np.random.uniform(-1, 1, [len(self.dataset[0]), self.layer_arch[0]])]
        for index in range(len(self.layer_arch) - 1):
            self.weights.append(np.random.uniform(-1, 1, [self.layer_arch[index]+1, self.layer_arch[index+1]]))

        while counter < epochs:
            # Shuffle the dataset and targets with the same permutation
            indices = np.arange(num_instances)
            np.random.shuffle(indices)
            shuffled_dataset = [self.dataset[i] for i in indices]
            shuffled_target = [self.target[i] for i in indices]

            for start_idx in range(0, num_instances, batch_size):
                end_idx = min(start_idx + batch_size, num_instances)
                self.mini_batch = shuffled_dataset[start_idx:end_idx]
                self.mini_target = shuffled_target[start_idx:end_idx]

                # Deltas for the whole mini-batch are computed with the same
                # weights, then all corrections are applied.
                self.update_outs()
                self.update_weights(momentum=momentum, learning_rate=learning_rate)

            counter += 1

        if print_training_predictions:
            # `self.outs` only covers the last (shuffled) mini-batch, so run a
            # fresh forward pass over the full dataset instead of reusing it.
            for instance in self.dataset:
                prediction = self._forward(instance)[-1][0]
                print(f'{instance[:-1]}, {prediction}')

    def predict(self, new_instance):
        """Print the first output node for `new_instance` and return all outputs.

        `new_instance` is a feature row *without* the bias term; `fit` must
        have been called first so that ``self.weights`` exists.

        Returns
        -------
        ndarray
            Activations of the output layer (length ``layer_arch[-1]``).
        """
        outputs = self._forward(np.append(new_instance, 1))[-1]
        print(f'{outputs[0]}')
        return outputs
                

In [3]:
# Test whether the NN can learn the following step pattern on x in [0, 1):
#   y = 0.3  for x < 0.38 or x >= 0.9
#   y = 1    for 0.38 <= x < 0.9

size_dataset = 1000
# One single-feature row per instance, drawn uniformly from [0, 1).
X = [[np.random.uniform(0, 1)] for _ in range(size_dataset)]
y = [0.3 if (feature[0] < 0.38 or feature[0] >= 0.9) else 1 for feature in X]

In [5]:
# Network with two hidden layers of 5 nodes each and a single output node
# (bias nodes are not counted in `layer_arch`; the class adds them itself).
inst = Multilayer_Perceptron(X, y, [5,5,1])

In [6]:
# Train for 10000 epochs with the default batch size (32) and learning rate (0.5).
# Every epoch loops over all instances in pure Python, so this cell may take a while.
inst.fit(epochs = 10000)

In [12]:
# Spot-check a point just inside the 0.38 <= x < 0.9 band, where the label is 1.
inst.predict([0.39])

0.9946149343713591
