In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def parse_training_data(path):
    '''
    Parse a training-data text file into NumPy arrays.

    Expected layout of the file:
        line 1:   ignored (free-form first line)
        line 2:   starts with '#' and holds the whitespace-separated
                  fields P=<P_VALUE>, N=<N_VALUE> and M=<M_VALUE>
        line 3+:  one sample per line; the first N numbers are the
                  input values, the last M numbers the desired outputs

    Numbers may be separated by any amount of whitespace, and blank
    lines among the samples are skipped.

    path:   path to the training-data file

    Returns the tuple (P, N, M, X_train, Y_train), where X_train has
    shape (P, N) and Y_train has shape (P, M).

    Raises ValueError if line 2 does not define P, N and M, if a
    sample line holds fewer than N+M numbers, or if the number of
    sample lines differs from P.
    '''
    sizes = {}
    X_train = Y_train = None
    row = 0 # index of the next sample row to fill

    with open(path, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()

            if line_number == 1: # first line carries no data
                continue

            if line_number == 2: # the line contains the values of P, N, M
                # drop the leading # sign, then split on any run of whitespace
                for segment in line.lstrip('#').split():
                    key, _, value = segment.partition('=') # e.g. P=<P_VALUE>
                    if key in ('P', 'N', 'M'):
                        sizes[key] = int(value)

                missing = [key for key in ('P', 'N', 'M') if key not in sizes]
                if missing:
                    raise ValueError(
                        f'{path}: line 2 does not define {", ".join(missing)}'
                    )

                P, N, M = sizes['P'], sizes['N'], sizes['M']
                X_train = np.empty((P, N))
                Y_train = np.empty((P, M))
                continue

            if not line: # a blank line must not end the parsing
                continue

            # split() without arguments never yields empty strings, so
            # column-aligned files (several spaces between numbers) are fine
            values = line.split()
            if row >= P:
                raise ValueError(f'{path}: more than P={P} sample lines')
            if len(values) < N + M:
                raise ValueError(
                    f'{path}: line {line_number} holds {len(values)} values, '
                    f'expected at least {N + M}'
                )

            X_train[row] = [float(value) for value in values[:N]]
            Y_train[row] = [float(value) for value in values[-M:]]
            row += 1

    if X_train is None:
        raise ValueError(f'{path}: the header line with P, N and M is missing')
    if row != P:
        # otherwise the remaining np.empty rows would hold arbitrary memory
        raise ValueError(f'{path}: expected P={P} sample lines, found {row}')

    return P, N, M, X_train, Y_train

In [None]:
def logistic_function(z):
    '''
    Logistic (sigmoid) transfer function: 1 / (1 + e^(-z)).

    z:  a scalar or a NumPy array (evaluated element-wise)
    '''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def logistic_first_derivative(z):
    '''
    First derivative of the logistic function at z, computed
    through the identity f'(z) = f(z) * (1 - f(z)).

    z:  a scalar or a NumPy array (evaluated element-wise)
    '''
    activation = logistic_function(z)
    complement = 1 - activation
    return activation * complement

In [None]:
def MSE(P, Y_train, predictions):
    '''
    Mean squared error over the first P samples.

    P:            the number of samples to average over
    Y_train:      the desired outputs, indexed by sample
    predictions:  the model outputs, indexed by sample

    Each sample contributes (Y_train[p] - predictions[p])**2 / P and
    the contributions are added up in sample order, so the result
    keeps the per-output layout of one sample. Returns 0 when P is 0.
    '''
    per_sample_terms = (
        ((Y_train[sample] - predictions[sample]) ** 2) / P
        for sample in range(P)
    )
    return sum(per_sample_terms, 0)

## Perceptron implementation

In [None]:
# Collects one {dataset name: list of per-epoch MSE values} dict per training run.
mean_squared_errors_different_data = list()

In [None]:
'''
data_path:
            the path to one of the files
            containing training data

P:
            the number of training
            samples the input data contain

N:
            the dimension of the
            input data

M:
            the dimension of the
            network's output (i.e. the # of
            output layer's neurons)

X_train:
            a matrix of shape (P, N),
            containing the input data features

Y_train:
            a matrix of shape (P, M),
            containing the desired output data
            (ground truth)

w_nm[n][m]:
            the weight going from the nth neuron
            of layer n to the mth neuron of layer m;
            shape (N+1, M), row 0 holds the bias weights

out_n[p][n]:
            the output of the nth neuron of layer n for
            sample p; shape (P, N+1), column 0 is the
            constant bias input 1

net_m[m]:
            the weighted input sum of the mth neuron of layer m
            (output layer), before the transfer function is applied
'''

SEED = 42 # fixed seed, so that re-running the notebook starts from the same weights
np.random.seed(SEED)

data_path = './PA-A_training_data_06.txt'
P, N, M, X_train, Y_train = parse_training_data(data_path)

# N+1 rows: row 0 is reserved for the bias weights
w_nm = np.random.rand(N+1, M) #range [0, 1)
w_nm = w_nm - 0.5 #range [-0.5, 0.5)

out_n = np.empty((P, N+1))
out_n[:,0] = 1 # constant bias input, multiplied by the bias weights in row 0 of w_nm
out_n[:,1:] = X_train

# placeholder only: the training loop replaces it with the result of forward_pass
net_m = np.random.randn(M)

**Note:** to avoid handling the bias weight separately, we treat it as an ordinary weight located at index 0. That is why, in the cell above, the first dimension of `w_nm` and the second dimension of `out_n` have size *(N+1)*, and *out_n[:, 0] = 1* for every sample.

## Perceptron learning (delta rule)

In [None]:
def forward_pass(p, out_n, w_nm):
    '''
    Performs forward-pass for a single train sample

    p:  the index of the sample to perform the
        forward-pass for

    out_n:  the output of previous layer; as built in this
            notebook, column 0 is the constant bias input
            shape: (P, N+1)

    w_nm:  the weights of neurons from previous
        layer connecting to this layer; as built in this
        notebook, row 0 holds the bias weights
        shape: (N+1, M)

    Returns (net, output): the weighted input sums and the
    logistic activations of this layer's neurons, both of
    shape (M,).
    '''
    # Both sizes come from w_nm itself, so the function does not
    # depend on notebook-level variables such as M.
    n_inputs, n_outputs = w_nm.shape[0], w_nm.shape[1]

    net = np.zeros(n_outputs)
    for n in range(n_inputs): # add the contribution of input n to every output neuron
        net += out_n[p][n] * w_nm[n]

    output = logistic_function(net)
    return net, output

In [None]:
def update_weights(w_nm, delta):
    '''
    Apply a weight change of the delta rule.

    w_nm:   the current weight value(s)
    delta:  the change(s) to add to them

    Returns the sum w_nm + delta.
    '''
    return w_nm + delta

In [None]:
def backward_pass(p, N, M, net_m, out_m, out_n, Y_train, w_nm, eta):
    '''
    Performs backward-pass for a single train sample and
    updates w_nm in place (nothing is returned)

    p:  the index of the sample to perform the
        backward-pass for

    N:  the total number of last layer's neurons
        (not counting the bias input)

    M:  the total number of this layer's neurons
        (i.e. number of output neurons for this layer)

    net_m:  the output of layer m before applying
        transfer function (i.e. weighted sum)

    out_m:  the output of layer m after applying
        transfer function (i.e. f(net_m))

    out_n:  the output of previous layer, including the
            bias input
            shape: (P, N+1)

    Y_train: contains the ground truth
            shape: (P, M)

    w_nm:  the weights of neurons from previous
        layer connecting to this layer, including the
        bias weights; a NumPy array that is modified in place
        shape: (N+1, M)

    eta: learning rate (step size)
    '''
    # The error term of an output neuron does not depend on the input
    # index n, so it is computed once for the whole layer.
    delta = (Y_train[p][:M] - out_m[:M]) * logistic_first_derivative(net_m[:M])

    # delta_w_nm[n][m] = eta * delta[m] * out_n[p][n]
    delta_w_nm = np.outer(out_n[p][:N+1], eta * delta)

    # Assigning to the slice writes into the caller's array.
    w_nm[:N+1, :M] = update_weights(w_nm[:N+1, :M], delta_w_nm)

In [None]:
# Training hyper-parameters
EPOCHS = 200 # number of passes over the whole training set
eta = 0.5 # learning rate (step size)

In [None]:
mean_squared_errors_list = [] # average MSE of every epoch, kept for further plotting

# Report about ten times per run. The interval is at least 1, so that
# EPOCHS < 10 does not end in a modulo by zero.
log_interval = max(1, EPOCHS // 10)

for epoch in range(EPOCHS):
    predictions = np.empty((P, M)) # the model's output for every sample of this epoch, used below to evaluate the model
    for p in range(P): # iterate over training samples; the weights are updated after each one

        net_m, out_m = forward_pass(p, out_n, w_nm)
        predictions[p] = out_m # recorded before the weights are updated for this sample
        backward_pass(p, N, M, net_m, out_m, out_n, Y_train, w_nm, eta)

    mean_squared_error = MSE(P, Y_train, predictions)
    mean_squared_error_average = np.mean(mean_squared_error)
    mean_squared_errors_list.append(mean_squared_error_average)

    if epoch % log_interval == 0:
        print(f'Epoch:{epoch}, MSE={mean_squared_error}, avg: {mean_squared_error_average:.4f}')

# keyed by the data file's name without directory and extension
mean_squared_errors_different_data.append({
    data_path.split('/')[-1].split('.')[0]: mean_squared_errors_list
})