# Classes and functions to build NN's with different features for testing
Structure: Inputlayer-(FCL-ActivationLayer)-(FCL-ActivationLayer)...(FCL-ActivationLayer)-OutputLayer

In [474]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
np.random.seed(1)
tensorflow.random.set_seed(1)
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, f1_score

 ## Class for layer

In [475]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

In [476]:
def softmax(x):
    if x.ndim == 1:
        # If x has only one dimension, convert it to a 2D array with a single row
        x = x.reshape(1, -1)

    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

def softmax_prime(x):
    softmax_output = softmax(x)
    return softmax_output * (1 - softmax_output)


## Class for fully connected layer

In [477]:
class FCLayer():
    # input_size (i) = number of input neurons
    # output_size (j) = number of output neurons
    def __init__(self, input_size, output_size):
        #start by setting random weights and biases
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns y_vector for a given x_vector
    def forward_propagation(self, input_data):
        #reshape x into a row vector
        self.input = input_data.reshape(1,-1)
        #calculate output of layer: 
        # y = xw+b
        #print ("weight size ") 
        #print(self.weights.size) 
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
    
    # computes dE/dW, dE/dB for a given output_error=dE/dY. 
    # Returns input_error=dE/dx.
   
    
    def backward_propagation(self, output_error, learning_rate):
        # dE/dx - this is the output of this function
        input_error = np.dot(output_error, self.weights.T)
        
     
        # (dE/dW = x^T * dE/dy)
        weights_error = np.dot(self.input.T, output_error)
        # db/dx = dE/dy = output_error

        # update weights
        self.weights -= learning_rate * weights_error
        #update biases
        self.bias -= learning_rate * output_error
        return input_error

## Activation layer

### input:   
activation function for the forward propagation  
the derivative of the activation function for the backward propagation  

### forward_propagation:  
   
input: $\underline{y}$ of size $j$ x $1$  
output: $ \underline{y_{act}} $  of size $j$ x $1$
  
### backward_propagation: 
   
input: $ \frac{d\underline{E}}{d\underline{y}} $ of size $1$ x $j$  
output: $ \frac{d\underline{E}}{d\underline{y_{act}}} * f'(\underline{y_{act}}) $ of size $1$ x $j$  
this is element-wise multiplication, so $\frac{d\underline{E}}{d{y_{act, k}}}$ * $f'(y_{act, k})$ etc.  
the derivative of the activation function is with respect to its input, in this case $y_{act}$ (see calculation below)

In [584]:
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
        self.input = None
        self.output = None

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error



## Activation functions and their derivatives

$\tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}$  
  
$\tanh'(x) = \frac{{(e^x+e^{-x})(e^x+e^{-x})-(e^x-e^{-x})(e^x-e^{-x})}}{{(e^x+e^{-x})^2}} = \frac{{(e^x+e^{-x})^2-(e^x-e^{-x})^2}}{{(e^x+e^{-x})^2}}= 1-\frac{{(e^x-e^{-x})^2}}{{(e^x+e^{-x})^2}} = 1 - \tanh^2(x)$  

$relu'(0) = 1$ since $\lim_{{x \to 0^-}} = 1 $ and $ \lim_{{x \to 0^+}} = 0$ , we have to pick, by convention we pick 1  
  
$sigmoid(x) = \frac{1}{1 + e^{-x}}$  
  
$sigmoid'(x) = \frac{e^{-x}}{(e^{-x}+1)^2}$

In [479]:
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1-np.tanh(x)**2

def relu(x):
    return np.maximum(0, x)
    
def relu_prime(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    sigmoid_x = sigmoid(x)
    return sigmoid_x * (1 - sigmoid_x)

activation_functions = [tanh, tanh_prime, relu, relu_prime]

## Loss function and its derivative  

### Cross-entropy
$loss(y_{true}, y_{pred}) = -\frac{1}{N} \sum_{i=1}^{N} \left( y_{true}^{(i)} \log(y_{pred}^{(i)}) + (1 - y_{true}^{(i)}) \log(1 - y_{pred}^{(i)}) \right) $

$\frac{d loss(y_{true}, y_{pred})}{dy_{pred}}= \frac{y_{pred} - y_{true}}{y_{pred} \cdot (1 - y_{pred}) \cdot j }$  
  
$j$ is the size of vector  $y$

In [480]:
def cross_entropy(y_true, y_pred):
    epsilon = 1e-15  # small constant to avoid division by zero
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # clip predictions to avoid numerical instability
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def cross_entropy_prime(y_true, y_pred):
    epsilon = 1e-15  # small constant to avoid division by zero
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # clip predictions to avoid numerical instability
    return (y_pred - y_true) / (y_pred * (1 - y_pred) * y_true.size)

## NN class

In [481]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None
        self.configuration= None
        self.act = None
        self.act_prime = None
        self.epoch_error = None
      
    
  
    def set_activation_index(self, a):
        self.act = a
    
    def set_activation_prime_index(self, b):
        self.act_prime = b

    #add the configuration
    def set_config(self, c):
        self.configuration = c
        
    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set up the loss functions
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)

        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)
        return result

    # train the network 
    def fit(self, x_train, y_train, epochs, learning_rate):
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))
            self.epoch_error = err

    

# Comparing different configurations

## Data input and management

In [482]:
DB_data = pd.read_excel("Dry_Bean_Dataset.xlsx")

In [483]:
x_total = DB_data.drop(['Class'], axis=1) # everything except the class # DB for Dry beans
y = DB_data['Class']  # only the class


In [484]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse=False)

# Reshape the input vector to a 2D array with a single column
input_array = [[item] for item in y]

# Fit and transform the input array using the encoder
y_total = encoder.fit_transform(input_array)



In [485]:
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_total, y_total, test_size=0.1, random_state=1)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=1)

#scale
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)
x_test = scaler.transform(x_test)

In [486]:
y.unique()

array(['SEKER', 'BARBUNYA', 'BOMBAY', 'CALI', 'HOROZ', 'SIRA', 'DERMASON'],
      dtype=object)

In [487]:
y_train

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [488]:
y_test

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]])

In [489]:
y_val

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [490]:
x_train

array([[-0.45230498, -0.56642518, -0.83998333, ...,  1.49283775,
         1.80624901,  0.85834145],
       [-0.31036032, -0.29539358, -0.14142003, ..., -0.35725861,
        -0.54563597,  0.14303312],
       [ 2.986967  ,  2.66320004,  2.3909027 , ..., -1.24784071,
         0.07350478, -0.23572825],
       ...,
       [ 0.72137049,  0.87385458,  1.01284472, ..., -1.03144271,
        -0.73141171, -0.97885949],
       [-0.16323619, -0.06995348, -0.07942365, ..., -0.24691538,
        -0.17826263, -0.04219919],
       [ 0.66856017,  0.91924172,  1.0724145 , ..., -1.13677331,
        -0.99127158, -1.96737601]])

## Define metric F1

In [491]:
def calculate_f1(actual_labels, predicted_values):
    assert len(actual_labels) == len(predicted_values), "Number of labels and predictions should match."
    
    num_classes = len(set(actual_labels))  # Number of unique classes
    
    true_positives = [0] * num_classes
    false_positives = [0] * num_classes
    false_negatives = [0] * num_classes
    
    for i in range(len(actual_labels)):
        true_class = actual_labels[i]
        predicted_class = predicted_values[i]
        
        if true_class == predicted_class:
            true_positives[true_class] += 1
        else:
            false_positives[predicted_class] += 1
            false_negatives[true_class] += 1
    
    f1_scores = []
    
    for class_idx in range(num_classes):
        tp = true_positives[class_idx]
        fp = false_positives[class_idx]
        fn = false_negatives[class_idx]
        
        if tp + fp > 0:
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            
            if precision + recall > 0:
                f1 = 2 * (precision * recall) / (precision + recall)
                f1_scores.append(f1)
            else:
                f1_scores.append(0)
    
    if len(f1_scores) > 0:
        f1_macro = sum(f1_scores) / len(f1_scores)
    else:
        f1_macro = 0
    
    return f1_macro


## Find the best structure for our network

10, 50, 100 or 200 nodes per layer  
2, 3 or 4 hidden layers  
0.3, 0.5 or 0.9 treshold  
2 activation functions for hidden layers: tanh and relu  

generate all combinations  
calculate f1 for each  
find best hyperparameters.

In [492]:
def generate_permutations(nums):
    if len(nums) == 0:
        return [[]]  # Base case: an empty list has one permutation, an empty list itself
    
    permutations = []
    
    for i in range(len(nums)):
        current_num = nums[i]
        remaining_nums = nums[:i] + nums[i+1:]
        
        for permutation in generate_permutations(remaining_nums):
            permutations.append([current_num] + permutation)
    
    return permutations


In [493]:
def create_configurations(nodes_per_layer_options, layers):
    configs = []
    node_options = generate_permutations(nodes_per_layer_options)
    for i in layers:
        # Extract the first i columns and store unique values in a set
        unique_values = set(tuple(row[:i]) for row in node_options)
        # Convert the set back to a list of lists
        extracted_columns = [list(values) for values in unique_values]
        configs.append(extracted_columns)
        #Print the extracted columns
        #print(i, "layers")
        #for column in extracted_columns:
            #print( column)
    return configs

In [498]:
def create_network(nodes, activation_index, activation_prime_index):
    net = Network()
    net.set_activation_index(activation_index)
    net.set_activation_prime_index(activation_prime_index)
    net.add(FCLayer(x_total.shape[1], nodes[0]))  # input layer
   # print("input layer ", nodes[0] )

    for i in range(len(nodes)-1):
        net.add(FCLayer(nodes[i], nodes[i+1])) 
     #   print("hidden layer in", nodes[i], "out", nodes[i+1] )
        net.add(ActivationLayer(activation_functions[activation_index], activation_functions[activation_prime_index]))
 
    net.add(FCLayer(nodes[-1], 7))  # output layer
    net.add(ActivationLayer(softmax, softmax_prime))
  #  print("output layer ", nodes[-1] )
  
    return net

In [495]:
nodes_options = [10, 50, 100, 200]
layer_options = [2] #,3,4 - reduced out of time constraints
treshold = [0.5, 0.8, 0.9]
configurations= create_configurations(nodes_options, layer_options)
nns_tanh = []
nns_relu = []
data = []
top = []

In [496]:
def generate_tanh(c_i):
    #activation_functions = [tanh, tanh_prime, relu, relu_prime]
    for x in c_i:
        c = create_network(x,0,1)
        c.set_config(x)
        nns_tanh.append(c)  
    

In [497]:
def generate_relu (c_i):
      for x in c_i:
        c = create_network(x, 2, 3)
        c.set_config(x)
        nns_relu.append(c)
  

In [502]:
def hyperparameter_search(nns):
    epochs = [10, 25, 50]
    for epoch in epochs:
            for nets in nns:
                nets.use(cross_entropy, cross_entropy_prime)
                nets.fit(x_train, y_train, epochs=epoch, learning_rate=0.1)
                pred = nets.predict(x_val)
                pr = np.squeeze(pred)
                predicted_classes = [np.argmax(p) for p in pr]
                predicted_classes=np.array(predicted_classes)
                predicted_classes.size
                #Convert one-hot encoded labels to natural unencoded form
                y_val_unencoded = np.argmax(y_val, axis=1)
                f1_s = calculate_f1(y_val_unencoded, predicted_classes)
                top.append([nets.configuration, nets.act, nets.act_prime, epoch, f1_s])


In [503]:
for i in range(len(layer_options)):
   
    generate_tanh(configurations[i])
    generate_relu(configurations[i])

    hyperparameter_search(nns_tanh)
    hyperparameter_search(nns_relu)

epoch 1/10   error=0.293970
epoch 2/10   error=0.282704
epoch 3/10   error=0.271198
epoch 4/10   error=0.289111
epoch 5/10   error=0.288036
epoch 6/10   error=0.291155
epoch 7/10   error=0.278894
epoch 8/10   error=0.286845
epoch 9/10   error=0.287199
epoch 10/10   error=0.279921
epoch 1/10   error=0.216025
epoch 2/10   error=0.172822
epoch 3/10   error=0.163482
epoch 4/10   error=0.156029
epoch 5/10   error=0.158366
epoch 6/10   error=0.159641
epoch 7/10   error=0.161118
epoch 8/10   error=0.156946
epoch 9/10   error=0.158665
epoch 10/10   error=0.165652
epoch 1/10   error=0.254026
epoch 2/10   error=0.227960
epoch 3/10   error=0.236319
epoch 4/10   error=0.234605
epoch 5/10   error=0.225878
epoch 6/10   error=0.220341
epoch 7/10   error=0.225259
epoch 8/10   error=0.233419
epoch 9/10   error=0.239115
epoch 10/10   error=0.225831
epoch 1/10   error=0.207208
epoch 2/10   error=0.187590
epoch 3/10   error=0.193702
epoch 4/10   error=0.197448
epoch 5/10   error=0.195540
epoch 6/10   erro

KeyboardInterrupt: 

In [504]:
pd.DataFrame(top).to_csv('dry-beans-top2.csv')

In [574]:
sorted_top = pd.read_csv("dry-beans-top2.csv")
sorted_top = sorted_top.sort_values(sorted_top.columns[-1], ascending=False)

sorted_top = sorted_top.drop(sorted_top.columns[0], axis=1)
sorted_top.iloc[:, 0] = sorted_top.iloc[:, 0].apply(eval).apply(np.array)


In [577]:
sorted_top

Unnamed: 0,0,1,2,3,4
199,"[200, 50, 10]",2,3,10,0.862763
186,"[100, 10]",2,3,10,0.856261
258,"[100, 10]",2,3,50,0.853479
196,"[100, 10, 200]",2,3,10,0.851031
42,"[100, 10]",2,3,10,0.850723
...,...,...,...,...,...
139,"[50, 100, 10]",0,1,25,0.274042
231,"[10, 200, 100]",2,3,25,0.272026
266,"[50, 10, 100]",2,3,50,0.242262
287,"[100, 50, 200]",2,3,50,0.209590


In [580]:
tops=sorted_top.iloc[0]
tops[0].shape


(3,)

In [585]:
#0 = config, 1= act, 2=act_prime, 3 = epochs
mdl = create_network(tops[0], tops[1], tops[2])
mdl.use(cross_entropy, cross_entropy_prime)
mdl.fit(x_test, y_test, tops[3], 0.1)

prd = mdl.predict(x_test)
prds = np.squeeze(prd)
predicted_c = [np.argmax(p) for p in prds]
predicted_c=np.array(predicted_c)
#Convert one-hot encoded labels to natural unencoded form
y_test_unencoded = np.argmax(y_test, axis=1)
f1_best = calculate_f1(y_test_unencoded, predicted_c)
print(f1_best)

epoch 1/10   error=0.194821
epoch 2/10   error=0.127186
epoch 3/10   error=0.119959
epoch 4/10   error=0.108959
epoch 5/10   error=0.102906
epoch 6/10   error=0.098577
epoch 7/10   error=0.103645
epoch 8/10   error=0.101885
epoch 9/10   error=0.091347
epoch 10/10   error=0.085323
0.9196971736738292


### 5 fold cross-validation

In [586]:
from sklearn.model_selection import KFold
def cross_validation(x, y, tops):
    kfold = KFold(n_splits=5)
    scores = []
    # enumerate splits
    for (train, test) in kfold.split(x,y):
        x_tr = x[train]
        y_tr = y[train]
        x_tst = x[test]
        y_tst = y[test]
        model = create_network(tops[0], tops[1], tops[2])
        model.use(cross_entropy, cross_entropy_prime)
        model.fit(x_tr, y_tr, tops[3], 0.1)
        y_pred = model.predict(x_tst)
       
        y_prds = np.squeeze(y_pred)
        y_predicted_c = [np.argmax(p) for p in y_prds]
        y_predicted_c=np.array(y_predicted_c)
        #Convert one-hot encoded labels to natural unencoded form
        y_tst_u = np.argmax(y_tst, axis=1) 
        f1_s = calculate_f1(y_tst_u, y_predicted_c)   
        scores.append(f1_s)
    return scores

In [587]:
scores = cross_validation(x_train, y_train, tops)
print(scores)
np.mean(scores)

epoch 1/10   error=0.126974
epoch 2/10   error=0.085613
epoch 3/10   error=0.101404
epoch 4/10   error=0.114387
epoch 5/10   error=0.103059
epoch 6/10   error=0.107093
epoch 7/10   error=0.097717
epoch 8/10   error=0.097313
epoch 9/10   error=0.102436
epoch 10/10   error=0.081265
epoch 1/10   error=0.109075
epoch 2/10   error=0.080886
epoch 3/10   error=0.076019
epoch 4/10   error=0.077111
epoch 5/10   error=0.079204
epoch 6/10   error=0.073154
epoch 7/10   error=0.069596
epoch 8/10   error=0.067829
epoch 9/10   error=0.066739
epoch 10/10   error=0.065993
epoch 1/10   error=0.124626
epoch 2/10   error=0.092172
epoch 3/10   error=0.085072
epoch 4/10   error=0.076560
epoch 5/10   error=0.075950
epoch 6/10   error=0.107191
epoch 7/10   error=0.099297
epoch 8/10   error=0.085380
epoch 9/10   error=0.074166
epoch 10/10   error=0.073799
epoch 1/10   error=0.134605
epoch 2/10   error=0.122836
epoch 3/10   error=0.082608
epoch 4/10   error=0.075533
epoch 5/10   error=0.072735
epoch 6/10   erro

0.8858427892185144

## NN from library

## extra: how we chose the learning-rate

In [526]:
import numpy as np
import matplotlib.pyplot as plt

#uncommented the print epoch error, see errors from epoch to epoch

hand = create_network([x_total.shape[1], 10, 5], 0, 1)
hand.use(cross_entropy, cross_entropy_prime)
hand.fit(x_train, y_train, 10, 0.1)

preds=hand.predict(x_test)

#preds_arr = np.array(preds)
pr = np.squeeze(preds)

predicted_classes = [np.argmax(p) for p in pr]
predicted_classes=np.array(predicted_classes)
predicted_classes.size

print("f1 hand")
 #Convert one-hot encoded labels to natural unencoded form
y_test_unencoded = np.argmax(y_test, axis=1)
f1 = calculate_f1(y_test_unencoded, predicted_classes )
print(f1)



epoch 1/10   error=0.336686
epoch 2/10   error=0.275489
epoch 3/10   error=0.251200
epoch 4/10   error=0.247318
epoch 5/10   error=0.246717
epoch 6/10   error=0.230467
epoch 7/10   error=0.216032
epoch 8/10   error=0.231538
epoch 9/10   error=0.242602
epoch 10/10   error=0.250488
f1 hand
0.678297798797842


In [None]:
# Define the range of learning rates to explore
learning_rates = np.logspace(-6, 0, num=10)

# Initialize lists to store learning rates and corresponding losses
lr_values = []
loss_values = []

for lr in learning_rates:
    
    # Train your neural network for a few epochs with the current learning rate
    hand.fit(x_train, y_train, 50, lr)
    # Calculate and store the loss/error after each epoch
    loss = hand.epoch_error
    lr_values.append(lr)
    loss_values.append(loss)

# Plot the learning rate vs. loss curve
plt.plot(lr_values, loss_values)
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.xscale('log')
plt.show()