<a href="https://colab.research.google.com/github/hadaseshel/MLP/blob/main/MLP49.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1><center><b>Bio-intelligent Algorithms</b></center></h1>
<h3><center>Exercise 1</center></h3>

The implementation of the exercise consists of several parts:
1. Getting data (train, validation, test and the trained model) from google drive.
2. The realization of the model, and the training of the model in the main function.
3. Calculation of the accuracy percentage of validation.
4. The model prediction for the test set.
5. Converting data from csv files to pickle.

**Instructions for performing inference on the test set:** <br>
Run cell at section 1 (Getting data from google drive), then run cell at section 4 (The model prediction for the test set)

**Instructions for performing training:** <br>
Run cell at section 1 (Getting data from google drive), then run cell at section 2 (The realization of the model, and the training of the model in the main function).


# 1. Getting data from google drive.
In this part we get the data: train, validation and test from the google drive.<br>
We converted the data into pickle files and used them.<br>
In addition, we get the model we have already trained and saved as a pickle, the model name is best_model0.487.pickle.

In [None]:
!gdown --id 1dpcyX8A4qNVvvtSvFrjg4PF9TvLp0iCe
!gdown --id 1CG3hKEbt7sX6D7Zn8cwb9OJ-Ij8u-qL4
!gdown --id 1Wcpf-ZPzqtccTp2Ry_6TWcRydth4n8l5
!gdown --id 1IT7zh9BBk-UMWE0PQlYyg_jlbhdmatco
import numpy as np
# Seed NumPy's global random generator so draws made through np.random are repeatable.
np.random.seed(42)

import pickle
import torch
# Define the device for training
# NOTE(review): `device` is not referenced by any code visible in this notebook
# (the layers below operate on NumPy arrays) — confirm whether torch is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def read_train_from_pickle(file_name="./train.pickle"):
    """Load the training set from a pickle file.

    The file must contain a two-element object that unpacks into
    ``(inputs, labels)``; both parts are returned in that order.
    """
    with open(file_name, 'rb') as stream:
        features, labels = pickle.load(stream)
    return features, labels

def read_validation_from_pickle(file_name="./validation.pickle"):
    """Load the validation set from a pickle file.

    The file must contain a two-element object that unpacks into
    ``(inputs, labels)``; both parts are returned in that order.
    """
    with open(file_name, 'rb') as stream:
        features, labels = pickle.load(stream)
    return features, labels

def read_test_from_pickle(file_name="./test.pickle"):
    """Load the (unlabelled) test inputs from a pickle file.

    Whatever single object the file holds is returned unchanged.
    """
    with open(file_name, 'rb') as stream:
        return pickle.load(stream)


Downloading...
From: https://drive.google.com/uc?id=1dpcyX8A4qNVvvtSvFrjg4PF9TvLp0iCe
To: /content/validation.pickle
100% 24.6M/24.6M [00:00<00:00, 103MB/s] 
Downloading...
From: https://drive.google.com/uc?id=1CG3hKEbt7sX6D7Zn8cwb9OJ-Ij8u-qL4
To: /content/train.pickle
100% 197M/197M [00:01<00:00, 98.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Wcpf-ZPzqtccTp2Ry_6TWcRydth4n8l5
To: /content/test.pickle
100% 24.6M/24.6M [00:00<00:00, 29.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1IT7zh9BBk-UMWE0PQlYyg_jlbhdmatco
To: /content/best_model0.487.pickle
100% 53.0M/53.0M [00:01<00:00, 36.0MB/s]


# 2. Running Multi layer perceptron with relu activation function.

**The architecture of the model:** <br>
Layers: 3072 -> 550 -> 298 -> 10<br>
The learning rate is halved every 20 epochs,
starting from an initial learning rate of 0.001. <br>

<br>
The model's layers are saved whenever the validation accuracy exceeds 47% and improves on the best value seen so far.

In [None]:
import numpy as np


def xavier_init(in_dim, out_dim):
    """Draw a Xavier/Glorot-style normal weight matrix.

    Samples come from a zero-mean normal distribution with standard
    deviation ``sqrt(2 / (in_dim + out_dim))``.

    Args:
        in_dim: number of inputs feeding the layer.
        out_dim: number of outputs produced by the layer.

    Returns:
        Array of shape ``(out_dim, in_dim)`` — the same orientation as
        ``LinearLayer.weights``, so it can be used as ``np.dot(W, x)``.
    """
    xavier_stddev = np.sqrt(2.0 / (in_dim + out_dim))
    # The shape must depend on both dimensions; (out_dim, out_dim) ignored in_dim.
    return np.random.normal(0.0, xavier_stddev, (out_dim, in_dim))

class SoftmaxLayer:
    """Column-wise softmax layer (one sample per column).

    ``forward`` caches the probabilities so that ``backward`` can build the
    vector-Jacobian product from them.
    """

    def __init__(self):
        # Probabilities produced by the most recent forward pass.
        self.probs = None

    def forward(self, x):
        """Return softmax probabilities computed along axis 0 of ``x``."""
        # Ensure numerical stability by subtracting the per-column maximum
        shifted_x = x - np.max(x, axis=0)

        # Exponentiate the shifted values
        exp_x = np.exp(shifted_x)

        # Normalise each column so it sums to one
        self.probs = exp_x / np.sum(exp_x, axis=0)

        return self.probs

    def backward(self, grad_output):
        """Propagate ``grad_output`` back through the softmax.

        For each column the gradient is ``p * (g - sum(p * g))``, where ``p``
        are the cached probabilities and ``g`` the incoming gradient. The
        result is then divided by the number of columns in ``grad_output``.

        Returns:
            ``(grad_input, None, None)`` — the two ``None`` values stand in
            for weight/bias gradients, which this layer does not have.
        """
        num_samples = grad_output.shape[1]

        # Per-column inner product <p, g>; broadcasts back over the class axis.
        weighted_sum = np.sum(self.probs * grad_output, axis=0)

        # Softmax vector-Jacobian product. The leading `probs` factor is required:
        # without it the expression is not the derivative of the softmax.
        grad_input = self.probs * (grad_output - weighted_sum)
        grad_input = grad_input / num_samples

        return grad_input, None, None


class LinearLayer:
    """Fully connected layer computing ``weights . x + biases``.

    Inputs are passed with one sample per column; ``weights`` has shape
    ``(output_size, input_size)`` and ``biases`` has shape ``(output_size, 1)``.
    """

    def __init__(self, input_size, output_size):
        # Small random weights (standard normal scaled by 0.01), zero biases.
        self.weights = 0.01 * np.random.randn(output_size, input_size)
        self.biases = np.zeros((output_size, 1))

    def forward(self, x):
        """Cache the input for the backward pass and return the affine output."""
        self.x = x
        projected = np.dot(self.weights, x)
        return projected + self.biases

    def backward(self, grad_output):
        """Return gradients w.r.t. the input, the weights and the biases."""
        # Bias gradient: sum the incoming gradient over the sample axis.
        bias_grad = np.sum(grad_output, axis=1, keepdims=True)
        # Weight gradient: incoming gradient times the cached input, transposed.
        weight_grad = np.dot(grad_output, self.x.T)
        # Input gradient: push the incoming gradient back through the weights.
        input_grad = np.dot(self.weights.T, grad_output)
        return input_grad, weight_grad, bias_grad


class ReLULayer:
    """Element-wise rectified linear unit with no trainable parameters."""

    def forward(self, x):
        """Cache the input and return it with negative entries clamped to zero."""
        self.x = x
        rectified = np.maximum(0, x)
        return rectified

    def backward(self, grad_output):
        """Pass the gradient through only where the cached input was positive.

        The two trailing ``None`` values stand in for weight/bias gradients.
        """
        positive_mask = self.x > 0
        return grad_output * positive_mask, None, None

def forward(x,layers ):
    """Feed ``x`` through every layer in order and return the final output.

    Each element of ``layers`` must expose a ``forward`` method; the output
    of one layer becomes the input of the next.
    """
    activation = x
    for stage in layers:
        activation = stage.forward(activation)
    return activation
class MLP:
    """Multi-layer perceptron assembled from ``LinearLayer`` and ``ReLULayer``.

    Every hidden size gets a linear layer followed by a ReLU; a final linear
    layer maps to ``output_size``. Data flows through the network with one
    sample per column.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        self.layers = []
        prev_size = input_size

        for size in hidden_sizes:
            self.layers.append(LinearLayer(prev_size, size))
            self.layers.append(ReLULayer())
            prev_size = size

        # Output layer; no activation is applied after it.
        self.layers.append(LinearLayer(prev_size, output_size))

    def forward(self, x):
        """Run ``x`` through all layers in order and return the last output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, grad_output, learning_rate,epoch):
        """Back-propagate ``grad_output`` and apply one gradient-descent step.

        The step size follows a staircase schedule: ``learning_rate`` is
        halved once for every 20 completed epochs (``epoch`` is 0-based).

        Returns:
            The gradient with respect to the network input.
        """
        decay_factor = 0.5  # Factor by which the learning rate will decay
        decay_epochs = 20  # Number of epochs after which the learning rate will decay

        # The schedule depends only on the epoch, so compute it once per call
        # rather than once per layer.
        lr = learning_rate * (decay_factor ** (epoch // decay_epochs))

        for layer in reversed(self.layers):
            grad_output, grad_weights, grad_biases = layer.backward(grad_output)

            # Parameter-free layers report None for both gradients.
            if grad_weights is not None and grad_biases is not None:
                layer.weights -= lr * grad_weights
                layer.biases -= lr * grad_biases

        return grad_output

    def _accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose arg-max prediction matches ``y``.

        ``X`` holds one sample per row and ``y`` the matching one-hot rows.
        """
        outputs = self.forward(X.T)
        return np.mean(np.argmax(outputs, axis=0) == np.argmax(y.T, axis=0))

    def train(self, X_train, y_train, X_val, y_val, batch_size, learning_rate, epochs, validation_freq):
        """Train with mini-batch gradient descent on a squared-error gradient.

        Args:
            X_train, y_train: training inputs (one sample per row) and one-hot targets.
            X_val, y_val: validation inputs and one-hot targets.
            batch_size: number of rows per mini-batch.
            learning_rate: initial step size (decayed inside ``backward``).
            epochs: number of passes over the training data.
            validation_freq: evaluate/report every this many epochs. Falsy
                values or values below 1 are treated as 1 (every epoch).

        Side effects:
            Prints train/validation accuracy at each evaluation. Whenever the
            validation accuracy exceeds 0.47 and beats the best value so far,
            ``self.layers`` is pickled as-is (including whatever attributes
            the layer objects currently hold) to
            ``best_model<accuracy>.pickle`` in the working directory.
        """
        num_samples = X_train.shape[0]
        best_accuracy = 0
        # Previously this argument was accepted but never used.
        eval_every = max(1, int(validation_freq)) if validation_freq else 1

        for epoch in range(epochs):
            for i in range(0, num_samples, batch_size):
                batch_X = X_train[i:i + batch_size]
                batch_y = y_train[i:i + batch_size]

                # Forward pass
                outputs = self.forward(batch_X.T)

                # Backward pass: gradient of the summed squared error
                grad_output = 2 * (outputs - batch_y.T)
                self.backward(grad_output, learning_rate,epoch)

            if (epoch + 1) % eval_every != 0:
                continue

            # Compute accuracy on train and validation data (train first, then
            # validation, so the layers' cached inputs end up as before).
            train_accuracy = self._accuracy(X_train, y_train)
            val_accuracy = self._accuracy(X_val, y_val)
            print(
                f"Epoch {epoch + 1}/{epochs} - Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")
            if val_accuracy > best_accuracy and val_accuracy > 0.47:
                weights = 'best_model' + str(val_accuracy) + '.pickle'
                best_accuracy = val_accuracy
                print(val_accuracy)
                with open(weights, 'wb') as f:
                    pickle.dump(self.layers, f)

def main():
    """Load the train/validation pickles, one-hot encode the labels and train an MLP."""
    train_inputs, train_labels = read_train_from_pickle()
    val_inputs, val_labels = read_validation_from_pickle()

    # One-hot encode the integer labels by indexing rows of an identity matrix.
    num_classes = 10
    identity = np.eye(num_classes)
    train_targets = identity[train_labels]
    val_targets = identity[val_labels]

    # Network layout: input features -> 550 -> 298 -> num_classes.
    network = MLP(train_inputs.shape[1], [550, 298], num_classes)

    # Train the MLP model
    network.train(
        train_inputs,
        train_targets,
        val_inputs,
        val_targets,
        batch_size=128,
        learning_rate=0.001,
        epochs=80,
        validation_freq=1,
    )


# Start training only when this code runs as the main module, not when it is imported.
if __name__ == '__main__':
    main()



Epoch 1/80 - Train Accuracy: 0.2295, Validation Accuracy: 0.2190
Epoch 2/80 - Train Accuracy: 0.2751, Validation Accuracy: 0.2690
Epoch 3/80 - Train Accuracy: 0.3035, Validation Accuracy: 0.3130
Epoch 4/80 - Train Accuracy: 0.3294, Validation Accuracy: 0.3450
Epoch 5/80 - Train Accuracy: 0.3454, Validation Accuracy: 0.3430
Epoch 6/80 - Train Accuracy: 0.3633, Validation Accuracy: 0.3620
Epoch 7/80 - Train Accuracy: 0.3772, Validation Accuracy: 0.3690
Epoch 8/80 - Train Accuracy: 0.3854, Validation Accuracy: 0.3840
Epoch 9/80 - Train Accuracy: 0.3832, Validation Accuracy: 0.3800
Epoch 10/80 - Train Accuracy: 0.3887, Validation Accuracy: 0.3700
Epoch 11/80 - Train Accuracy: 0.3865, Validation Accuracy: 0.3780
Epoch 12/80 - Train Accuracy: 0.3991, Validation Accuracy: 0.3800
Epoch 13/80 - Train Accuracy: 0.4054, Validation Accuracy: 0.3870
Epoch 14/80 - Train Accuracy: 0.3996, Validation Accuracy: 0.3940
Epoch 15/80 - Train Accuracy: 0.3982, Validation Accuracy: 0.3870
Epoch 16/80 - Train

# 3. Calculation of the accuracy percentage of validation.
By running this cell you can view the percent accuracy of the model you trained on validation.

In [None]:
class LinearLayer:
    """Fully connected layer computing ``weights . x + biases``.

    Inputs are passed with one sample per column; ``weights`` has shape
    ``(output_size, input_size)`` and ``biases`` has shape ``(output_size, 1)``.
    """

    def __init__(self, input_size, output_size):
        # Small random weights (standard normal scaled by 0.01), zero biases.
        self.weights = 0.01 * np.random.randn(output_size, input_size)
        self.biases = np.zeros((output_size, 1))

    def forward(self, x):
        """Cache the input for the backward pass and return the affine output."""
        self.x = x
        projected = np.dot(self.weights, x)
        return projected + self.biases

    def backward(self, grad_output):
        """Return gradients w.r.t. the input, the weights and the biases."""
        # Bias gradient: sum the incoming gradient over the sample axis.
        bias_grad = np.sum(grad_output, axis=1, keepdims=True)
        # Weight gradient: incoming gradient times the cached input, transposed.
        weight_grad = np.dot(grad_output, self.x.T)
        # Input gradient: push the incoming gradient back through the weights.
        input_grad = np.dot(self.weights.T, grad_output)
        return input_grad, weight_grad, bias_grad

class ReLULayer:
    """Element-wise rectified linear unit with no trainable parameters."""

    def forward(self, x):
        """Cache the input and return it with negative entries clamped to zero."""
        self.x = x
        rectified = np.maximum(0, x)
        return rectified

    def backward(self, grad_output):
        """Pass the gradient through only where the cached input was positive.

        The two trailing ``None`` values stand in for weight/bias gradients.
        """
        positive_mask = self.x > 0
        return grad_output * positive_mask, None, None

def forward(x,layers ):
    """Feed ``x`` through every layer in order and return the final output.

    Each element of ``layers`` must expose a ``forward`` method; the output
    of one layer becomes the input of the next.
    """
    activation = x
    for stage in layers:
        activation = stage.forward(activation)
    return activation

# Load the saved model object (presumably the layer list written by MLP.train).
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# model files that come from a source you trust.
with open('best_model0.487.pickle', 'rb') as handle:
        weights = pickle.load(handle)

# One-hot encode the integer validation labels (10 classes) so they can be
# compared with the network output via argmax below.
X_val,y_val = read_validation_from_pickle()
y_val = np.eye(10)[y_val]

# The layers expect one sample per column, hence the transpose. Accuracy is the
# fraction of columns whose arg-max output equals the arg-max of the label.
val_outputs = forward(X_val.T,weights)
val_accuracy = np.mean(np.argmax(val_outputs, axis=0) == np.argmax(y_val.T, axis=0))
print(val_accuracy)

0.487


# 4. The model prediction for the test set.
If you would only like to run inference with our trained weights, run the following cell after getting the data from the first cell (Section 1).

In [None]:
class LinearLayer:
    """Fully connected layer computing ``weights . x + biases``.

    Inputs are passed with one sample per column; ``weights`` has shape
    ``(output_size, input_size)`` and ``biases`` has shape ``(output_size, 1)``.
    """

    def __init__(self, input_size, output_size):
        # Small random weights (standard normal scaled by 0.01), zero biases.
        self.weights = 0.01 * np.random.randn(output_size, input_size)
        self.biases = np.zeros((output_size, 1))

    def forward(self, x):
        """Cache the input for the backward pass and return the affine output."""
        self.x = x
        projected = np.dot(self.weights, x)
        return projected + self.biases

    def backward(self, grad_output):
        """Return gradients w.r.t. the input, the weights and the biases."""
        # Bias gradient: sum the incoming gradient over the sample axis.
        bias_grad = np.sum(grad_output, axis=1, keepdims=True)
        # Weight gradient: incoming gradient times the cached input, transposed.
        weight_grad = np.dot(grad_output, self.x.T)
        # Input gradient: push the incoming gradient back through the weights.
        input_grad = np.dot(self.weights.T, grad_output)
        return input_grad, weight_grad, bias_grad

class ReLULayer:
    """Element-wise rectified linear unit with no trainable parameters."""

    def forward(self, x):
        """Cache the input and return it with negative entries clamped to zero."""
        self.x = x
        rectified = np.maximum(0, x)
        return rectified

    def backward(self, grad_output):
        """Pass the gradient through only where the cached input was positive.

        The two trailing ``None`` values stand in for weight/bias gradients.
        """
        positive_mask = self.x > 0
        return grad_output * positive_mask, None, None

def forward(x,layers ):
    """Feed ``x`` through every layer in order and return the final output.

    Each element of ``layers`` must expose a ``forward`` method; the output
    of one layer becomes the input of the next.
    """
    activation = x
    for stage in layers:
        activation = stage.forward(activation)
    return activation


# Load the saved model object (presumably the layer list written by MLP.train).
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# model files that come from a source you trust.
with open('best_model0.487.pickle', 'rb') as handle:
        weights = pickle.load(handle)
X_test = read_test_from_pickle()
# The layers expect one sample per column; transpose in, then transpose the
# outputs back so each row holds the scores for one test sample.
test_outputs = forward(X_test.T,weights)
test_outputs = test_outputs.T
# Predicted class = index of the highest score, shifted by +1 to give 1-based
# labels (the CSV-to-pickle conversion code subtracts 1 from the labels).
results = [np.argmax(x,axis=0) + 1 for x in test_outputs]
# One prediction per line, printed and also written to output.txt.
output = '\n'.join(str(x) for x in results)
print(output)
with open('output.txt', 'w') as f:
    f.write(output)


7
8
10
7
3
6
3
1
10
1
10
1
6
7
9
9
1
5
10
3
9
6
6
6
6
6
8
1
1
6
1
2
6
1
8
9
1
10
7
2
7
1
6
6
3
2
10
3
9
6
10
9
1
10
2
7
4
1
9
9
6
4
8
2
8
9
1
1
4
9
3
6
10
10
8
9
10
5
1
2
6
10
1
6
1
6
6
7
10
7
9
7
7
6
1
2
4
9
5
1
10
4
9
1
10
6
1
4
6
7
4
10
3
6
10
1
1
2
5
8
10
8
4
9
9
10
2
9
3
9
1
9
2
10
8
5
5
3
3
7
6
1
5
2
2
5
7
7
8
2
4
6
9
1
6
5
4
7
6
6
6
5
8
2
9
3
9
1
3
7
6
9
9
4
9
9
8
6
8
5
7
2
10
4
3
2
1
6
10
7
7
6
2
10
5
9
1
3
7
10
8
6
2
2
5
8
8
3
7
5
7
6
3
3
8
5
1
4
3
9
5
2
8
4
4
3
10
9
2
8
10
3
9
10
8
8
1
9
10
7
7
5
9
1
5
4
6
8
2
4
1
9
6
9
1
4
7
10
3
8
2
7
5
8
5
4
9
9
4
9
6
3
5
1
7
4
3
1
8
10
10
7
1
9
3
5
9
8
8
10
4
3
2
6
1
3
6
9
3
10
8
2
10
7
8
9
10
9
7
9
10
10
3
7
9
1
4
10
7
7
7
8
3
8
2
8
5
4
3
7
1
7
1
9
6
6
9
7
10
1
7
2
10
8
4
10
3
1
7
2
4
10
6
10
2
10
10
9
5
3
5
8
4
2
3
2
7
10
4
2
3
8
8
5
3
7
3
10
5
2
10
7
6
8
3
4
3
5
1
8
1
1
3
8
10
8
7
10
1
1
5
4
9
7
7
2
5
5
7
1
2
10
7
9
4
9
9
8
8
7
7
3
6
6
4
3
5
6
3
2
3
8
10
6
7
1
8
3
1
3
3
5
10
6
1
1
9
6
7
4
2
10
7
6
2
4
7
10
5
5
3
6
5
3
2
4
4
8
6
2
10
4


# 5. Converting data from csv files to pickle.
We have attached here the code that we ran locally on the computer to process the data and convert it to pickle for convenience.

In [None]:
import pandas as pd
import numpy as np
import pickle as pickle
from typing import List, Tuple


def convert_dataframe(df, has_y=True) -> Tuple[List[Tuple], List[Tuple]]:
    """Split a data frame into per-row tuples of inputs and of labels.

    The column labelled ``0`` is treated as the label column; every other
    column is an input. Note that column ``0`` is left out of the inputs
    even when ``has_y`` is False.

    Returns:
        ``(inputs, outputs)`` — lists with one tuple per row. ``outputs`` is
        an empty list when ``has_y`` is False.
    """
    label_mask = df.columns == 0
    feature_rows = df.loc[:, ~label_mask].itertuples(index=False, name=None)
    inputs = list(feature_rows)
    if not has_y:
        return inputs, []
    label_rows = df.loc[:, label_mask].itertuples(index=False, name=None)
    return inputs, list(label_rows)


def to_black_white(inputs):
    """Collapse each flat 32x32x3 sample to a flat 1024-value grey-scale vector.

    Every sample is reshaped to ``(32, 32, 3)``, averaged over the last axis
    and flattened again. Returns a list with one array per sample.
    """
    return [
        np.reshape(np.array(sample), (32, 32, 3)).mean(axis=2).flatten()
        for sample in inputs
    ]


def read_from_files(file_name="../data/train.csv", pickle_name='train.pickle'):
    """Convert a labelled, header-less CSV into an ``(x, y)`` pickle.

    Column 0 supplies the labels, which are shifted down by one and
    flattened to a 1-D array; the remaining columns become the input matrix.
    The pair is written to ``pickle_name`` and also returned.
    """
    frame = pd.read_csv(file_name, header=None)
    feature_rows, label_rows = convert_dataframe(frame)
    x = np.array(feature_rows)
    # Labels arrive as one-element tuples: shift them by -1 and flatten.
    y = (np.array(label_rows) - 1).reshape(len(label_rows), )
    with open(pickle_name, 'wb') as sink:
        pickle.dump((x, y), sink, protocol=pickle.HIGHEST_PROTOCOL)
    return x, y


def read_test_files(file_name="../data/test.csv", pickle_name='test.pickle'):
    """Convert an unlabelled, header-less CSV into a pickled input matrix.

    The inputs returned by ``convert_dataframe`` (called with ``has_y=False``)
    are turned into an array, written to ``pickle_name`` and returned.
    """
    frame = pd.read_csv(file_name, header=None)
    feature_rows, _unused = convert_dataframe(frame, False)
    x = np.array(feature_rows)
    with open(pickle_name, 'wb') as sink:
        pickle.dump(x, sink, protocol=pickle.HIGHEST_PROTOCOL)
    return x