# Multi Layer Perceptron

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

np.random.seed(42)

### iris dataset
The Iris dataset is a popular dataset in machine learning, containing 150 samples of iris flowers, with four features each, used for classification tasks. It consists of three classes representing three species of iris flowers: setosa, versicolor, and virginica.

In [2]:
# helper funcition for data preprocessing
def one_hot_encode(targets):
    encoded_targets = np.zeros((len(targets), 3))
    encoded_targets[np.arange(len(targets)), targets] = 1
    return encoded_targets

In [3]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

# data preprocessing
# scaling inputs with a mean of 0 and a standard deviation of 1 for each feature.
# one hot encoding targets, example: 2 --> [0, 0, 1]
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = one_hot_encode(y)               

# spliting for model validations
# full_dataset(150 samples) --> train_set(145 samples) and test_set(5 samples)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=5, random_state=42)

# sample 0
X_train[0], y_train[0]

(array([-0.53717756,  0.78880759, -1.2833891 , -1.05217993]),
 array([1., 0., 0.]))

In [4]:
# healper function for model validation
flower_names = ["Setosa", "Versicolor", "Virginica"]

def verify(model, string):
    print("\n\n",string,"id \t Actual \t\t Prediction \t right/wrong \n")
    
    data = X_train if string=="train" else X_test
    targets = y_train if string=="train" else y_test
    ids = np.random.randint(0, len(targets), 5) if string=="train" else list(range(5))
    
    for i in ids:
        pred, actual = model.predict(data[i]), np.argmax(targets[i])
        isright = "right" if pred==actual else "wrong"
        print(i,"\t\t",flower_names[actual],"\t\t",flower_names[pred], "\t",isright)

### Model
A Multi-Layer Perceptron (MLP) is a feedforward neural network with input, hidden, and output layers, used for supervised learning tasks. It learns complex patterns from data through backpropagation and activation functions<br><br>

In [5]:
class MLP():
    
    def __init__(self, hidden_layer_size=10):
        self.hidden_layer_weights = np.random.randn(hidden_layer_size, 4)
        self.hidden_layer_biases = np.random.rand(1, hidden_layer_size)
        self.output_layer_weights = np.random.randn(3, hidden_layer_size)
        self.output_layer_biases = np.random.rand(1, 3)
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_derivative(self, x):
        return x * (1 - x)
    
    def cross_entropy_loss(self, y, pred, epsilon=1e-15):
        pred = np.clip(pred, epsilon, 1-epsilon)
        loss = -np.sum(y * np.log(pred))/3
        return loss
    
    def feedforward(self, x):
        
        # a = sigmoid(x @ w.T + b)
        
        self.inputs = x
        self.a_hidden_layer = self.sigmoid(np.dot(self.inputs, self.hidden_layer_weights.T) + self.hidden_layer_biases)
        self.a_output_layer = self.sigmoid(np.dot(self.a_hidden_layer, self.output_layer_weights.T) + self.output_layer_biases)
        return self.a_output_layer
    
    def backpropagation(self, y, learning_rate):
        
        #   dC       dz       da       dC                 dC       dz       da       dC
        #  ----  =  ----  *  ----  *  ----               ----  =  ----  *  ----  *  ----
        #   dw       dw       dz       da                 db       db       dz       da
        
        ouput_layer_error = y - self.a_output_layer
        db_output_layer = ouput_layer_error
        dw_output_layer = (ouput_layer_error * self.sigmoid_derivative(self.a_output_layer)).T * self.a_hidden_layer

        hidden_layer_error = (ouput_layer_error * self.sigmoid_derivative(self.a_output_layer)) @ self.output_layer_weights
        db_hidden_layer = hidden_layer_error
        dw_hidden_layer = (hidden_layer_error * self.sigmoid_derivative(self.a_hidden_layer)).T * self.inputs

        self.output_layer_weights += learning_rate * dw_output_layer
        self.output_layer_biases += learning_rate * db_output_layer

        self.hidden_layer_weights += learning_rate * dw_hidden_layer
        self.hidden_layer_biases += learning_rate * db_hidden_layer
    
    def train(self, X_train, y_train, epochs=10, learning_rate=0.01):
        for e in range(epochs):
            loss = 0
            for i in range(len(X_train)):
                pred = self.feedforward(X_train[i])
                loss = self.cross_entropy_loss(y_train[i], pred)
                self.backpropagation(y_train[i], learning_rate)
            
            print('epoch ',(e+1),'\t---\tloss: ',loss)
    
    def predict(self, x):
        return np.argmax(self.feedforward(x))
        

In [6]:
mlp = MLP()

# Lets checking model performance before training
verify(mlp, 'train')
verify(mlp, 'test')



 train id 	 Actual 		 Prediction 	 right/wrong 

62 		 Virginica 		 Versicolor 	 wrong
95 		 Setosa 		 Virginica 	 wrong
51 		 Setosa 		 Virginica 	 wrong
95 		 Setosa 		 Virginica 	 wrong
131 		 Virginica 		 Virginica 	 right


 test id 	 Actual 		 Prediction 	 right/wrong 

0 		 Versicolor 		 Virginica 	 wrong
1 		 Setosa 		 Virginica 	 wrong
2 		 Virginica 		 Virginica 	 right
3 		 Versicolor 		 Virginica 	 wrong
4 		 Versicolor 		 Virginica 	 wrong


### Training

In [7]:
mlp = MLP()
mlp.train(X_train, y_train, epochs=100)

epoch  1 	---	loss:  0.0640759526499376
epoch  2 	---	loss:  0.12281814007713947
epoch  3 	---	loss:  0.16190395188855103
epoch  4 	---	loss:  0.17736860451240263
epoch  5 	---	loss:  0.17945359359249724
epoch  6 	---	loss:  0.17534168127654615
epoch  7 	---	loss:  0.1687798059141731
epoch  8 	---	loss:  0.16156763052321876
epoch  9 	---	loss:  0.15451671184571275
epoch  10 	---	loss:  0.14795653374987114
epoch  11 	---	loss:  0.14198883909432608
epoch  12 	---	loss:  0.1366126690896484
epoch  13 	---	loss:  0.13178469840528137
epoch  14 	---	loss:  0.12744764434000191
epoch  15 	---	loss:  0.12354310430726707
epoch  16 	---	loss:  0.12001690164248473
epoch  17 	---	loss:  0.11682090335841877
epoch  18 	---	loss:  0.11391323967832064
epoch  19 	---	loss:  0.1112578544105803
epoch  20 	---	loss:  0.10882382451909367
epoch  21 	---	loss:  0.10658464792868821
epoch  22 	---	loss:  0.10451758309698879
epoch  23 	---	loss:  0.10260306924612683
epoch  24 	---	loss:  0.10082423129789864
epoch

#### Is it trained?, Let's check

In [8]:
verify(mlp, 'train')



 train id 	 Actual 		 Prediction 	 right/wrong 

32 		 Setosa 		 Setosa 	 right
141 		 Virginica 		 Versicolor 	 wrong
20 		 Virginica 		 Virginica 	 right
47 		 Versicolor 		 Versicolor 	 right
127 		 Setosa 		 Setosa 	 right


In [9]:
verify(mlp, 'test')



 test id 	 Actual 		 Prediction 	 right/wrong 

0 		 Versicolor 		 Versicolor 	 right
1 		 Setosa 		 Setosa 	 right
2 		 Virginica 		 Virginica 	 right
3 		 Versicolor 		 Versicolor 	 right
4 		 Versicolor 		 Versicolor 	 right


#### Looks like our model learned somethig!