## Utility Function

In [56]:
import numpy as np


def linear(x):
    return x


def relu(x):
    return np.maximum(x, 0)


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

## MLPClassifier Implementation

Parameter yang ada pada kelas MLPClassifier yaitu: **struktur jaringan (jumlah layer, jumlah neuron setiap layer, fungsi aktivasi setiap layer), initial weights tiap neuron, learning_rate, error_threshold, max_iter, batch_size**

In [57]:
import math
import numpy as np
import os
import json


class FFNNLayer:
    def __init__(self, number_of_neurons: int, activation_function: str):
        """
        :param number_of_neurons:
        :param activation_function:
        """
        self.number_of_neurons = number_of_neurons
        self.activation_function = activation_function


class MLPClassifier:
    def __init__(self, layers: list, weights, learning_rate=None, error_threshold=None, max_iter=None, batch_size=None,  stopped_by=None, expected_weights = None):
        """
        :param layers: list of FFNNLayer to specify the activation function and num of neurons for each layers
        :param learning_rate: the learning rate
        :param error_threshold: the error threshold
        :param max_iter: max iter to stop iteration
        :param batch_size: the size of batch for each mini batch
        """
        self.num_of_layers = len(layers)
        self.layers = layers
        self.learning_rate = learning_rate
        self.error_threshold = error_threshold
        self.error_sum = 1
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.X_train = []
        self.y_train = []
        self.neuron_values = []
        self.weights = [weight[1:] for weight in weights]
        self.bias_weights = [weight[0] for weight in weights]
        self.prediction = []
        self.num_of_features = 0
        self.num_of_batches = 0
        self.d_weights = None
        self.d_bias_weights = None
        self.expected_stopped_by = stopped_by
        self.expected_weights = expected_weights
        self.expected_output = None
        self.stopped_by = None
        self.current_inputs = None

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        self.num_of_features = len(self.X_train)
        self.num_of_batches = math.ceil(len(self.X_train) / self.batch_size)

        # the first neuron is the X inputs themselves
        self.neuron_values = [[None for _ in range(layer.number_of_neurons)] for layer in self.layers]
        num_iter = 0
        while num_iter < self.max_iter:
            num_of_batches = math.ceil(len(self.X_train) / self.batch_size)
            err = 0
            for i in range(num_of_batches):
                self.__forward(i)
                self.__backward(i)
                err += self.__calculate_error(i)

            # Update the average error for this iteration
            self.error_sum = err / num_of_batches

            # Check if the error is below the threshold
            if self.error_sum <= self.error_threshold:
                break

            num_iter += 1

        self.stopped_by = "max_iteration" if num_iter == self.max_iter else "error_threshold"

        if self.expected_weights:
            self.__print_final_weights()

    def predict(self, X_test):
        """Perform forward pass to make predictions on input X_test

        Args:
            X_test: Input data for prediction (list)

        Returns:
            Predicted outputs for each sample in X_test
        """
        current_inputs = np.array(X_test)
        for i in range(self.num_of_layers):
            net = [np.matmul(x, self.weights[i]) + self.bias_weights[i] for x in current_inputs]
            act_func = self.layers[i].activation_function
            if act_func == 'linear':
                res = [linear(x) for x in net]
            elif act_func == 'relu':
                res = [relu(n) for n in net]
            elif act_func == 'sigmoid':
                res = [sigmoid(n) for n in net]
            elif act_func == "softmax":
                res = [softmax(n) for n in net]
            current_inputs = res
        return res

    def calculate_sse(self):
        sse = 0
        for layer in range(len(self.expected_weights)):
            for neuron in range(len(self.expected_weights[layer])):
                expected = np.array(self.expected_weights[layer][neuron])
                result = self.bias_weights[layer] if neuron == 0 else self.weights[layer][neuron-1]
                squared_error = (expected - result) ** 2
                sse += np.sum(squared_error)
        return sse

    def __forward(self, batch):
        start_idx = self.batch_size * batch
        self.expected_output = self.y_train[start_idx:start_idx + self.__get_curr_batch_size(batch)]
        self.current_inputs = self.X_train[start_idx:start_idx + self.__get_curr_batch_size(batch)]
        res = self.current_inputs
        for i in range(self.num_of_layers):
            net = [np.matmul(x, self.weights[i]) + self.bias_weights[i] for x in res]
            act_func = self.layers[i].activation_function
            if act_func == 'linear':
                res = [linear(x) for x in net]
            if act_func == 'relu':
                res = [relu(n) for n in net]
            if act_func == 'sigmoid':
                res = [sigmoid(n) for n in net]
            if act_func == "softmax":
                res = [softmax(n) for n in net]
            self.neuron_values[i] = res
        # print("pred", self.neuron_values[-1])    
        self.prediction = list(self.neuron_values[-1])

    def __backward(self, batch_idx):
        """
        do backward propagation for each batch
        :param batch_idx: the current batch that is processed
        """
        self.__init_d_weights()
        # get the current batch size
        batch_size = self.__get_curr_batch_size(batch_idx)

        # for each X in the batch
        for i in range(batch_size):
            d_k = np.zeros(0)
            for j in range(self.num_of_layers - 1, -1, -1):
                if j == self.num_of_layers - 1:       
                    delta = self.__calc_output_layer_delta(i)
                else:
                    delta = self.__calc_hidden_layer_delta(i, j, d_k)
                x = self.current_inputs[i] if j == 0 else self.neuron_values[j - 1][i]
                self.d_weights[j] += np.array([[d * n for d in delta] for n in x])
                self.d_bias_weights[j] += np.array(delta)
                d_k = delta.reshape(delta.shape[0], 1)
        
        self.weights = [np.array(self.weights[k]) + np.array(self.d_weights[k]) * self.learning_rate for k in
                        range(len(self.weights))]
        self.bias_weights = [np.array(self.bias_weights[k]) + np.array(self.d_bias_weights[k]) * self.learning_rate for
                             k in range(len(self.bias_weights))]

    def __calculate_error(self, batch_idx):
        """
        Calculate the error for the current batch
        :param batch_idx: the current batch that is processed
        """
        start_idx = self.batch_size * batch_idx
        end_idx = start_idx + self.__get_curr_batch_size(batch_idx)
        y_true = np.array(self.y_train[start_idx:end_idx])
        y_pred = np.array(self.prediction)

        # Get the activation function of the output layer
        act_func = self.layers[-1].activation_function

        # Calculate the error based on the activation function
        if act_func in ['relu', 'sigmoid', 'linear']:
            return 0.5 * np.sum((y_true - y_pred) ** 2)
        elif act_func == 'softmax':
            epsilon = 1e-20  # avoid taking log of zero
            return -np.sum(y_true * np.log(y_pred + epsilon))
        else:
            raise ValueError(f"Unsupported activation function: {act_func}")

    def __update_weights(self):
        self.weights = [np.array(self.weights[k]) + np.array(self.d_weights[k]) * self.learning_rate for k in
                        range(len(self.weights))]
        self.bias_weights = [np.array(self.bias_weights[k]) + np.array(self.d_bias_weights[k]) * self.learning_rate for
                             k in range(len(self.bias_weights))]

    def __init_d_weights(self):
        self.d_weights = [np.array([np.zeros(len(neuron_weight)) for neuron_weight in layer_weight])
                          for layer_weight in self.weights]
        self.d_bias_weights = [np.zeros(layer.number_of_neurons) for layer in self.layers]

    def __calc_output_diff(self, x_idx: int) -> np.ndarray:
        """
        :param x_idx:  the index of the current input on the X_train
        """
        y_train = self.expected_output[x_idx]  # get the expected output of the x
        output = self.prediction[x_idx]  # get the prediction
        return np.array([y - p for y, p in zip(y_train, output)])

    def __calc_act_function_derivative(self, act_func: str, y: list, target=None) -> np.ndarray:
        """
        :param y:  y is the output in a layer

        :return : a 1D array which is the sigmoid gradient of the neurons in a layer
        """
        if act_func == 'sigmoid':
            return np.array([x * (1-x) for x in y])

        elif act_func == 'relu':
            return np.array([1 if x > 0 else 0 for x in y])

        elif act_func == 'linear':
            return np.array([1 for _ in y])

        elif act_func == 'softmax':
            if target is None:
                raise ValueError("Target is required for softmax gradient")
            return np.array([-1 * (1-y[i]) if target == i else y[i] for i in range(len(y))])

        else:
            raise ValueError(f"Unknown activation function: {act_func}")


    def __calc_output_layer_delta(self, x_idx: int) -> np.ndarray:
        """
        :param x_idx:  the index of the current input on the X_train
        """
        # get the activation function for the last layer (output layer)
        act_func = self.layers[-1].activation_function  # get the activation function
    
        if act_func == 'softmax':
            return self.__calc_output_diff(x_idx)
        return self.__calc_act_function_derivative(act_func, self.prediction[x_idx]) * self.__calc_output_diff(x_idx)

    def __calc_hidden_layer_delta(self, batch_idx, layer_idx: int, output_error_term: np.ndarray) -> np.ndarray:
        """
        :param output_error_term: a 1D array of the error term of each weight calculated from the layer after
        :param layer_idx: the index of the current layer
        :param batch_idx: the index of the current batch

        hidden layer gradient = net gradient of the neuron values of current layer * the sum of weight * output error term
        """
        act_func = self.layers[layer_idx].activation_function
        activation_func_derivative = self.__calc_act_function_derivative(act_func,
                                                                         self.neuron_values[layer_idx][batch_idx])

        sum_d_net = [x[0] for x in np.matmul(self.weights[layer_idx + 1], output_error_term)]
        return np.array(activation_func_derivative
                        * sum_d_net)

    def __get_curr_batch_size(self, batch_idx):
        mod_res = len(self.X_train) % self.batch_size
        if batch_idx == self.num_of_batches - 1 and mod_res != 0:
            return mod_res
        return self.batch_size

    def __print_final_weights(self):
        print("========= EXPECTED =========")
        for weight in self.expected_weights:
            print("[")
            for neuron_weight in weight:
                print("  ", neuron_weight)
            print("], ")
        print("STOPPED BY: ", self.expected_stopped_by)

        print("========== ACTUAL ==========")

        for i in range(len(self.weights)):
            print("[")
            print("  ", self.bias_weights[i])
            for neuron_weight in self.weights[i]:
                print("  ", neuron_weight)
            print("], ")
        print("STOPPED BY: ", self.stopped_by)
    
    def calc_score(self, y_true, predictions):
        """
        Calculate the accuracy of predictions.

        :param y_true: True labels.
        :param predictions: Predictions from the model, as probabilities.
        
        :return: Accuracy as a float.
        """
        y_pred_indices = np.argmax(predictions, axis=1)
        y_true_indices = np.argmax(y_true, axis=1)
        
        accuracy = np.mean(y_pred_indices == y_true_indices)
        return accuracy
    
    def save_model(self, file_name, directory="model"):
            """
            Saves the model weights and configuration to model directory.
            """
            if not os.path.exists(directory):
                os.makedirs(directory)
            
            model_data = {
            "final_weights": [],
            "config": {
                "layers": [{"number_of_neurons": layer.number_of_neurons,
                            "activation_function": layer.activation_function} for layer in self.layers],
                }
            }

            for weights, bias in zip(self.weights, self.bias_weights):
                bias_rounded = np.round(bias, 6)
                weights_rounded = np.round(weights,6)
                bias_reshaped = np.reshape(bias_rounded, (1, len(bias_rounded)))
                integrated_layer_weights = np.vstack([bias_reshaped, weights_rounded])
                model_data["final_weights"].append(integrated_layer_weights.tolist())

            new_file_name = "model-" + os.path.basename(file_name)
            # Save to JSON file
            with open(os.path.join(directory, new_file_name), "w") as json_file:
                json.dump(model_data, json_file)
            
            print("Model saved successfully to JSON.")

    @classmethod
    def load_model(cls, file_name, directory="model"):
        """
        Loads the model weights and configuration from model directory.
        """
        # Load configuration
        with open(os.path.join(directory, file_name), "r") as json_file:
            model_data = json.load(json_file)
        
        layers = [FFNNLayer
                  (layer_conf["number_of_neurons"], layer_conf["activation_function"])
                  for layer_conf in model_data["config"]["layers"]
                ]
        
        #  Create new instance
        classifier = cls(
            layers=layers,
            weights=[],  
        )

        classifier.weights = []
        classifier.bias_weights = []
        for integrated_weights in model_data["final_weights"]:
            np_weights = np.array(integrated_weights)
            classifier.bias_weights.append(np_weights[0, :])
            classifier.weights.append(np_weights[1:, :])  
        return classifier
    
    def print_model(self):
        for i in range(len(self.weights)):
            print("[")
            print("  ", self.bias_weights[i])
            for neuron_weight in self.weights[i]:
                print("  ", neuron_weight)
            print("], ")
    

## Main Program

Dijalankan untuk melakukan pengujian berdasarkan test case yang diberikan. Test case diuji dengan menuliskan path dari file test case pada input.

In [58]:
import json
file_path = input("Enter json file path: ")
f = open(file_path)
data = json.load(f)

try:
    data_layers = data["case"]["model"]["layers"]
    layers = []
    for layer in data_layers:
        activation_func = layer["activation_function"]
        if activation_func not in ["linear", "relu", "sigmoid", "softmax"]:
            raise Exception("Activation function " + activation_func + " not available")
        layers.append(FFNNLayer(layer["number_of_neurons"], activation_func))

    weights = data["case"]["initial_weights"]
    input_size = data["case"]["model"]["input_size"]
    X_train = data["case"]["input"]
    y_train = data["case"]["target"]
    learning_rate = data["case"]["learning_parameters"]["learning_rate"]
    batch_size = data["case"]["learning_parameters"]["batch_size"]
    max_iteration = data["case"]["learning_parameters"]["max_iteration"]
    error_threshold = data["case"]["learning_parameters"]["error_threshold"]

    expected_weights = data["expect"]["final_weights"]
    expected_stopped_by = data["expect"]["stopped_by"]

    model = MLPClassifier(layers, weights, learning_rate, error_threshold, max_iteration, batch_size, expected_stopped_by, expected_weights)

    model.fit(X_train, y_train)

    sse = model.calculate_sse()
    print(f"Sum Squared Error: {sse:.4f}")
    if sse < 1e-7:
        print("Sum Squared Error(SSE) of prediction is lower than Maximum SSE")
    else:
        print("Sum Squared Error(SSE) of prediction surpass the Maximum SSE")
    model.save_model(file_path)
except KeyError as ke:
    print('Key', ke, "not found in json data. Please check your json data format")
except Exception as error:
    print("An exception occurred: ", error)

[
   [0.1008, 0.3006, 0.1991]
   [0.402, 0.201, -0.7019]
   [0.101, -0.799, 0.4987]
], 
STOPPED BY:  max_iteration
[
   [0.1012 0.3006 0.1991]
   [ 0.4024  0.201  -0.7019]
   [ 0.1018 -0.799   0.4987]
], 
STOPPED BY:  max_iteration
Sum Squared Error: 0.0000
Sum Squared Error(SSE) of prediction surpass the Maximum SSE
Model saved successfully to JSON.


Pengujian test case menggunakan library **Keras Tensorflow**

In [59]:
import tensorflow as tf

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(input_size,)))

initial_weights = [np.array(layer) for layer in weights]

for i, layer_data in enumerate(data_layers):
  layer = tf.keras.layers.Dense(units=layer_data["number_of_neurons"], activation=layer_data["activation_function"], use_bias=True)

  if i == 0:
    layer.build(input_shape=(input_size,))
  else:
    layer.build(input_shape=(data_layers[i-1]["number_of_neurons"],))
  
  layer_bias = initial_weights[i][0]
  layer_weight = np.array(initial_weights[i][1:])
  layer.set_weights([layer_weight, layer_bias])
  model.add(layer)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), loss='mean_squared_error')
model.fit(np.array(X_train), np.array(y_train), epochs=max_iteration, batch_size=batch_size)

final_weights = []
for i, layer in enumerate(model.layers):
  keras_weights, biases = layer.get_weights()
  combined_layer_weights = [list(biases)] + list(keras_weights)
  final_weights.append(combined_layer_weights)

print("========= EXPECTED =========")
for weight in expected_weights:
    print("[")
    for neuron_weight in weight:
        print("  ", neuron_weight)
    print("], ")
print("STOPPED BY: ", expected_stopped_by)
print("========== ACTUAL ==========")
for weight in final_weights:
    print("[")
    for neuron_weight in weight:
        print("  ", neuron_weight)
    print("], ")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - loss: 0.2217
[
   [0.1008, 0.3006, 0.1991]
   [0.402, 0.201, -0.7019]
   [0.101, -0.799, 0.4987]
], 
STOPPED BY:  max_iteration
[
   [0.1004, 0.30020002, 0.1997]
   [ 0.40080002  0.20033334 -0.70063335]
   [ 0.1006     -0.7996667   0.49956667]
], 


## Pengujian pada Dataset Iris

Akan dilakukan pengujian pada dataset iris dengan parameter sebagai berikut:
* Struktur jaringan: 1 hidden layer 4 neuron dan fungsi aktivasi ReLU
* Initial weights: akan diinitialize secara random dengan nilai dalam interval -0.5 - 0.5
* learning_rate: 0.1
* error_threshold: 0.0001
* max_iter: 1000
* batch_size: 50

### Preprocessing data

In [60]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the data
data = pd.read_csv("test_cases_mlp/iris.csv")

# Split the data
X = data.drop(columns=["Species", "Id"])
y = data["Species"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Encode the data
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_test_encoded = encoder.transform(y_test)

def one_hot_encode(labels, num_classes):
    one_hot = np.zeros((len(labels), num_classes), dtype=int)
    one_hot[np.arange(len(labels)), labels] = 1
    return one_hot.tolist()

# One-hot encode the labels
y_train = one_hot_encode(y_train_encoded, 3)
y_test = one_hot_encode(y_test_encoded, 3)

### Pengujian pada kelas implementasi MLPClassifier

In [61]:
import random

# Softmax layer is added because the iris dataset is a classification task, aligned with sklearn implementation
layers = [
    FFNNLayer(4, 'relu'),
    FFNNLayer(3, 'softmax')
]

iteration = 0
max_score, max_model = -1, None
while iteration < 100:
    # Generate random weights for each layer
    initial_weights = []
    for i, layer in enumerate(layers):
        if i == 0:
            num_rows = len(X_train[0]) + 1
        else:
            num_rows = layers[i - 1].number_of_neurons + 1
        
        layer_weights = [[random.uniform(-0.5, 0.5) for _ in range(layer.number_of_neurons)] for _ in range(num_rows)]
        initial_weights.append(layer_weights)

    implementation_model = MLPClassifier(layers=layers, weights=initial_weights, learning_rate=0.1, error_threshold=0.0001, max_iter=1000, batch_size=50, stopped_by="error_threshold")
    implementation_model.fit(X_train, y_train)
    prediction_implementation_model = implementation_model.predict(X_test)

    score = implementation_model.calc_score(y_test, prediction_implementation_model)

    if score > max_score:
        max_score = score
        max_model = implementation_model

    if max_score > 0.95:
        break

    iteration += 1

# Save the model
print("Accuracy Score: ", max_score)
max_model.save_model("iris.json")

# Load the model again to check
loaded_model = MLPClassifier.load_model("model-iris.json")
loaded_model.print_model()

prediction_with_loaded_model = loaded_model.predict(X_test)
score_loaded_model = loaded_model.calc_score(y_test, prediction_with_loaded_model)
print("Accuracy Score by loaded model: ", score_loaded_model)

Accuracy Score:  0.9666666666666667
Model saved successfully to JSON.
[
   [-0.385767  0.707035 -2.181451  8.43606 ]
   [-0.197066 -1.517243 -0.265902  1.440485]
   [-0.060373  1.624506  0.782409  2.369535]
   [-0.060027 -2.781527 -0.14648  -7.446527]
   [-0.021666 -2.624976  0.328015 -4.714587]
], 
[
   [-5.005029  1.090531  4.0993  ]
   [-0.137552 -0.105708  0.065956]
   [ 3.41615  -2.726317 -0.84273 ]
   [-0.524307  0.482128  0.833498]
   [ 2.080643  3.807932 -5.52697 ]
], 
Accuracy Score by loaded model:  0.9666666666666667


### Pengujian menggunakan library scikit-learn

Dikarenakan inisialisasi model pada library scikit-learn tidak dapat didefinisikan initial weights-nya, maka parameter initial_weights tidak digunakan.

In [66]:
from sklearn.neural_network import MLPClassifier
# Define the model with mini-batch gradient descent 
model = MLPClassifier(hidden_layer_sizes=(4),learning_rate='constant', learning_rate_init=0.1, alpha=0.0001, solver='sgd', batch_size=50, max_iter=1000, activation='relu') 
  
# Train model
model.fit(X_train, y_train) 
  
# Evaluate model
score = model.score(X_test, y_test) 
print("Accuracy Score: ", score) 

Accuracy Score:  1.0
