<a href="https://colab.research.google.com/github/chaserobertson/credit-fraud/blob/main/mlp_hyp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet mealpy
!pip install --quiet permetrics

In [2]:
# !/usr/bin/env python
# Created by "Thieu" at 23:59, 14/12/2021 ----------%
#       Email: nguyenthieu2102@gmail.com            %
#       Github: https://github.com/thieu1995        %
# --------------------------------------------------%

# https://machinelearningmastery.com/how-to-develop-multilayer-perceptron-models-for-time-series-forecasting/

# 1. Fitness function
# 2. Lower bound and upper bound of variables
# 3. Min or max
# 4. Number of dimensions (number of variables)


# Assumption: we are optimizing a multi-layer perceptron with 3 layers: 1 input, 1 hidden, 1 output.
# 1. Batch-size training
# 2. Epoch training
# 3. Optimizer
# 4. Learning rate
# 5. network weight initialization
# 6. activation functions
# 7. number of hidden units

# Rules:
# 1. Batch-size: [ 2, 4, 8 ]
# 2. Epoch : [50, 60, ..., 200]
# 3. Optimizer: ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
# 4. Learning rate: [0.01 -> 0.5]       real number
# 5. network weight initialization: ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
# 6. activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
# 7. hidden units: [5, 50] --> integer number


# solution = [ x1, x2, x3, x4, x5, x6, x7, x8,  ]

# 1st solution: hidden layer = 2  ==> [ x1, x2, x3, x4, x5, x6, x7, x8, x9, x10 ]
# x9: the number of hidden units of layer 1
# x10: the number of hidden units of layer 2
# 2nd solution: hidden layer = 4 ==> [ x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12 ]
# But in Metaheuristic Process --> You can not expand the solution.
# The number of dimensions is fixed before and after the MH process.


# 1 way: solution = [ x1, x2, x3, x4, x5, x6, x7, x8,  ]
# x8: should be the number of hidden layers
# x7: should be the number of hidden nodes in each layer --> all hidden layers have the same number of hidden nodes.


# 2 way: I limit the number of hidden layers to 5. The number of hidden layers belongs to [1, 5]

# solution = [ x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13 ]
# x8: number of hidden layers
# x9: number of hidden units in the 1st hidden layer
# x10: ..... 2nd hidden layer
# x11: .... 3rd hidden layer
# x12: .... 4th hidden layer
# x13: .... 5th hidden layer

# if a solution with x8 = 2 hidden layers ==>
# x9, x10 --> then ignore other values: x11, x12, x13

# solution 1 = [x1, x2, x3, x4, x5, x6, x7, x8, 100, 50, 10, 10, 10]
# solution 2 = [x1, x2, x3, x4, x5, x6, x7, x8, 100, 50, 10, 10, 20]
# solution 3 = [x1, x2, x3, x4, x5, x6, x7, x8, 100, 50, 10, 10, 30]

# 8. Number of hidden layers with number of hidden nodes in each layer.


# univariate mlp example
import numpy as np
import tensorflow as tf
import keras
from keras import backend as k

import statsmodels.datasets.co2 as co2
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from permetrics.regression import RegressionMetric
#from mealpy.examples.applications.keras.timeseries_util import decode_solution, generate_loss_value
from mealpy.evolutionary_based import FPA
from mealpy.swarm_based import PSO


# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Slice a univariate sequence into (window, next value) samples.

    Every sample pairs ``n_steps`` consecutive observations with the single
    observation that immediately follows them.

    Reference:
    https://machinelearningmastery.com/machine-learning-data-transforms-for-time-series-forecasting/

    Args:
        sequence: Indexable, sliceable series of observations.
        n_steps: Number of consecutive observations in each input window.

    Returns:
        A pair ``(X, y)`` of numpy arrays. ``X`` stacks the windows and ``y``
        holds the observation following each window. Both are empty when the
        sequence is not longer than ``n_steps``.
    """
    total = len(sequence)
    # A window starting at s needs index s + n_steps to exist as its target,
    # so the last usable start is total - n_steps - 1; the min() keeps the
    # number of starts capped at the sequence length.
    starts = range(min(total, total - n_steps))
    windows = [sequence[s:s + n_steps] for s in starts]
    targets = [sequence[s + n_steps] for s in starts]
    return np.array(windows), np.array(targets)


def generate_data(n_steps=3, train_ratio=0.75):
    """Build the scaled CO2 train/test samples used by the fitness function.

    Loads the statsmodels CO2 dataset, fills missing readings from an
    interpolated copy, rescales the values with ``MinMaxScaler`` and cuts the
    flattened series chronologically into a training part and a test part.
    Each part is turned into samples independently with ``split_sequence``.

    Args:
        n_steps: Number of past values in each input window (default 3).
        train_ratio: Fraction of the series, counted from the start, that is
            used for training (default 0.75); the remainder is the test part.

    Returns:
        dict with the keys "X_train", "y_train", "X_test", "y_test" and
        "n_steps".
    """
    ## Make dataset
    dataset = co2.load(as_pandas=True).data
    dataset = dataset.fillna(dataset.interpolate())
    scaler = MinMaxScaler()
    scaled_seq = scaler.fit_transform(dataset.values).flatten()

    # chronological split: the first `train_ratio` of the series is training data
    x_train_point = int(len(scaled_seq) * train_ratio)
    X_train, y_train = split_sequence(scaled_seq[:x_train_point], n_steps)
    X_test, y_test = split_sequence(scaled_seq[x_train_point:], n_steps)

    return {"X_train": X_train, "y_train": y_train, "X_test": X_test, "y_test": y_test, "n_steps": n_steps}


def decode_solution(solution, data):
    """Translate a numeric solution vector into MLP hyper-parameters.

    Positions of ``solution``:
        0: batch-size exponent (batch size is 2 ** int(value))
        1: epoch multiplier (epochs are 10 * int(value))
        2: optimizer code, decoded with data["OPT_ENCODER"]
        3: learning rate, used as-is
        4: weight-initializer code, decoded with data["WOI_ENCODER"]
        5: activation code, decoded with data["ACT_ENCODER"]
        6: number of hidden units (int(value))

    Args:
        solution: Indexable vector with at least seven numeric entries.
        data: Mapping holding the three encoders named above; each must
            provide ``inverse_transform``.

    Returns:
        dict with the keys "batch_size", "epoch", "opt", "learning_rate",
        "network_weight_initial", "activation" and "n_hidden_units".
    """
    def _category(encoder_key, position):
        # Truncate the real-valued gene to an integer code, then map the
        # code back to its label through the matching encoder.
        code = int(solution[position])
        return data[encoder_key].inverse_transform([code])[0]

    structure = {}
    structure["batch_size"] = 2 ** int(solution[0])
    structure["epoch"] = 10 * int(solution[1])
    structure["opt"] = _category("OPT_ENCODER", 2)
    structure["learning_rate"] = solution[3]
    structure["network_weight_initial"] = _category("WOI_ENCODER", 4)
    structure["activation"] = _category("ACT_ENCODER", 5)
    structure["n_hidden_units"] = int(solution[6])
    return structure


def generate_loss_value(structure, data):
    """Train one MLP described by ``structure`` and return its test-set MSE.

    Args:
        structure: Hyper-parameter dict as produced by ``decode_solution``;
            the keys "n_hidden_units", "activation", "network_weight_initial",
            "opt", "learning_rate", "epoch" and "batch_size" are read.
        data: Dict providing "X_train", "y_train", "X_test", "y_test" and
            "n_steps" (the input width of the network).

    Returns:
        The mean squared error between ``data["y_test"]`` and the model's
        predictions for ``data["X_test"]``, as computed by ``RegressionMetric``.
    """
    # This function runs once per candidate solution, each time building a new
    # model. Drop the global Keras state left behind by earlier candidates so
    # memory does not keep growing over the course of the search.
    tf.keras.backend.clear_session()

    # define model: a single hidden layer followed by one linear output unit
    model = Sequential()
    model.add(Dense(structure["n_hidden_units"], activation=structure["activation"],
                    input_dim=data["n_steps"], kernel_initializer=structure["network_weight_initial"]))
    model.add(Dense(1))

    # Compile model; the optimizer class is looked up by name on tf.keras.optimizers
    optimizer = getattr(tf.keras.optimizers, structure["opt"])(learning_rate=structure["learning_rate"])
    model.compile(optimizer=optimizer, loss='mse')

    # fit model
    model.fit(data["X_train"], data["y_train"], epochs=structure["epoch"], batch_size=structure["batch_size"], verbose=2)

    # The MSE on the held-out test samples is the fitness value used to
    # compare candidate models.
    y_pred = model.predict(data["X_test"])

    evaluator = RegressionMetric(data["y_test"], y_pred, decimal=6)
    return evaluator.mean_squared_error()


def fitness_function(solution, data):
    """Score one candidate solution for the optimizer.

    Decodes ``solution`` into hyper-parameters with ``decode_solution`` and
    returns the loss that ``generate_loss_value`` reports for them.

    Args:
        solution: Numeric solution vector supplied by the optimizer.
        data: The problem's data dict, passed through to both helpers.

    Returns:
        The loss value of the decoded model.
    """
    return generate_loss_value(decode_solution(solution, data), data)


if __name__ == "__main__":
    # LABEL ENCODER
    # The search runs over numbers, so every categorical hyper-parameter gets
    # an encoder whose integer codes decode_solution() turns back into names.
    OPT_ENCODER = LabelEncoder().fit(
        ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'])  # domain range ==> 7 values
    WOI_ENCODER = LabelEncoder().fit(
        ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'])
    ACT_ENCODER = LabelEncoder().fit(
        ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'])

    # The data dict carries both the samples and the encoders to the fitness function.
    DATA = generate_data()
    DATA.update(OPT_ENCODER=OPT_ENCODER, WOI_ENCODER=WOI_ENCODER, ACT_ENCODER=ACT_ENCODER)

    # One (lower, upper) pair per solution position. Upper bounds such as 3.99
    # keep int() truncation in decode_solution() on the last valid integer.
    LB, UB = (list(side) for side in zip(
        (1, 3.99),      # solution[0]: batch-size exponent
        (5, 20.99),     # solution[1]: epoch multiplier
        (0, 6.99),      # solution[2]: optimizer code
        (0.01, 0.5),    # solution[3]: learning rate
        (0, 7.99),      # solution[4]: weight-initializer code
        (0, 7.99),      # solution[5]: activation code
        (5, 50),        # solution[6]: hidden units
    ))

    problem = dict(
        fit_func=fitness_function,
        lb=LB,
        ub=UB,
        minmax="min",
        log_to=None,
        save_population=False,
        data=DATA,
    )
    # model = FPA.BaseFPA(problem, epoch=5, pop_size=20)
    model = PSO.BasePSO(problem, epoch=5, pop_size=20)
    model.solve()

    # Report the best vector found, then its decoded hyper-parameters.
    print(f"Best solution: {model.solution[0]}")
    sol = decode_solution(model.solution[0], DATA)

    print(f"Batch-size: {sol['batch_size']}, Epoch: {sol['epoch']}, Opt: {sol['opt']}, "
          f"Learning-rate: {sol['learning_rate']}, NWI: {sol['network_weight_initial']}, "
          f"Activation: {sol['activation']}, n-hidden: {sol['n_hidden_units']}")

  import pandas.util.testing as tm


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 19/200
214/214 - 0s - loss: 1.1653e-04 - 266ms/epoch - 1ms/step
Epoch 20/200
214/214 - 0s - loss: 1.1754e-04 - 255ms/epoch - 1ms/step
Epoch 21/200
214/214 - 0s - loss: 1.2825e-04 - 283ms/epoch - 1ms/step
Epoch 22/200
214/214 - 0s - loss: 1.1467e-04 - 284ms/epoch - 1ms/step
Epoch 23/200
214/214 - 0s - loss: 1.1658e-04 - 262ms/epoch - 1ms/step
Epoch 24/200
214/214 - 0s - loss: 1.2054e-04 - 277ms/epoch - 1ms/step
Epoch 25/200
214/214 - 0s - loss: 1.1976e-04 - 272ms/epoch - 1ms/step
Epoch 26/200
214/214 - 0s - loss: 1.1306e-04 - 287ms/epoch - 1ms/step
Epoch 27/200
214/214 - 0s - loss: 1.2813e-04 - 280ms/epoch - 1ms/step
Epoch 28/200
214/214 - 0s - loss: 1.1477e-04 - 270ms/epoch - 1ms/step
Epoch 29/200
214/214 - 0s - loss: 1.0961e-04 - 259ms/epoch - 1ms/step
Epoch 30/200
214/214 - 0s - loss: 1.1315e-04 - 266ms/epoch - 1ms/step
Epoch 31/200
214/214 - 0s - loss: 1.2350e-04 - 259ms/epoch - 1ms/step
Epoch 32/200
214/214 - 0s