# Machine Learning - Training the models using a LSTM based network

First we are importing the necessary libraries and the datasets

In [1]:
import tensorflow as tf

from numpy import genfromtxt
import pandas as pd
from sklearn import preprocessing, model_selection
from tensorflow import keras
from tensorflow.metrics import precision
import matplotlib.pyplot as plt 
import os
from sklearn.metrics import precision_recall_fscore_support

features = genfromtxt('../datasets/final-data/features.csv', delimiter=',')
labels = genfromtxt('../datasets/final-data/labels.csv', delimiter=',')
#embedding_matrix = genfromtxt('../datasets/final-data/embedding_matrix.csv', delimiter=',')

train_x, test_x, train_y, test_y = model_selection.train_test_split(features,labels,test_size = 0.3, random_state = 0)

In [3]:
train_x.shape

(2982, 300)

# Creating the models to be tested

Now we are combining all the variable parameters and creating multiple models to be tested

In [17]:
# input_dim = len(embedding_matrix)
# input_length = len(train_x[0])

# Variable parameters to be tested
units_options = [150, 75, 35]
activation_functions = ["relu", "sigmoid", "tanh"]
learning_rates = [0.1, 0.01, 0.001, 0.0001]
momentum_options = [0.1, 0.01, 0.001, 0.0001]
decay_options = [0.1, 0.01, 0.001, 0.0001]

#Create multiple models combining all the parameters
models = []
for units in units_options:
    for learning_rate in learning_rates:
        for momentum in momentum_options:
            for activation_function in activation_functions:
                for decay in decay_options:

                    #Creating the network structure
                    model = keras.Sequential()

                    # Input, 
                    # Dense(linear(units = 150)), 
                    # Dense(relu), 
                    # batch_normalization(opcional, BatchNormalization1D), 
                    # linear(2)

                    model.add(
                        keras.layers.Input(
                            shape=300,
                            sparse=False
                        )
                    )


                    model.add(keras.layers.Dense(150))
                    model.add(keras.layers.Dense(units, activation = activation_function))
                    model.add(keras.layers.BatchNormalization())
                    model.add(keras.layers.Dense(2, activation='softmax'))

                    # Setting the optimizers parameters
                    optimizer = tf.keras.optimizers.SGD(
                        learning_rate=learning_rate,
                        decay=decay,
                        momentum=momentum,
                        nesterov=True
                    )

                    # Compiling the model
                    model.compile(
                        optimizer = optimizer,
                        loss='sparse_categorical_crossentropy',
                        metrics=['acc', 'mae', 'mse'])

                    # Including the new model in the models array
                    models.append(
                        {
                            "model_name": 'model_' + str(len(models) + 1),
                            "units": units, 
                            "learning_rate": learning_rate, 
                            "momentum": momentum,
                            "decay": decay,
                            "activation_function": 'relu',
                            "model": model
                        }
                    )


pd.DataFrame(models) 

Unnamed: 0,activation_function,decay,learning_rate,model,model_name,momentum,units
0,relu,0.1000,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_1,0.1000,150
1,relu,0.0100,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_2,0.1000,150
2,relu,0.0010,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_3,0.1000,150
3,relu,0.0001,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_4,0.1000,150
4,relu,0.1000,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_5,0.1000,150
5,relu,0.0100,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_6,0.1000,150
6,relu,0.0010,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_7,0.1000,150
7,relu,0.0001,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_8,0.1000,150
8,relu,0.1000,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_9,0.1000,150
9,relu,0.0100,0.1000,<tensorflow.python.keras.engine.sequential.Seq...,model_10,0.1000,150


# Training the models

Training all the created models and storing their performances

In [16]:
import numpy as np
training_results = []

trained_models = 1

for model_data in models:
    model = model_data["model"]

    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

    history = model.fit(
        train_x, 
        train_y, 
        epochs = 50, 
        validation_split=0.3, 
        batch_size = 16,  
        verbose=1, 
        shuffle=True,
        callbacks=[early_stop]
    )

    loss, acc, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    
    test_output_probabilities = model.predict(
        test_x,
        batch_size=16,
        verbose=1,
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False
    )
    
    predicted_y = np.argmax(test_output_probabilities, axis=1)
    
    precision, recall, fs_score, support = precision_recall_fscore_support(
        y_true = test_y, 
        y_pred = predicted_y, 
        average = 'binary'
    )

    training_results.append(
        {
            "model_name": model_data["model_name"],
            "units": model_data["units"], 
            "learning_rate": model_data["learning_rate"], 
            "momentum": model_data["momentum"],
            "decay": model_data["decay"],
            "activation_function": model_data["activation_function"],
            "model": model,
            "history": history,
            "acc": acc,
            "loss": loss,
            "mae": mae,
            "mse": mse,
            "precision": precision,
            "recall": recall,
            "fs_score": fs_score,
            "test_output_probabilities": test_output_probabilities,
            "test_y": test_y,
            "predicted_y": predicted_y
        }
    )
    print("Trained models: {}".format(trained_models))
    trained_models = trained_models + 1

Train on 2087 samples, validate on 895 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Trained models: 1


# Creating a dataframe with the training metrics

We are going to store those metrics in a separate file

In [107]:
training_metrics_df = pd.DataFrame(training_results)

training_metrics_df = training_metrics_df[['model_name', 'units', 'learning_rate', 'momentum', 'decay', 'activation_function', 'acc', 'loss', 'mae', 'mse', 'precision', 'recall', 'fs_score']]

training_metrics_df.head(10)

Unnamed: 0,model_name,units,learning_rate,momentum,decay,activation_function,acc,loss,mae,mse,precision,recall,fs_score
0,model_1,150,0.01,0.01,0.01,tanh,0.475477,0.695833,0.5,0.250592,0.478261,0.476965,0.477612


# Saving the general experiments files

Saving the files with general data about the experiment:

- All datasets used(train_x, train_y, test_x, test_y)
- The training metrics of the models

In [110]:
np.savetxt("../experiments/first_lstm_attempt/train_x.csv", train_x, delimiter=",")
np.savetxt("../experiments/first_lstm_attempt/train_y.csv", train_y, delimiter=",")
np.savetxt("../experiments/first_lstm_attempt/test_x.csv", test_x, delimiter=",")
np.savetxt("../experiments/first_lstm_attempt/test_y.csv", test_y, delimiter=",")
training_metrics_df.to_csv('../experiments/first_lstm_attempt/training_metrics.csv', index= False, encoding='utf-8')

# Saving specific metrics of each model

For each trained model we are going to save:

- Predicted labels over the test_x
- The output probability on the test_x prediction
- The history of the training
- The trained model

In [119]:
for training_result in training_results:
    # Created the model folder
    model_folder = "../experiments/first_lstm_attempt/trained_models/" + training_result["model_name"]
    if not os.path.exists(model_folder):
        os.mkdir(model_folder)
    
    # Saves the predicted labels and the predicted probabilities on the test_x predictions
    np.savetxt(model_folder + "/predicted_y.csv", training_result["predicted_y"], delimiter=",")
    np.savetxt(model_folder + "/test_output_probabilities.csv", training_result["test_output_probabilities"], delimiter=",")
    
    # Creates a history dataframe and saves it in a file
    history_df = pd.DataFrame(training_results[0]["history"].history)
    history_df['epoch'] = history.epoch

    history_df.to_csv(model_folder + "/history.csv", index= False, encoding='utf-8')
    
    # Saves the trained model in a file
    training_result["model"].save(filepath=model_folder + "/model.hdf5", overwrite=True, include_optimizer=True)