# Deep Learning - Processing

## Importing Packages

In [None]:
# try:
#     from google.colab import drive
#     drive.mount('/content/drive', force_remount=False)
# except:
#     pass

In [None]:
# Detect whether the notebook runs on Google Colab and select the matching
# Google Drive locations for the datasets and the experiment outputs.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
except ImportError:
    # Only a missing package means "not on Colab". A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated failures.
    IN_COLAB = False
    datasets_folder = '/Google Drive/Colab Notebooks/DataSets/'
    experiments_folder = '/Google Drive/Colab Notebooks/Experiments/'
else:
    IN_COLAB = True
    datasets_folder = '/content/drive/My Drive/Colab Notebooks/DataSets/'
    experiments_folder = '/content/drive/My Drive/Colab Notebooks/Experiments/'

print("In Colab:", IN_COLAB)
print("Dataset Folder:", datasets_folder)
print("Experiments Folder:", experiments_folder)

In [None]:
!pip install livelossplot --quiet
!pip install -q -U keras-tuner --quiet
!pip install hiplot --quiet

In [None]:
import os
import gc
import json
import shutil
import time
import random
import IPython
from IPython.display import display
import hiplot as hip
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import History, ModelCheckpoint, CSVLogger, EarlyStopping, Callback
from tensorflow.keras.layers import GlobalMaxPooling1D, AveragePooling1D , Input, Activation, Dense, Dropout, SpatialDropout1D, Conv1D, TimeDistributed, MaxPooling1D, Flatten, ConvLSTM2D, Bidirectional, BatchNormalization, GlobalAvgPool1D, GlobalAveragePooling1D, MaxPooling1D, LSTM, GRU
from tensorflow.keras.utils import plot_model
from livelossplot import PlotLossesKerasTF
import kerastuner as kt
print("Using Tensorflow", tf.__version__)

## Packages Parameters

In [None]:
# seed_value = 7
# np.random.seed(seed_value)
# tf.random.set_seed(seed_value)
# random.seed(seed_value)

In [None]:
# Default size (width, height) used for every matplotlib figure in the notebook.
plt.rcParams.update({"figure.figsize": (16, 6)})
# Render floating point values in pandas output with five decimal places.
pd.set_option("display.float_format", '{:.5f}'.format)

## Dataset Functions

In [None]:
# Read by getReshapedData: when True every sliding window is kept; when False
# only windows ending on a multiple of the window size, or whose label is
# positive, are kept.
moving_window=False
# Read by getReshapedData: when True a window is labelled with its last sample's
# label; when False it is labelled with the rounded mean of its labels.
last_label=False
# Label columns extracted by default in getSubSets.
data_class_labels = ["irregular_speed_bump", "regular_speed_bump"]
# Default for getSubSets: when True the selected label columns are summed
# row-wise into a single column named `join_labels_name`.
join_labels=True
join_labels_name="speed_bump"

In [None]:
def getDataSets(folder=datasets_folder):

    """Read the raw PVS CSV files from disk.

    The folders "PVS 1" to "PVS 9" are expected directly under `folder`; from
    each one the left/right sensor files and the labels file are loaded.

    Args:
        folder (str): Root folder of PVS datasets.
            (default is datasets_folder)

    Returns:
        dict: datasets in a dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    def read(pvs_folder, filename, **read_options):
        # Small wrapper so each file is loaded with a single readable call.
        return pd.read_csv(os.path.join(pvs_folder, filename), **read_options)

    loaded = {}

    for number in range(1, 10):

        pvs_folder = os.path.join(folder, "PVS " + str(number))

        loaded["pvs_" + str(number)] = {
            "left": read(pvs_folder, 'dataset_gps_mpu_left.csv'),
            "right": read(pvs_folder, 'dataset_gps_mpu_right.csv'),
            # Labels are read as unsigned 8-bit integers.
            "labels": read(pvs_folder, 'dataset_labels.csv', dtype=np.uint8),
        }

    return loaded

def getFields(acc=False, gyro=False, mag=False, temp=False, speed=False, location=False, below_suspension=False, above_suspension=False, dashboard=False):

    """Select field names by sensor type and by sensor placement.

    A sensor field is returned only when both its type flag (acc, gyro, mag,
    temp) and its placement flag (below_suspension, above_suspension,
    dashboard) are enabled. The GPS fields ("speed", "latitude", "longitude")
    have no placement and are returned whenever their own flag is enabled.
    The timestamp fields are never returned.

    Args:
        acc (bool): to return accelerometer fields.
            (default is False)
        gyro (bool): to return gyroscope fields.
            (default is False)
        mag (bool): to return magnetometer fields.
            (default is False)
        temp (bool): to return temperature fields.
            (default is False)
        speed (bool): to return the speed field.
            (default is False)
        location (bool): to return the latitude/longitude fields.
            (default is False)
        below_suspension (bool): to return fields ending in "_below_suspension".
            (default is False)
        above_suspension (bool): to return fields ending in "_above_suspension".
            (default is False)
        dashboard (bool): to return fields ending in "_dashboard".
            (default is False)

    Returns:
        list: selected field names, in the order they appear in the datasets.
    """

    all_fields = [
        'timestamp',
        'acc_x_dashboard', 'acc_y_dashboard', 'acc_z_dashboard',
        'acc_x_above_suspension', 'acc_y_above_suspension', 'acc_z_above_suspension',
        'acc_x_below_suspension', 'acc_y_below_suspension', 'acc_z_below_suspension',
        'gyro_x_dashboard', 'gyro_y_dashboard', 'gyro_z_dashboard',
        'gyro_x_above_suspension', 'gyro_y_above_suspension', 'gyro_z_above_suspension',
        'gyro_x_below_suspension', 'gyro_y_below_suspension', 'gyro_z_below_suspension',
        'mag_x_dashboard', 'mag_y_dashboard', 'mag_z_dashboard',
        'mag_x_above_suspension', 'mag_y_above_suspension', 'mag_z_above_suspension',
        'temp_dashboard', 'temp_above_suspension', 'temp_below_suspension',
        'timestamp_gps', 'latitude', 'longitude', 'speed'
    ]

    # Prefixes of the enabled sensor types; an empty tuple matches nothing.
    wanted_prefixes = tuple(
        prefix
        for enabled, prefix in ((acc, "acc_"), (gyro, "gyro_"), (mag, "mag_"), (temp, "temp_"))
        if enabled
    )

    # Suffixes of the enabled placements; an empty tuple matches nothing.
    wanted_suffixes = tuple(
        suffix
        for enabled, suffix in (
            (below_suspension, "_below_suspension"),
            (above_suspension, "_above_suspension"),
            (dashboard, "_dashboard"),
        )
        if enabled
    )

    # GPS-derived fields bypass the type/placement filter.
    standalone = []
    if speed:
        standalone.append("speed")
    if location:
        standalone += ["latitude", "longitude"]

    return [
        name
        for name in all_fields
        if name in standalone or (name.startswith(wanted_prefixes) and name.endswith(wanted_suffixes))
    ]

def getSubSets(datasets, fields, labels=data_class_labels, join_labels=join_labels, join_labels_name=join_labels_name):

    """Project every raw PVS dataset onto the requested fields and labels.

    Args:
        datasets (dict): raw PVS datasets.
        fields (string[]): sensor columns to keep in the "left"/"right" frames.
        labels (string[]): label columns to keep.
            (default is data_class_labels)
        join_labels (bool): when truthy, the kept label columns are summed
            row-wise into a single column.
            (default is the module-level join_labels)
        join_labels_name (str): name of that single column.
            (default is the module-level join_labels_name)

    Returns:
        dict: subsets in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    subsets = {}

    for pvs, frames in datasets.items():

        left_frame = frames["left"][fields]
        right_frame = frames["right"][fields]
        label_frame = frames["labels"][labels]

        if join_labels:
            # Collapse all selected label columns into one column.
            label_frame = pd.DataFrame(data=label_frame.sum(axis=1), columns=[join_labels_name])

        subsets[pvs] = {
            "left": left_frame,
            "right": right_frame,
            "labels": label_frame
        }

    return subsets

def getNormalizedDataMinMax(subsets, scaler_range=(-1,1)):

    """Normalize the subsets with a MinMaxScaler.

    Args:
        subsets (dict): subsets to be normalized.
        scaler_range (tuple): target range, such as (0,1) or (-1,1).
            (default is (-1,1))

    Returns:
        dict: subsets normalized in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    return getNormalizedData(subsets, MinMaxScaler(feature_range=scaler_range))

def getNormalizedDataRobust(subsets):

    """Standardize the subsets with a RobustScaler (default settings).

    Args:
        subsets (dict): subsets to be standardized.

    Returns:
        dict: subsets standardized in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    return getNormalizedData(subsets, RobustScaler())

def getNormalizedData(subsets, scaler):

    """Fit a scaler on all sensor data and transform every subset with it.

    The scaler is fitted once on the row-wise concatenation of the "left" and
    "right" frames of every PVS subset, then applied to each frame separately.
    Label frames are passed through untouched (same objects).

    Args:
        subsets (dict): subsets to be standardized/normalized.
        scaler (object): object exposing `fit(X)` (returning the fitted
            scaler) and `transform(X)`, e.g. a scikit-learn scaler.

    Returns:
        dict: subsets normalized in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
        The transformed frames keep the original column names and get a fresh
        default index.
    """

    sides = ("left", "right")

    # Build the fitting data with a single concat: DataFrame.append was removed
    # in pandas 2.0 and re-copied the accumulated data on every iteration.
    frames = [subsets[pvs][side] for pvs in subsets for side in sides]
    learn_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    scaler = scaler.fit(learn_data)
    # Release the concatenated copy before allocating the transformed frames.
    del learn_data, frames

    normalized_sets = {}

    for pvs, parts in subsets.items():

        normalized = {
            side: pd.DataFrame(data=scaler.transform(parts[side]), columns=parts[side].columns)
            for side in sides
        }
        normalized['labels'] = parts['labels']
        normalized_sets[pvs] = normalized

    return normalized_sets

def getReshapedData(subsets, shape):

    """Cut every subset into windows and reshape each window to `shape`.

    The window length is the product of all dimensions of `shape` except the
    last one (the features). Behaviour also depends on two module-level flags:
    `last_label` (label a window with its last sample's label instead of the
    rounded mean of its labels) and `moving_window` (keep every sliding window
    instead of only those ending on a multiple of the window length or having
    a positive first label).

    Args:
        subsets (dict): subsets to be reshaped.
        shape (tuple): shape to reshape data. Must have the form (..., ..., ..., features).
            None entries are ignored.

    Returns:
        tuple: (reshaped_sets, window) where window is the window length and
        reshaped_sets has the form:
        {
            "pvs_x": {
                "left": np.array,
                "right": np.array,
                "labels": np.array
            }
        }
        "labels" holds the labels computed while processing the last side
        ("right").
    """

    shape = tuple(dim for dim in shape if dim is not None)

    print("Using last label" if last_label else "Using mode label")

    # Number of samples per window = total elements / number of features.
    total_elements = 1
    for dim in shape:
        total_elements *= dim
    window = int(total_elements / shape[-1])

    reshaped_sets = {}

    for pvs, parts in subsets.items():

        reshaped_sets[pvs] = {}

        for side in ('left', 'right'):

            samples = parts[side].values
            sample_labels = parts["labels"].values

            windows = []
            window_labels = []

            # `end` is the exclusive end index of the current window.
            for end in range(window, len(samples) + 1):

                start = end - window

                if last_label:
                    label = sample_labels[end - 1, :]
                else:
                    label = sample_labels[start:end, :].mean(axis=0).round(0)

                keep = moving_window or end % window == 0 or label[0] > 0

                if keep:
                    windows.append(samples[start:end, :].reshape(shape))
                    window_labels.append(label)

            reshaped_sets[pvs][side] = np.array(windows)
            reshaped_sets[pvs]['labels'] = np.array(window_labels)

    return reshaped_sets, window

def getTrainValidationSets(reshaped_sets, sets_train, sets_test, sides_train, sides_test):

    """Split the pre-processed sets into train and validation arrays.

    Each (PVS, side) pair goes to the train split when both its PVS key and
    its side are selected for training; otherwise to the validation split when
    both are selected for validation; otherwise it is dropped. The PVS
    "labels" array is appended once per selected side.

    Args:
        reshaped_sets (dict): all pre-processed sets.
        sets_train (string[]): PVS datasets to be used in train.
        sets_test (string[]): PVS datasets to be used in validation.
        sides_train (string[]): PVS datasets sides to be used in train.
        sides_test (string[]): PVS datasets sides to be used in validation.

    Returns:
        input_train (np.array): input train values.
        input_validation (np.array): input validation values.
        output_train (np.array): output train values.
        output_validation (np.array): output validation values.
    """

    train_inputs, train_outputs = [], []
    validation_inputs, validation_outputs = [], []

    for pvs, parts in reshaped_sets.items():

        for side in ("left", "right"):

            windows = parts[side]
            window_labels = parts["labels"]

            # Training membership takes precedence over validation membership.
            if pvs in sets_train and side in sides_train:
                train_inputs.extend(windows)
                train_outputs.extend(window_labels)

            elif pvs in sets_test and side in sides_test:
                validation_inputs.extend(windows)
                validation_outputs.extend(window_labels)

    return (
        np.array(train_inputs),
        np.array(validation_inputs),
        np.array(train_outputs),
        np.array(validation_outputs),
    )

## Experiments Parameters

In [None]:
# Train/validation splits evaluated by the experiments. Each entry lists the
# PVS datasets and the sensor sides used for training ("sets_train",
# "sides_train") and for validation ("sets_test", "sides_test"); the entries
# are consumed by getTrainValidationSets.
experiment_by_dataset = [
    { 
        "sets_train": ["pvs_1", "pvs_3", "pvs_4", "pvs_6", "pvs_7", "pvs_9"], 
        "sets_test": ["pvs_2", "pvs_5", "pvs_8"],
        "sides_train": ["left", "right"],
        "sides_test": ["left", "right"]
    },
    { 
        "sets_train": ["pvs_1", "pvs_2", "pvs_3", "pvs_7", "pvs_8", "pvs_9"], 
        "sets_test":  ["pvs_4", "pvs_5", "pvs_6"],
        "sides_train": ["left", "right"],
        "sides_test": ["left", "right"]
    },
    { 
        "sets_train": ["pvs_1", "pvs_2", "pvs_4", "pvs_6", "pvs_8", "pvs_9"], 
        "sets_test":  ["pvs_3", "pvs_5", "pvs_7"],
        "sides_train": ["left", "right"],
        "sides_test": ["left", "right"]
    },
    # Disabled alternative: train on the left side of every PVS and validate
    # on the right side.
    # { 
    #     "sets_train": ["pvs_1", "pvs_2", "pvs_3", "pvs_4", "pvs_5", "pvs_6", "pvs_7", "pvs_8", "pvs_9"], 
    #     "sets_test":  ["pvs_1", "pvs_2", "pvs_3", "pvs_4", "pvs_5", "pvs_6", "pvs_7", "pvs_8", "pvs_9"],
    #     "sides_train": ["left"],
    #     "sides_test": ["right"]
    # }
]

# (display name, field list) pairs, one per sensor placement. Every placement
# uses its accelerometer and gyroscope fields plus the GPS speed.
experiment_by_placement = [
    ("Below Suspension", getFields(acc=True, gyro=True, speed=True, below_suspension=True)),
    ("Above Suspension", getFields(acc=True, gyro=True, speed=True, above_suspension=True)),
    ("Dashboard",        getFields(acc=True, gyro=True, speed=True, dashboard=True))
]

## Model Management

In [None]:
# Training hyper-parameters shared by fitModel, evaluateModel, manageFiles and run.
batch_size=64 # batch size for fit/evaluate (32 was the previously tried value)
epochs=1000   # upper bound on epochs; EarlyStopping in fitModel usually ends training sooner
patience=10   # EarlyStopping: epochs without val_acc improvement before stopping
min_delta=0.0001 # EarlyStopping: minimum val_acc change counted as an improvement
shuffle=True  # forwarded to model.fit
num_tests=2   # number of independent trainings per configuration; the best one is kept

In [None]:
class ClearTrainingOutput(Callback):

    """Keras callback that clears the notebook cell output when training ends."""

    def on_train_end(self, *args, **kwargs):
        # Declared with an explicit `self` (the original relied on *args
        # capturing the instance). Any arguments Keras passes are accepted
        # and ignored.
        # wait=True delays the clearing until new output is available.
        IPython.display.clear_output(wait = True)

In [None]:
def createPathIfNotExists(path):

    """Create the directory `path` (and missing parents) unless it already exists.

    Args:
        path (str): directory to create.
    """

    if not os.path.exists(path):
        # exist_ok=True closes the check-then-create race: if the directory is
        # created by someone else between the check and this call, makedirs
        # no longer raises FileExistsError.
        os.makedirs(path, exist_ok=True)

def modelFileSavedFormat(filename):

    """Build the checkpoint file-name template for a model.

    The result still contains the `{acc}` and `{val_acc}` placeholders, to be
    filled in later with the train and validation accuracy.

    Args:
        filename (str): file-name prefix.

    Returns:
        str: prefix followed by the accuracy placeholders and ".hdf5".
    """

    metrics_suffix = '-train-acc-{acc:.5f}-val-acc-{val_acc:.5f}.hdf5'
    return "".join((filename, metrics_suffix))

def saveModelDiagram(model, path, filename, show=True):

    """Save the model architecture diagram as a PNG and optionally show it.

    Args:
        model: Keras model to draw.
        path (str): destination folder; created when missing.
        filename (str): diagram file name without the ".png" extension.
        show (bool): when True, also display the diagram and print the model
            summary in the notebook.
            (default is True)
    """

    createPathIfNotExists(path)
    diagram = plot_model(model, to_file=os.path.join(path, filename + '.png'), show_shapes=True, show_layer_names=True)

    if show:
        # Reuse the image returned by the call above; a second plot_model call
        # without `to_file` would write an extra "model.png" in the working
        # directory.
        display(diagram)
        # summary() prints the table itself and returns None, so wrapping it
        # in display() only added a stray "None" to the output.
        model.summary()
    
def showHistory(history):

    """Plot every metric recorded in a training history on the current figure.

    Args:
        history: object whose `history` attribute maps metric names to the
            sequence of values to plot (e.g. the return value of fitModel).
    """

    for metric_name, metric_values in history.history.items():
        plt.plot(metric_values, label=metric_name)

    plt.legend()
    
def fitModel(model, input_train, output_train, input_validation, output_validation, experiment_path, file_preffix):

    """Train a model with live plotting, best-model checkpointing and early stopping.

    Training is driven by the module-level settings `epochs`, `batch_size`,
    `shuffle`, `patience` and `min_delta`. Both the checkpoint and the early
    stopping monitor "val_acc".

    Args:
        model: compiled Keras model to train.
        input_train: training inputs.
        output_train: training targets.
        input_validation: validation inputs.
        output_validation: validation targets.
        experiment_path (str): folder receiving the checkpoint files; created
            when missing.
        file_preffix (str): prefix of the checkpoint file names.

    Returns:
        The value returned by `model.fit`.
    """

    createPathIfNotExists(experiment_path)

    # A CSVLogger (file_preffix + '-training-log.csv', append=True) used to be
    # part of the callbacks; it is currently not enabled.

    # Only the best epoch (highest val_acc) is written; the file name embeds
    # the accuracies of that epoch.
    checkpoint_template = os.path.join(experiment_path, modelFileSavedFormat(file_preffix))
    best_checkpoint = ModelCheckpoint(filepath=checkpoint_template, save_best_only=True, monitor='val_acc', mode='max')

    stop_on_plateau = EarlyStopping(monitor="val_acc", mode="max", patience=patience, min_delta=min_delta, restore_best_weights=True, verbose=1)

    training_callbacks = [PlotLossesKerasTF(), best_checkpoint, stop_on_plateau, ClearTrainingOutput()]

    fit_options = dict(
        validation_data=(input_validation, output_validation),
        epochs=epochs,
        batch_size=batch_size,
        validation_batch_size=batch_size,
        callbacks=training_callbacks,
        shuffle=shuffle,
        use_multiprocessing=True,
        workers=16,
        verbose=0,
    )

    return model.fit(input_train, output_train, **fit_options)

def predictModel(model, inputs):

    """Return the model's predictions for `inputs`.

    Args:
        model: object exposing `predict(inputs)`.
        inputs: values forwarded to `model.predict`.

    Returns:
        Whatever `model.predict` returns.
    """

    predictions = model.predict(inputs)
    return predictions
    
def evaluateModel(model, inputs, outputs):

    """Evaluate the model silently using the module-level `batch_size`.

    Args:
        model: object exposing `evaluate(inputs, outputs, batch_size, verbose)`.
        inputs: evaluation inputs.
        outputs: expected targets.

    Returns:
        Whatever `model.evaluate` returns.
    """

    evaluation = model.evaluate(inputs, outputs, batch_size=batch_size, verbose=0)
    return evaluation
    
def loadWeights(model, file_path):

    """Load weights from `file_path` into `model` in place.

    Args:
        model: object exposing `load_weights(file_path)`.
        file_path (str): weights file to load.

    Returns:
        None: the result of `model.load_weights` is discarded.
    """

    model.load_weights(file_path)
    return None

In [None]:
def tuneModel(tuneModel, modelParameters, experiment_by_placement_selected, experiment_by_dataset_selected, input_shape, output_shape):

    """Prepare a Hyperband tuner and the arguments for its search.

    The data of one placement and one train/validation split is selected,
    standardized with a RobustScaler, windowed to `input_shape` and split.
    The tuner is only created here; nothing is searched.

    Args:
        tuneModel (callable): builder called with the keyword arguments
            produced by `modelParameters`; returns (hypermodel, model_name).
            Inside this function the name shadows the function itself.
        modelParameters (callable): called as modelParameters(input_shape,
            output_shape); returns the builder's keyword arguments.
        experiment_by_placement_selected (tuple): one (placement, fields) entry.
        experiment_by_dataset_selected (dict): one train/validation split entry.
        input_shape (tuple): window shape passed to getReshapedData.
        output_shape: forwarded to `modelParameters`.

    Returns:
        tuple: (tuner, params) where params holds the x, y, validation_data,
        epochs and callbacks values prepared for the search.
    """

    _, fields = experiment_by_placement_selected

    # Each intermediate representation is dropped as soon as the next one exists.
    subsets = getSubSets(datasets.copy(), fields, data_class_labels)
    normalized_sets = getNormalizedDataRobust(subsets)
    del subsets
    reshaped_sets, _ = getReshapedData(normalized_sets.copy(), input_shape)
    del normalized_sets

    split = experiment_by_dataset_selected
    input_train, input_validation, output_train, output_validation = getTrainValidationSets(
        reshaped_sets,
        split['sets_train'],
        split['sets_test'],
        split['sides_train'],
        split['sides_test'],
    )
    del reshaped_sets

    print("Input Train Shape:", input_train.shape, "Output Train Shape:", output_train.shape)
    print("Input Validation Shape:", input_validation.shape, "Output Validation Shape:", output_validation.shape)

    hypermodel, model_name = tuneModel(**modelParameters(input_shape, output_shape))

    tuner = kt.Hyperband(
        hypermodel,
        objective='val_acc',
        max_epochs=10,
        factor=3,
        directory=os.path.join(experiments_folder, model_name)
    )

    search_callbacks = [
        EarlyStopping(monitor="val_acc", mode="max", patience=10),
        ClearTrainingOutput(),
    ]

    params = {
        "x": input_train,
        "y": output_train,
        "validation_data": (input_validation, output_validation),
        "epochs": 10,
        "callbacks": search_callbacks
    }

    tuner.search_space_summary()

    return tuner, params

def getTunerBestResults(tuner, num_trials=1):

    """Print the tuner's results summary and return its best hyper-parameters.

    Args:
        tuner: object exposing `results_summary(num_trials=...)` and
            `get_best_hyperparameters(num_trials=...)`.
        num_trials (int): number of best trials to report and return.
            (default is 1)

    Returns:
        Whatever `tuner.get_best_hyperparameters` returns.
    """

    tuner.results_summary(num_trials=num_trials)
    best_hyperparameters = tuner.get_best_hyperparameters(num_trials=num_trials)
    return best_hyperparameters

## Training Functions

In [None]:
def saveExecutionLog(path, data, columns=None):

    """Save a log for each experiment execution (params for each execution).

    The rows are written to "experiment-execution-log.csv" inside `path`,
    replacing any previous file, without the index column.

    Args:
        path (str): existing folder receiving the CSV file.
        data (list): one row per execution, with values ordered as `columns`.
        columns (string[]): column names. When None, the standard execution
            log columns are used.
            (default is None)
    """

    if columns is None:
        # Built per call rather than as a default argument, so the list is
        # never shared between calls.
        columns = ['placement', 'experiment', "window", "scaler", "input_shape", "output_shape", "train_loss", "val_loss", "train_acc", "val_acc"]

    save = pd.DataFrame(data=data, columns=columns)
    save.to_csv(os.path.join(path, "experiment-execution-log.csv"), index=False)

In [None]:
def manageFiles(history, experiment_folder, experiment_file):

    """Keep the checkpoint of the best of the `num_tests` trainings.

    The best training is the first one reaching the highest validation
    accuracy. Its checkpoint, whose file name embeds the accuracies of its
    best epoch, is moved from its "Test N" folder into `experiment_folder`;
    afterwards every "Test N" folder is deleted.

    Args:
        history (list): one metrics dict per training, with the keys "acc",
            "loss", "val_acc" and "val_loss" mapping to per-epoch lists.
        experiment_folder (str): folder containing the "Test N" folders.
        experiment_file (str): checkpoint file-name prefix.

    Returns:
        list: [train_loss, val_loss, train_acc, val_acc] of the best epoch.
    """

    best_test = -1
    best_epoch = -1
    best_val_acc = -1

    for test_number in range(num_tests):

        val_accuracies = history[test_number]['val_acc']
        candidate = max(val_accuracies)

        # Strict comparison: on ties the earliest training wins.
        if candidate > best_val_acc:
            best_val_acc = candidate
            best_test = test_number
            best_epoch = val_accuracies.index(candidate)

    best_history = history[best_test]
    train_acc = best_history['acc'][best_epoch]
    train_loss = best_history['loss'][best_epoch]
    val_acc = best_history['val_acc'][best_epoch]
    val_loss = best_history['val_loss'][best_epoch]

    # Rebuild the file name the checkpoint was saved under.
    checkpoint_name = modelFileSavedFormat(experiment_file).format(acc=train_acc, val_acc=val_acc)
    best_test_folder = os.path.join(experiment_folder, "Test " + str(best_test + 1))

    shutil.move(os.path.join(best_test_folder, checkpoint_name), os.path.join(experiment_folder, checkpoint_name))

    for test_number in range(num_tests):
        shutil.rmtree(os.path.join(experiment_folder, "Test " + str(test_number + 1)))

    return [train_loss, val_loss, train_acc, val_acc]

In [None]:
def getLoadBar():

    """Create the progress bars that track the experiments.

    Four tqdm bars are stored in the module-level names `load_bar_placement`,
    `load_bar_dataset`, `load_bar_input_shape` and `load_bar_retries`, sized
    after `experiment_by_placement`, `experiment_by_dataset`, `input_shapes`
    and `num_tests` respectively.
    """

    global load_bar_placement, load_bar_dataset, load_bar_input_shape, load_bar_retries

    # Resolve all totals first so that no bar is created if a setting is missing.
    total_placements, total_datasets, total_input_shapes = (
        len(experiment_by_placement),
        len(experiment_by_dataset),
        len(input_shapes),
    )

    load_bar_placement = tqdm(total=total_placements, desc='Experiment By Placement')
    load_bar_dataset = tqdm(total=total_datasets, desc='Experiment By Dataset')
    load_bar_input_shape = tqdm(total=total_input_shapes, desc='Experiment By Input Shape')
    load_bar_retries = tqdm(total=num_tests, desc='Retries')

In [None]:
def run(modelBuilder, modelParameters):

    """Train and log every placement / dataset split / input shape combination.

    For each combination the model is trained `num_tests` times; manageFiles
    keeps the best checkpoint and the metrics are appended to the execution
    log of the dataset experiment, which is rewritten after every input shape.

    Relies on module-level state: `datasets`, `input_shapes`, `output_shape`,
    the experiment settings, and the progress bars created by getLoadBar
    (which therefore has to be called first).

    Args:
        modelBuilder (callable): called with the keyword arguments produced by
            `modelParameters`; returns (model, model_name).
        modelParameters (callable): called as modelParameters(input_shape,
            output_shape); returns the builder's keyword arguments.
    """

    load_bar_placement.reset()

    for placement, fields in experiment_by_placement:

        load_bar_dataset.reset()
        # Normalization is done once per placement and reused by every split.
        subsets = getSubSets(datasets.copy(), fields, data_class_labels)
        normalized_sets = getNormalizedDataRobust(subsets) #getNormalizedDataMinMax(subsets, (-1,1))
        
        # Clean Memory
        del subsets
        gc.collect()

        for experiment_number in range(0, len(experiment_by_dataset)):

            load_bar_input_shape.reset()
            sets_train = experiment_by_dataset[experiment_number]['sets_train']
            sets_test = experiment_by_dataset[experiment_number]['sets_test']
            sides_train = experiment_by_dataset[experiment_number]['sides_train']
            sides_test = experiment_by_dataset[experiment_number]['sides_test']
            # One log per dataset experiment, accumulated across input shapes.
            execution_log = []

            for input_shape in input_shapes:

                load_bar_retries.reset()
                history = []
                model_args = modelParameters(input_shape, output_shape)

                reshaped_sets, window_size = getReshapedData(normalized_sets.copy(), input_shape)
                input_train, input_validation, output_train, output_validation = getTrainValidationSets(reshaped_sets, sets_train, sets_test, sides_train, sides_test)
                
                # Clean Memory
                del reshaped_sets
                gc.collect()

                print("Input Train Shape:", input_train.shape, "Output Train Shape:", output_train.shape)
                print("Input Test Shape:", input_validation.shape, "Output Test Shape:", output_validation.shape)

                for test in range(0, num_tests):

                    # A fresh model is built for every repetition.
                    model, model_name = modelBuilder(**model_args)
                    experiment_folder = os.path.join(experiments_folder, model_name, placement, "Dataset Experiment " + str(experiment_number + 1))
                    test_folder = os.path.join(experiment_folder, "Test " + str(test + 1))
                    diagram_file = "dataset-experiment-" + str(experiment_number + 1) + "-window-" + str(window_size)
                    experiment_file = diagram_file + "-robust-scaler"
                    saveModelDiagram(model, experiment_folder, diagram_file)
                    fit_history = fitModel(model, input_train, output_train, input_validation, output_validation, test_folder, experiment_file)
                    history.append(fit_history.history)
                    load_bar_retries.update(1)

                    # Clean Memory
                    del model, fit_history
                    gc.collect()

                # Keeps the best checkpoint and removes the "Test N" folders.
                # NOTE(review): experiment_folder and experiment_file are only
                # bound inside the loop above, so this assumes num_tests >= 1.
                metrics = manageFiles(history, experiment_folder, experiment_file)
                
                execution_log.append([
                    placement,
                    experiment_number + 1, 
                    window_size, 
                    "Robust Scaler", 
                    str(input_shape), 
                    str(output_shape)
                ] + metrics)

                # The CSV is rewritten in full after every input shape.
                saveExecutionLog(experiment_folder, execution_log)
                load_bar_input_shape.update(1)

                # Clean Memory
                del history, input_train, input_validation, output_train, output_validation, metrics
                gc.collect()
                time.sleep(5)

            load_bar_dataset.update(1)       
        
        # Clean Memory
        del normalized_sets
        load_bar_placement.update(1)

## Result Analysis

In [None]:
# Short class names used as confusion-matrix axis labels and as `target_names`
# of the classification report (presumably NSB = no speed bump, SB = speed bump).
data_class_labels_plot = ['NSB', 'SB']

In [None]:
def generateMetrics(modelBuilder, modelParameters):

    """Compute validation metrics for every trained configuration.

    For each placement / dataset split / input shape the model is rebuilt, the
    saved weights are loaded, the validation set is predicted, and a confusion
    matrix plus classification report are appended to the
    "classification_report.json" file of the dataset experiment folder.

    Relies on module-level state: `datasets`, `input_shapes`, `output_shape`
    and the experiment settings. The progress bars are local to this function.

    Args:
        modelBuilder (callable): called with the keyword arguments produced by
            `modelParameters`; returns (model, model_name).
        modelParameters (callable): called as modelParameters(input_shape,
            output_shape); returns the builder's keyword arguments.
    """

    experiment_total_placement = len(experiment_by_placement)
    experiment_total_dataset = len(experiment_by_dataset)
    experiment_total_input_shapes = len(input_shapes)
    
    load_bar_placement = tqdm(total=experiment_total_placement, desc='Experiment By Placement')
    load_bar_dataset = tqdm(total=experiment_total_dataset, desc='Experiment By Dataset')
    load_bar_input_shape = tqdm(total=experiment_total_input_shapes, desc='Experiment By Input Shape')

    load_bar_placement.reset()

    for placement, fields in experiment_by_placement:

        load_bar_dataset.reset()
        subsets = getSubSets(datasets.copy(), fields, data_class_labels)
        normalized_sets = getNormalizedDataRobust(subsets) #getNormalizedDataMinMax(subsets, (-1,1))
        
        # Clean Memory
        del subsets
        gc.collect()

        for experiment_number in range(0, len(experiment_by_dataset)):

            load_bar_input_shape.reset()
            sets_train = experiment_by_dataset[experiment_number]['sets_train']
            sets_test = experiment_by_dataset[experiment_number]['sets_test']
            sides_train = experiment_by_dataset[experiment_number]['sides_train']
            sides_test = experiment_by_dataset[experiment_number]['sides_test']
            # One report list per dataset experiment, accumulated across input shapes.
            experiments_report = []

            for input_shape in input_shapes:

                reshaped_sets, window_size = getReshapedData(normalized_sets.copy(), input_shape)
                input_train, input_validation, output_train, output_validation = getTrainValidationSets(reshaped_sets, sets_train, sets_test, sides_train, sides_test)
                
                # Clean Memory
                del reshaped_sets
                gc.collect()

                model_args = modelParameters(input_shape, output_shape)
                model, model_name = modelBuilder(**model_args)
                experiment_folder = os.path.join(experiments_folder, model_name, placement, "Dataset Experiment " + str(experiment_number + 1))
                experiment_file = "dataset-experiment-" + str(experiment_number + 1) + "-window-" + str(window_size) + "-robust-scaler"

                # Resolve the checkpoint saved for this window: its name starts
                # with the prefix built above and ends with the accuracies.
                # NOTE(review): if no file matches, experiment_file stays the
                # bare prefix and loadWeights is given a path without extension.
                for file in os.listdir(experiment_folder):
                    if file.startswith(experiment_file):
                        experiment_file = file

                loadWeights(model, os.path.join(experiment_folder, experiment_file))
                predictions = predictModel(model, input_validation)

                # Targets and predictions are rounded to the nearest integer
                # before being compared.
                experiments_report.append({
                    'params': {
                        'file': experiment_file,
                        'placement': placement,
                        'experiment_number': experiment_number + 1, 
                        'window_size': window_size,
                        'scaler': 'Robust Scaler',
                        'input_shape': str(input_shape),
                        'output_shape': str(output_shape)
                    },
                    'confusion_matrix': confusion_matrix(output_validation.round(0), predictions.round(0)).tolist(),
                    'classification_report': classification_report(output_validation.round(0), predictions.round(0), target_names=data_class_labels_plot, output_dict=True)
                })

                # The JSON file is rewritten in full after every input shape.
                with open(os.path.join(experiment_folder, 'classification_report.json'), 'w') as fp:
                    json.dump(experiments_report, fp, indent=4)

                load_bar_input_shape.update(1)

                # Clean Memory
                del model, predictions, input_train, input_validation, output_train, output_validation
                gc.collect()
                time.sleep(5)

            load_bar_dataset.update(1)       
        
        # Clean Memory
        del normalized_sets
        load_bar_placement.update(1)

In [None]:
def plotConfusionMatrix(values, title):

    """Plot a confusion matrix as an annotated heatmap and save it.

    The figure is shown and then written to "confusion_matrix.png" in the
    current working directory. Rows and columns are labelled with
    `data_class_labels_plot`.

    Args:
        values: confusion matrix values (rows: true labels, columns:
            predicted labels).
        title (str): figure title.
    """

    class_names = data_class_labels_plot
    matrix_frame = pd.DataFrame(values, index=class_names, columns=class_names)

    fig = plt.figure(figsize=(4,4))
    sns.set(font_scale=1.2)
    sns.heatmap(
        matrix_frame,
        annot=True,
        cmap=plt.cm.Blues,
        annot_kws={"size": 14},
    )

    plt.tight_layout()
    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    # Saved through the figure handle, which stays valid after plt.show().
    fig.savefig('confusion_matrix.png', bbox_inches="tight")

In [None]:
def plotHighDimensionalData(data):

    """Display a DataFrame as a HiPlot experiment.

    Args:
        data (DataFrame): rows to visualize.
    """

    experiment = hip.Experiment.from_dataframe(data)
    experiment.display()

In [None]:
def getInfoData(model_name):

    """Load the saved execution logs and classification reports of a model.

    Args:
        model_name (str): model folder name inside `experiments_folder`.

    Returns:
        dict: results in dict form:
        {
            placement: {
                "dataset_experimentN": {
                    "logs": DataFrame,
                    "metrics": parsed JSON
                }
            }
        }
    """

    placement_info = {}

    for placement, _ in experiment_by_placement:

        dataset_data = {}

        for number in range(1, len(experiment_by_dataset) + 1):

            folder = os.path.join(experiments_folder, model_name, placement, "Dataset Experiment " + str(number))

            logs = pd.read_csv(os.path.join(folder, 'experiment-execution-log.csv'))

            with open(os.path.join(folder, 'classification_report.json')) as report_file:
                metrics = json.load(report_file)

            dataset_data['dataset_experiment' + str(number)] = {
                'logs': logs,
                'metrics': metrics
            }

        placement_info[placement] = dataset_data

    return placement_info

In [None]:
def getInfoDataParsed(model_name, windows=(100, 200, 300, 400, 500)):

    """Summarize the saved results of a model, one table per dataset experiment.

    For every placement and dataset experiment, the execution log and the
    classification report are joined on the window size. A "Dataset Average"
    table with the element-wise mean over all dataset experiments is added
    per placement.

    Args:
        model_name (str): model folder name inside `experiments_folder`.
        windows (iterable of int): window sizes to report; each one must be
            present in both the execution log and the classification report.
            (default is (100, 200, 300, 400, 500))

    Returns:
        dict: results in dict form:
        {
            placement: {
                "Dataset Experiment N": DataFrame,
                "Dataset Average": DataFrame
            }
        }

    Raises:
        ValueError: if the classification report has no entry for a window.
        KeyError: if a report holds none of the known names for a class.
    """

    # With output_dict=True the report is keyed by class name. generateMetrics
    # writes the names from data_class_labels_plot, while this reader used to
    # expect the long names (presumably from reports written by an earlier
    # revision). Accept both so either kind of file can be parsed.
    no_bump_names = (data_class_labels_plot[0], 'Without Speed Bump')
    bump_names = (data_class_labels_plot[1], 'With Speed Bump')

    def class_metrics(report, candidate_names):
        # Return the per-class metrics stored under the first known name.
        for name in candidate_names:
            if name in report:
                return report[name]
        raise KeyError("classification report has none of the class names " + str(candidate_names))

    placement_info = {}

    for placement, _ in experiment_by_placement:

        dataset_info = {}
        total = None

        for experiment_number in range(0, len(experiment_by_dataset)):

            folder = os.path.join(experiments_folder, model_name, placement, "Dataset Experiment " + str(experiment_number + 1))

            logs_content = pd.read_csv(os.path.join(folder, 'experiment-execution-log.csv'))

            with open(os.path.join(folder, 'classification_report.json')) as f:
                metrics_content = json.load(f)

            window_info = []

            for window in windows:

                log_row = logs_content.loc[logs_content['window'] == window]

                report = next(
                    (row['classification_report'] for row in metrics_content if row['params']['window_size'] == window),
                    None
                )

                if report is None:
                    raise ValueError("no classification report for window " + str(window) + " in " + folder)

                bump = class_metrics(report, bump_names)
                no_bump = class_metrics(report, no_bump_names)

                window_info.append({
                    'Window': log_row['window'].values[0],
                    'Train Loss': log_row['train_loss'].values[0],
                    'Val Loss': log_row['val_loss'].values[0],
                    'Train Acc': log_row['train_acc'].values[0],
                    'Val Acc': log_row['val_acc'].values[0],
                    'Precision - SP': bump['precision'],
                    'Precision - NSP': no_bump['precision'],
                    'Recall - SP': bump['recall'],
                    'Recall - NSP': no_bump['recall'],
                    'F1-Score - SP': bump['f1-score'],
                    'F1-Score - NSP': no_bump['f1-score']
                })

            experiment_frame = pd.DataFrame(window_info)

            # Non in-place addition, so the per-experiment frames stored below
            # are never modified by the running total.
            total = experiment_frame if total is None else total + experiment_frame

            dataset_info["Dataset Experiment " + str(experiment_number + 1)] = experiment_frame

        # Average over the actual number of dataset experiments (was a fixed 3).
        dataset_info["Dataset Average"] = total / len(experiment_by_dataset)
        placement_info[placement] = dataset_info

    return placement_info

In [None]:
# Prints a marker string; presumably the notebook revision, to confirm which
# version of this file was executed -- TODO confirm.
print("V37")