<h1>Deep Learning - Processing</h1>

<h5>Data Parameters</h5>

In [None]:
# Detect whether the notebook runs on Google Colab. `output` is kept around so
# other cells can clear the cell output when it is available.
try:
    from google.colab import output
    IN_COLAB = True
except ImportError:
    # Only a missing google.colab package means "not on Colab"; any other
    # failure should surface instead of being silently swallowed.
    output = None
    IN_COLAB = False

# Root folders for the datasets and for the experiment results.
if IN_COLAB:
    datasets_folder = '/drive/My Drive/Colab Notebooks/DataSets/'
    experiments_folder = '/drive/My Drive/Colab Notebooks/Experiments/'
else:
    datasets_folder = '/Google Drive/Colab Notebooks/DataSets/'
    experiments_folder = '/Google Drive/Colab Notebooks/Experiments/'

print("In Colab:", IN_COLAB)
print("Dataset Folder:", datasets_folder)
print("Experiments Folder:", experiments_folder)

<h5>Importing Packages</h5>

In [None]:
try:
    import livelossplot
except:
    !pip install livelossplot --quiet

In [None]:
import os
import gc
import shutil
import time
import numpy as np
import pandas as pd
import librosa as lr
import librosa.display
import seaborn as sns
import scipy.signal as signal
from matplotlib import colors
import matplotlib.pyplot as plt
from IPython.display import display
from tqdm.notebook import tqdm
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import confusion_matrix

In [None]:
# -------------------------------------
# TensorFlow/Keras Selection
# -------------------------------------

# useKerasTfV1=False
# useKerasTfV2=False
# useTfV2=True

if useKerasTfV1:

    if IN_COLAB:
        %tensorflow_version 1.x
    
    import tensorflow as tf  
    import keras
    from keras.models import Sequential
    from keras.callbacks import History, ModelCheckpoint, CSVLogger, EarlyStopping
    from keras.layers import Activation, Dense, Dropout, SpatialDropout1D, Conv1D, TimeDistributed, MaxPooling1D, Flatten, ConvLSTM2D, Bidirectional, BatchNormalization, GlobalAvgPool1D, GlobalAveragePooling1D, MaxPooling1D, CuDNNLSTM as LSTM
    from keras.utils import plot_model
    
    from livelossplot import PlotLossesKeras

elif useKerasTfV2:

    import keras
    from keras.models import Sequential
    from keras.callbacks import History, ModelCheckpoint, CSVLogger, EarlyStopping
    from keras.layers import Activation, Dense, Dropout, SpatialDropout1D, Conv1D, TimeDistributed, MaxPooling1D, Flatten, ConvLSTM2D, Bidirectional, BatchNormalization, GlobalAvgPool1D, GlobalAveragePooling1D, MaxPooling1D, LSTM
    from keras.utils import plot_model
    
    from livelossplot import PlotLossesKerasTF as PlotLossesKeras
    
    if not(keras.__version__.startswith("2.3")):
        !pip install "keras>=2.3.0"

elif useTfV2:
    
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.callbacks import History, ModelCheckpoint, CSVLogger, EarlyStopping
    from tensorflow.keras.layers import Activation, Dense, Dropout, SpatialDropout1D, Conv1D, TimeDistributed, MaxPooling1D, Flatten, ConvLSTM2D, Bidirectional, BatchNormalization, GlobalAvgPool1D, GlobalAveragePooling1D, MaxPooling1D, LSTM, GRU
    from tensorflow.keras.utils import plot_model
    
    from livelossplot import PlotLossesKerasTF as PlotLossesKeras

try:
    print("Using Tensorflow", tf.__version__)
except:
    pass

try:
    print("Using Keras", keras.__version__)
except:
    pass

In [None]:
def showDevices():
    """Print the devices reported by TensorFlow, followed by the `nvidia-smi` output."""
    print("Devices Tensorflow")

    if useKerasTfV1:
        # TF 1.x selection: query through the compat.v1 namespace.
        print(tf.compat.v1.config.list_physical_devices())
    else:
        print(tf.config.list_physical_devices())

    print("")
    print("Devices Nvidia")
    # IPython shell escape. NOTE(review): presumably needs the NVIDIA tools on PATH -- confirm.
    !nvidia-smi

<h5>Data Functions</h5>

In [None]:
# Default parameters of the frequency-domain (STFT) feature extraction.
fft_window = 100       # default frame length and FFT length passed to tf.signal.stft
fft_step = 1           # default frame step (hop) passed to tf.signal.stft
fft_mode_label = True  # True: rounded mean of the labels in a frame; False: label at the frame's last row

In [None]:
def getDataSets(folder=datasets_folder):
    
    """Read the raw PVS datasets ("PVS 1" to "PVS 9") from disk.

    Sensor files are read as float32 and the labels file as uint8.
    (The commented-out variant this replaces read them with
    ``float_precision="high"`` and the default dtypes.)

    Args:
        folder (str): Root folder that contains one "PVS <n>" sub-folder per dataset.

    Returns:
        dict: datasets in a dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    def read_csv(pvs_folder, file_name, dtype):
        # Small wrapper so the three reads below share the path handling.
        return pd.read_csv(os.path.join(pvs_folder, file_name), dtype=dtype)

    datasets = {}

    for number in range(1, 10):

        pvs_folder = os.path.join(folder, "PVS " + str(number))

        left = read_csv(pvs_folder, 'dataset_gps_mpu_left.csv', np.float32)
        right = read_csv(pvs_folder, 'dataset_gps_mpu_right.csv', np.float32)
        labels = read_csv(pvs_folder, 'dataset_labels.csv', np.uint8)

        datasets["pvs_" + str(number)] = {"left": left, "right": right, "labels": labels}

    return datasets

def getFields(acc=False, gyro=False, mag=False, temp=False, speed=False, location=False, below_suspension=False, above_suspension=False, dashboard=False):
    
    """Select field names by sensor type and sensor placement.

    A sensor field is returned when both its type (name prefix) and its
    placement (name suffix) were requested. "speed" and the GPS coordinates
    have no placement and are returned whenever their own flag is set.

    Args:
        acc (bool): include accelerometer fields.
            (default is False)
        gyro (bool): include gyroscope fields.
            (default is False)
        mag (bool): include magnetometer fields.
            (default is False)
        temp (bool): include temperature fields.
            (default is False)
        speed (bool): include the speed field.
            (default is False)
        location (bool): include the latitude/longitude fields.
            (default is False)
        below_suspension (bool): include fields whose name ends with "_below_suspension".
            (default is False)
        above_suspension (bool): include fields whose name ends with "_above_suspension".
            (default is False)
        dashboard (bool): include fields whose name ends with "_dashboard".
            (default is False)

    Returns:
        list: selected field names, in the order of the full field list.
    """

    all_fields = [
        'timestamp',
        'acc_x_dashboard', 'acc_y_dashboard', 'acc_z_dashboard',
        'acc_x_above_suspension', 'acc_y_above_suspension', 'acc_z_above_suspension',
        'acc_x_below_suspension', 'acc_y_below_suspension', 'acc_z_below_suspension',
        'gyro_x_dashboard', 'gyro_y_dashboard', 'gyro_z_dashboard',
        'gyro_x_above_suspension', 'gyro_y_above_suspension', 'gyro_z_above_suspension',
        'gyro_x_below_suspension', 'gyro_y_below_suspension', 'gyro_z_below_suspension',
        'mag_x_dashboard', 'mag_y_dashboard', 'mag_z_dashboard',
        'mag_x_above_suspension', 'mag_y_above_suspension', 'mag_z_above_suspension',
        'temp_dashboard', 'temp_above_suspension', 'temp_below_suspension',
        'timestamp_gps', 'latitude', 'longitude', 'speed'
    ]

    # Prefixes / suffixes of the requested sensor types and placements.
    type_flags = ((acc, "acc_"), (gyro, "gyro_"), (mag, "mag_"), (temp, "temp_"))
    place_flags = (
        (below_suspension, "_below_suspension"),
        (above_suspension, "_above_suspension"),
        (dashboard, "_dashboard"),
    )
    wanted_prefixes = tuple(prefix for enabled, prefix in type_flags if enabled)
    wanted_suffixes = tuple(suffix for enabled, suffix in place_flags if enabled)

    # Fields selected by their own flag, regardless of type/placement.
    standalone = set()
    if speed:
        standalone.add("speed")
    if location:
        standalone.update(("latitude", "longitude"))

    def is_selected(name):
        if name in standalone:
            return True
        # str.startswith/endswith with an empty tuple is False, i.e. nothing requested.
        return name.startswith(wanted_prefixes) and name.endswith(wanted_suffixes)

    return [name for name in all_fields if is_selected(name)]

def getSubSets(datasets, fields, labels):

    """Keep only the requested columns of every PVS dataset.

    Args:
        datasets (dict): raw PVS datasets.
        fields (string[]): columns to keep in the "left" and "right" frames.
        labels (string[]): columns to keep in the "labels" frame.

    Returns:
        dict: subsets in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    return {
        name: {
            "left": group["left"][fields],
            "right": group["right"][fields],
            "labels": group["labels"][labels],
        }
        for name, group in datasets.items()
    }

def getNormalizedDataMinMax(subsets, scaler_range):

    """Normalize the subsets with a MinMaxScaler.

    Args:
        subsets (dict): subsets to be normalized.
        scaler_range (tuple): target range, such as (0,1) or (-1,1).

    Returns:
        dict: the result of ``getNormalizedData``, in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    return getNormalizedData(subsets, MinMaxScaler(feature_range=scaler_range))

def getNormalizedDataRobust(subsets): 

    """Standardize the subsets with a RobustScaler.

    Args:
        subsets (dict): subsets to be standardized.

    Returns:
        dict: the result of ``getNormalizedData``, in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    return getNormalizedData(subsets, RobustScaler())

def getNormalizedData(subsets, scaler):
    
    """Fit ``scaler`` on all left/right data and transform every subset with it.

    The scaler is fitted once on the row-wise concatenation of every PVS
    "left" and "right" frame and then applied to each frame separately.
    The "labels" frames are passed through unchanged (same objects).

    Args:
        subsets (dict): subsets to be standardized/normalized.
        scaler (object): scaler exposing ``fit`` (returning the fitted scaler) and ``transform``.

    Returns:
        dict: subsets normalized in dict form (empty when ``subsets`` is empty):
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    frames = [subsets[pvs][side] for pvs in subsets for side in ("left", "right")]

    if not frames:
        # Nothing to fit on; pd.concat would raise on an empty list.
        return {}

    # One concat for all frames: DataFrame.append was removed in pandas 2.0
    # and re-copied the accumulated frame on every call.
    learn_data = pd.concat(frames, ignore_index=True)
    scaler = scaler.fit(learn_data)
    del learn_data  # free the concatenated copy as soon as the scaler is fitted

    normalized_sets = {}

    for pvs, group in subsets.items():

        normalized_sets[pvs] = {
            'left':  pd.DataFrame(data=scaler.transform(group['left']),  columns=group['left'].columns),
            'right': pd.DataFrame(data=scaler.transform(group['right']), columns=group['right'].columns),
            'labels': group['labels']
        }

    return normalized_sets

def getReshapedData(subsets, shape, moving_window, mode_label):  

    """Cut every PVS subset into fixed-size windows shaped like ``shape``.

    The number of rows per window is the product of the non-None entries of
    ``shape`` divided by the number of features per row.

    Args:
        subsets (dict): subsets to be reshaped.
        shape (tuple): shape of one sample. Must have the form (None, ..., ..., features); None entries are ignored.
        moving_window (bool): if True, windows slide one row at a time; otherwise consecutive non-overlapping windows are used.
        mode_label (bool): if True, the label of a window is the column-wise mean of its label rows rounded to 0 decimals; otherwise the label row at the last position of the window is used.

    Returns:
        dict: subsets reshaped in dict form:
        {
            "pvs_x": {
                "left": np.array,
                "right": np.array,
                "labels": np.array
            }
        }
    """

    sample_shape = tuple(dim for dim in shape if dim is not None)

    reshaped_sets = {}

    for name, group in subsets.items():

        reshaped_sets[name] = {}

        for side in ('left', 'right'):

            inputs = group[side].values
            outputs = group["labels"].values

            # Rows per window = elements of one sample / features per row.
            elements = 1
            for dim in sample_shape:
                elements = elements * dim
            window = int(elements / len(inputs[0]))

            # (start, stop) row bounds of every window.
            if moving_window:
                bounds = [(stop - window, stop) for stop in range(window, len(inputs) + 1)]
            else:
                chunks = int(len(inputs) / window)
                bounds = [(k * window, (k + 1) * window) for k in range(0, chunks)]

            inputs_reshaped = [inputs[start:stop, :].reshape(sample_shape) for start, stop in bounds]

            if mode_label:
                outputs_reshaped = [outputs[start:stop, :].mean(axis=0).round(0) for start, stop in bounds]
            elif moving_window:
                # Last label of every sliding window is just the tail of the label array.
                outputs_reshaped = outputs[window - 1:]
            else:
                outputs_reshaped = [outputs[stop - 1] for _, stop in bounds]

            reshaped_sets[name][side] = np.array(inputs_reshaped)
            # Written once per side; the value from the last side processed is the one kept.
            reshaped_sets[name]['labels'] = np.array(outputs_reshaped)
            del inputs_reshaped, outputs_reshaped

    return reshaped_sets

def getTrainTestSets(preprocessed_sets, sets_train, sets_test, sides=['left', 'right']):

    """Split the pre-processed sets into train and test arrays.

    For every requested side of a PVS set, its samples and the labels of the
    set are appended to the train lists when the set is named in
    ``sets_train``, otherwise to the test lists when it is named in
    ``sets_test``. Sets named in neither list are ignored.

    Args:
        preprocessed_sets (dict): all pre-processed sets.
        sets_train (string[]): PVS datasets to be used in train.
        sets_test (string[]): PVS datasets to be used in validation.
        sides (string[]): sides of each PVS dataset to be used.

    Returns:
        input_train (np.array): input train values.
        input_test (np.array): input validation values.
        output_train (np.array): output train values.
        output_test (np.array): output validation values.
    """

    train_inputs, train_outputs = [], []
    test_inputs, test_outputs = [], []

    for name, group in preprocessed_sets.items():

        for side in sides:

            samples = group[side]
            targets = group["labels"]

            if name in sets_train:
                train_inputs.extend(samples)
                train_outputs.extend(targets)
            elif name in sets_test:
                test_inputs.extend(samples)
                test_outputs.extend(targets)

    return np.array(train_inputs), np.array(test_inputs), np.array(train_outputs), np.array(test_outputs)

def createMemoryMap(path, input_train, input_test, output_train, output_test):

    """Copy the train/test arrays into float64 memory-mapped files under ``path``.

    The files are created (or overwritten) as 'input_train.dat',
    'input_test.dat', 'output_train.dat' and 'output_test.dat'.

    Args:
        path (string): folder to store maps.
        input_train (list|np.array): values for input train.
        input_test (list|np.array): values for input validation.
        output_train (list|np.array): values for output train.
        output_test (list|np.array): values for output validation.

    Returns:
        it_map (np.memmap): map for input train values.
        iv_map (np.memmap): map for input validation values.
        ot_map (np.memmap): map for output train values.
        ov_map (np.memmap): map for output validation values.
    """

    def to_memmap(file_name, values):
        # Map shape: number of samples followed by the shape of the first sample.
        mapped = np.memmap(
            os.path.join(path, file_name),
            dtype='float64',
            mode='w+',
            shape=((len(values),) + values[0].shape),
        )
        mapped[:] = values[:]
        return mapped

    it_map = to_memmap('input_train.dat', input_train)
    iv_map = to_memmap('input_test.dat', input_test)
    ot_map = to_memmap('output_train.dat', output_train)
    ov_map = to_memmap('output_test.dat', output_test)

    return it_map, iv_map, ot_map, ov_map

def getFrequencyFeaturesSTFT(subsets, fft_window=fft_window, fft_step=fft_step, fft_mode_label=fft_mode_label):

    """Replace every time-domain column by its STFT magnitude features.

    For each PVS subset and side, every column is transformed with
    ``tf.signal.stft`` (frame length and FFT length ``fft_window``, frame step
    ``fft_step``). The magnitude of each frequency bin becomes a column named
    "<column>_freq_<bin>". Labels are reduced to one row per frame, using the
    same window and step as the STFT so that labels and frames stay aligned.

    Args:
        subsets (dict): subsets with "left", "right" and "labels" DataFrames.
        fft_window (int): frame length and FFT length.
        fft_step (int): step between consecutive frames.
        fft_mode_label (bool): if True, the label of a frame is the column-wise
            mean of its label rows rounded to 0 decimals; otherwise the label
            row at the last position of the frame is used.

    Returns:
        dict: frequency-domain subsets in dict form:
        {
            "pvs_x": {
                "left": DataFrame,
                "right": DataFrame,
                "labels": DataFrame
            }
        }
    """

    frequency_sets = {}

    for key in subsets.keys():

        frequency_sets[key] = {}

        for side in ['left', 'right']:

            inputs = subsets[key][side]
            column_frames = []

            for column in inputs.columns:
                frequencies = tf.signal.stft(inputs[column].values, frame_length=fft_window, frame_step=fft_step, fft_length=fft_window)
                magnitude = tf.abs(frequencies).numpy()
                column_frames.append(
                    pd.DataFrame(data=magnitude, columns=[column + "_freq_" + str(x) for x in range(0, magnitude.shape[1])])
                )
                del frequencies, magnitude

            # Concatenate once instead of growing the frame inside the loop.
            frequency_sets[key][side] = pd.concat(column_frames, axis=1) if column_frames else pd.DataFrame([])
            del column_frames

        outputs = subsets[key]['labels'].values

        # Frame k covers rows [k*fft_step, k*fft_step + fft_window), so the label
        # windows must advance by fft_step as well (identical to before for step 1).
        if fft_mode_label:
            outputs_new = [
                outputs[stop - fft_window:stop, :].mean(axis=0).round(0)
                for stop in range(fft_window, len(outputs) + 1, fft_step)
            ]
        else:
            outputs_new = outputs[fft_window - 1::fft_step]

        frequency_sets[key]['labels'] = pd.DataFrame(data=outputs_new, columns=subsets[key]['labels'].columns)
        del outputs_new

    return frequency_sets

In [None]:
def addCompositeAccelerationFeatures(subsets, fields):
    
    """Insert composite acceleration magnitudes as the first columns of each frame.

    The columns "acc_x_y", "acc_y_z", "acc_x_z" and "acc_x_y_z" hold the
    square root of the sum of the squared acceleration axes named in them.
    The "left" and "right" frames of ``subsets`` are modified in place.
    Reference: https://www.sciencedirect.com/science/article/abs/pii/S0966636219306952

    Args:
        subsets (dict): PVS subsets to add features.
        fields (string[]): fields of subsets.

    Returns:
        subsets (dict): the same subsets, with the new columns.
        fields (string[]): new list with the composite names followed by the original fields.
    """

    composite_names = ["acc_x_y", "acc_y_z", "acc_x_z", "acc_x_y_z"]

    # Resolve the axis columns before the composite names exist in the field list.
    x_columns = [name for name in fields if "acc_x" in name]
    y_columns = [name for name in fields if "acc_y" in name]
    z_columns = [name for name in fields if "acc_z" in name]

    fields = composite_names + fields

    for group in subsets.values():

        for side in ("left", "right"):

            frame = group[side]

            x_squared = np.power(frame[x_columns], 2).values
            y_squared = np.power(frame[y_columns], 2).values
            z_squared = np.power(frame[z_columns], 2).values

            squared_sums = (
                x_squared + y_squared,
                y_squared + z_squared,
                x_squared + z_squared,
                x_squared + y_squared + z_squared,
            )

            for position, (name, squared_sum) in enumerate(zip(composite_names, squared_sums)):
                frame.insert(position, name, np.sqrt(squared_sum))

    return subsets, fields

<h5>Model Management</h5>

In [None]:
def createPathIfNotExists(path):
    """Create the directory ``path`` (including missing parents) unless something already exists there."""

    if not os.path.exists(path):
        # exist_ok=True covers the case where the directory is created by
        # someone else between the check above and this call.
        os.makedirs(path, exist_ok=True)

def modelFileSavedFormat(file):
    """Return the checkpoint file-name template for ``file``.

    The ``{acc}`` and ``{val_acc}`` placeholders are left unformatted so the
    caller (or a checkpoint callback) can fill them in.
    """
    metrics_suffix = '-train-acc-{acc:.5f}-val-acc-{val_acc:.5f}.hdf5'
    return file + metrics_suffix

def saveModelDiagram(model, path, file, show=True):
    """Save the model diagram as ``<path>/<file>.png`` and optionally show it.

    Args:
        model: model passed to ``plot_model``; must also provide ``summary()``.
        path (str): folder for the diagram; created when missing.
        file (str): file name of the diagram, without the '.png' extension.
        show (bool): when True, display the diagram and print the model summary.
    """
    createPathIfNotExists(path)
    diagram = plot_model(model, to_file=os.path.join(path, file + '.png'), show_shapes=True, show_layer_names=True)

    if show:
        # Reuse the result of the call above instead of rendering the diagram a
        # second time (which also wrote a second file at plot_model's default path).
        display(diagram)
        # summary() prints the table itself and returns None, so its result
        # is not passed to display().
        model.summary()
    
def showHistory(history):
    """Plot every series stored in ``history.history`` on the current figure, with a legend."""

    for name, series in history.history.items():
        plt.plot(series, label=name)

    plt.legend()
    
def fitModel(model, inputs_train, outputs_train, inputs_validation, outputs_validation, path, file, batch_size=64, epochs=10000, patience=50):
    
    """Train ``model`` with live loss plots, best-model checkpointing and early stopping.

    Args:
        model: compiled model exposing ``fit``.
        inputs_train, outputs_train: training data.
        inputs_validation, outputs_validation: validation data.
        path (str): folder for the checkpoint file; created when missing.
        file (str): base name of the checkpoint file (see ``modelFileSavedFormat``).
        batch_size (int): batch size passed to ``model.fit``.
        epochs (int): maximum number of epochs.
        patience (int): early-stopping patience on 'val_acc'.

    Returns:
        The value returned by ``model.fit``.
    """

    createPathIfNotExists(path)

    # Save the model only when 'val_acc' improves.
    best_model_saver = ModelCheckpoint(
        filepath=os.path.join(path, modelFileSavedFormat(file)),
        save_best_only=True,
        monitor='val_acc',
        mode='max',
    )

    live_plot = PlotLossesKeras()

    # Stop when 'val_acc' has not improved by at least min_delta for
    # `patience` epochs, restoring the best weights.
    stopper = EarlyStopping(
        monitor='val_acc',
        mode='max',
        verbose=1,
        patience=patience,
        min_delta=0.0001,
        restore_best_weights=True,
    )

    return model.fit(
        inputs_train,
        outputs_train,
        validation_data=(inputs_validation, outputs_validation),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[live_plot, best_model_saver, stopper],
        verbose=0,
    )

def predictModel(model, inputs):
    """Return the result of ``model.predict(inputs)``."""
    predictions = model.predict(inputs)
    return predictions
    
def evaluateModel(model, inputs, outputs, batch_size=64):
    """Return the result of ``model.evaluate`` on the given data, run silently (verbose=0)."""
    metrics = model.evaluate(inputs, outputs, batch_size=batch_size, verbose=0)
    return metrics
    
def loadWeights(model, pathFile):
    """Load into ``model`` the weights stored in the file ``pathFile``."""
    weights_file = pathFile
    model.load_weights(weights_file)

<h5>Parameter Variations</h5>

In [None]:
# Train/test splits of the nine PVS datasets: each experiment holds out three
# datasets for testing and trains on the remaining six.
experiment_by_dataset = [
    {
        "train": ["pvs_" + str(number) for number in range(1, 10) if number not in held_out],
        "test": ["pvs_" + str(number) for number in held_out],
    }
    for held_out in ((2, 5, 8), (4, 5, 6), (3, 5, 7))
]

# (placement name, fields) pairs: accelerometer, gyroscope and speed fields of
# one sensor placement per experiment.
experiment_by_fields = [
    (placement_name, getFields(acc=True, gyro=True, speed=True, **{placement_flag: True}))
    for placement_name, placement_flag in (
        ("Below Suspension", "below_suspension"),
        ("Above Suspension", "above_suspension"),
        ("Dashboard", "dashboard"),
    )
]

<h5>Labels Fields</h5>

In [None]:
# Label columns that make up the surface-type classification target.
surface_type_labels = [
    "land",
    "cobblestone",
    "asphalt",
]

In [None]:
# Display names for the surface-type classes, used as the axis tick labels of plots.
surface_type_labels_plot = [
    "Dirt \n Road",
    "Cobblestone \n Road",
    "Asphalt \n Road",
]

<h5>Execution Log</h5>

In [None]:
# Save a log for each experiment execution (params for each execution)
def saveExecutionLog(path, data, columns=['placement', 'experiment', "window", "scaler", "input_shape", "output_shape", "train_loss", "val_loss", "train_acc", "val_acc"]):
    """Write the execution log rows to ``<path>/experiment-execution-log.csv``.

    Args:
        path (str): existing folder that receives the CSV file.
        data: rows of the log, one value per entry of ``columns``.
        columns (string[]): header of the CSV file.
    """
    log_frame = pd.DataFrame(data=data, columns=columns)
    log_file = os.path.join(path, "experiment-execution-log.csv")
    log_frame.to_csv(log_file, index=False)

<h5>Training Functions</h5>

In [None]:
def manageFiles(history, experiment_folder, experiment_file):
    """Keep the checkpoint of the best retry and delete the per-retry folders.

    The best retry is the one that reached the highest 'val_acc'. Its
    checkpoint file is moved from "<experiment_folder>/Test <n>" to
    ``experiment_folder`` and then every "Test <n>" folder is removed.

    Args:
        history (list): one Keras history dict per retry, each with the
            'acc', 'loss', 'val_acc' and 'val_loss' lists.
        experiment_folder (str): folder that contains the "Test <n>" folders.
        experiment_file (str): base name of the checkpoint files.

    Returns:
        list: [train_loss, val_loss, train_acc, val_acc] of the best epoch of the best retry.

    Raises:
        ValueError: if ``history`` is empty.
    """

    if not history:
        raise ValueError("history must contain at least one training run")

    test = -1
    index = -1
    val_acc = -1

    # Inspect every recorded retry (not a fixed number of three).
    for i, run_history in enumerate(history):

        max_value = max(run_history['val_acc'])

        if max_value > val_acc:
            val_acc = max_value
            test = i
            # First epoch that reached the maximum.
            index = run_history['val_acc'].index(max_value)

    train_acc = history[test]['acc'][index]
    train_loss = history[test]['loss'][index]
    val_acc = history[test]['val_acc'][index]
    val_loss = history[test]['val_loss'][index]

    test_folder = os.path.join(experiment_folder, "Test " + str(test + 1))
    # Rebuild the checkpoint file name from the metrics of the best epoch.
    file = modelFileSavedFormat(experiment_file).format(**{'acc': train_acc, 'val_acc': val_acc})

    move_from = os.path.join(test_folder, file)
    move_to = os.path.join(experiment_folder, file)

    shutil.move(move_from, move_to)

    for i in range(len(history)):
        shutil.rmtree(os.path.join(experiment_folder, "Test " + str(i + 1)))

    return [train_loss, val_loss, train_acc, val_acc]

In [None]:
def getLoadBar():
    """Create the four notebook progress bars and store them in module-level globals.

    The totals come from ``experiment_by_fields``, ``experiment_by_dataset``
    and ``input_shapes``; the retries bar has a fixed total of 3.
    """

    global load_bar_placement, load_bar_dataset, load_bar_experiment, load_bar_retries

    # Resolve all totals before creating any bar.
    placement_total, dataset_total, shape_total = (
        len(experiment_by_fields),
        len(experiment_by_dataset),
        len(input_shapes),
    )

    load_bar_placement = tqdm(total=placement_total, desc='Placement Progress')
    load_bar_dataset = tqdm(total=dataset_total, desc='Dataset Progress')
    load_bar_experiment = tqdm(total=shape_total, desc='Input Shapes Progress')
    load_bar_retries = tqdm(total=3, desc='Retries')

In [None]:
def run(model_fn, batch_size=64, patience=50, epochs=10000, retries=3, addCompositeAccFeatures=False, frequencyDomain=False):

    """Train and log every placement / dataset split / input shape combination.

    For each entry of ``experiment_by_fields`` the data is subset, optionally
    extended with composite or frequency features, and normalized. Then, for
    each split of ``experiment_by_dataset`` and each entry of ``input_shapes``,
    ``retries`` models are trained, ``manageFiles`` selects the checkpoint to
    keep and the metrics are written with ``saveExecutionLog``.

    Besides its arguments the function reads these module-level names:
    ``datasets``, ``input_shapes``, ``parameters``, ``output_shape``,
    ``moving_window``, ``mode_label``, ``sides``, ``experiments_folder``,
    ``output`` and the ``load_bar_*`` progress bars.
    NOTE(review): ``datasets``, ``input_shapes``, ``parameters``,
    ``output_shape``, ``moving_window``, ``mode_label`` and ``sides`` are not
    defined in this part of the notebook -- presumably the calling notebook
    sets them; confirm.

    Args:
        model_fn (callable): called as ``model_fn(**parameters(input_shape, output_shape))``;
            must return ``(model, model_name)``.
        batch_size (int): forwarded to ``fitModel``.
        patience (int): forwarded to ``fitModel``.
        epochs (int): forwarded to ``fitModel``.
        retries (int): number of models trained for each input shape.
        addCompositeAccFeatures (bool): add the composite acceleration features before normalizing.
        frequencyDomain (bool): use STFT features scaled to (0,1) instead of the time-domain data scaled to (-1,1).
    """

    load_bar_placement.reset()

    for placement, fields in experiment_by_fields:

        load_bar_placement.set_description(placement)
        load_bar_dataset.reset()
        
        # Keep only the fields of this placement and the surface-type labels.
        subsets = getSubSets(datasets.copy(), fields, surface_type_labels)

        if addCompositeAccFeatures:
            subsets, fields = addCompositeAccelerationFeatures(subsets, fields)

        if frequencyDomain:
            subsets = getFrequencyFeaturesSTFT(subsets)
            # fields = []
            normalized_sets = getNormalizedDataMinMax(subsets, (0,1))
        else:
            normalized_sets = getNormalizedDataMinMax(subsets, (-1,1))
        
        # The un-normalized subsets are no longer needed; release them early.
        del subsets
        gc.collect()

        for experiment_number in range(0, len(experiment_by_dataset)):

            load_bar_experiment.reset()
            load_bar_experiment.set_description("Experiment " + str(experiment_number + 1))

            sets_train = experiment_by_dataset[experiment_number]['train']
            sets_test = experiment_by_dataset[experiment_number]['test']

            # One log per dataset split; it is rewritten after every input shape.
            execution_log = []

            for input_shape, window_size in input_shapes:

                model_args = parameters(input_shape, output_shape)

                # Keras history dict of every retry for this input shape.
                history = []

                reshaped_sets = getReshapedData(normalized_sets.copy(), input_shape, moving_window, mode_label)
                input_train, input_test, output_train, output_test = getTrainTestSets(reshaped_sets, sets_train, sets_test, sides)
                del reshaped_sets
                gc.collect()

                print("Input Train Shape:", input_train.shape, "Output Train Shape:", output_train.shape)
                print("Input Validation Shape:", input_test.shape, "Output Validation Shape:", output_test.shape)

                for test in range(0, retries):

                    # A new model is built for every retry.
                    model, model_name = model_fn(**model_args)

                    experiment_folder = os.path.join(experiments_folder, model_name, placement, "Experiment " + str(experiment_number + 1))
                    test_folder = os.path.join(experiment_folder, "Test " + str(test + 1))
                    diagram_file = "experiment-" + str(experiment_number + 1) + "-window-" + str(window_size)
                    experiment_file = diagram_file + "-min-max-scaler-" + ("(0,1)" if frequencyDomain else "(-1,1)")

                    saveModelDiagram(model, experiment_folder, diagram_file)
                    hist = fitModel(model, input_train, output_train, input_test, output_test, test_folder, experiment_file, patience=patience, batch_size=batch_size, epochs=epochs)

                    history.append(hist.history)
                    load_bar_retries.update(1)

                    # Clean Memory
                    del model, hist
                    # model, hist = [None, None]
                    
                    # `output` is None when google.colab could not be imported.
                    if not(output is None):
                        output.clear()

                    gc.collect()

                # Returns [train_loss, val_loss, train_acc, val_acc] for the log row below.
                metrics = manageFiles(history, experiment_folder, experiment_file)
                
                execution_log.append([
                    placement,
                    experiment_number + 1, 
                    window_size, 
                    "Min Max Scaler" + ("(0,1)" if frequencyDomain else "(-1,1)"), 
                    str(input_shape), 
                    str(output_shape)
                ] + metrics)

                saveExecutionLog(experiment_folder, execution_log)
                load_bar_experiment.update(1)

                # Clean
                del history, input_train, input_test, output_train, output_test, metrics
                load_bar_retries.reset()
                gc.collect()
                # NOTE(review): fixed 5 s pause, presumably to let memory be released -- confirm.
                time.sleep(5)

            load_bar_dataset.update(1)       
        
        del normalized_sets
        load_bar_placement.update(1)

<h5>Plot Functions</h5>

In [1]:
# Plot best model CNN 7 features window 300 time domain
def confusionMatrix(files, title, model_fn, input_shape, output_shape, experiment_by_fields):
    """Plot and save the confusion matrix averaged over every dataset experiment.

    For each split of the module-level ``experiment_by_dataset`` the model is
    rebuilt, the weights of that experiment are loaded and a row-normalized
    confusion matrix is computed on the test data. The matrices are averaged,
    shown as percentages and saved as 'confusion_matrix.png'.

    Args:
        files (string[]): weights file name of each experiment, indexed like ``experiment_by_dataset``.
        title (str): title of the plot.
        model_fn (callable): called as ``model_fn(**parameters(input_shape, output_shape))``;
            must return ``(model, model_name)``.
        input_shape (tuple): input shape used to reshape the data and to build the model.
        output_shape: forwarded to ``parameters``.
        experiment_by_fields (tuple): a single ``(placement, fields)`` pair. Note that this
            parameter shadows the module-level list of the same name.
    """

    placement = experiment_by_fields[0]
    fields = experiment_by_fields[1]

    matrix = []
    subsets = getSubSets(datasets.copy(), fields, surface_type_labels)
    normalized_sets = getNormalizedDataMinMax(subsets, (-1,1))
    reshaped_sets = getReshapedData(normalized_sets, input_shape, moving_window, mode_label)

    # Follow the configured splits instead of assuming exactly three of them.
    for experiment_number, split in enumerate(experiment_by_dataset):

        # Only the test part is needed to evaluate the stored weights.
        _, input_test, _, output_test = getTrainTestSets(reshaped_sets.copy(), split['train'], split['test'], sides)

        model_args = parameters(input_shape, output_shape)
        model, model_name = model_fn(**model_args)
        loadWeights(model, os.path.join(experiments_folder, model_name, placement, "Experiment " + str(experiment_number + 1), files[experiment_number]))
        predictions = predictModel(model, input_test)
        matrix.append(confusion_matrix(output_test.argmax(axis=1), predictions.argmax(axis=1), normalize="true"))

    # Mean of the per-experiment matrices, as percentages.
    values = (sum(matrix) / len(matrix)) * 100
    con_mat_df = pd.DataFrame(values, index=surface_type_labels_plot, columns=surface_type_labels_plot)
    figure = plt.figure(figsize=(4,4))
    sns.set(font_scale=1.2)
    sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues, annot_kws={"size": 14})
    plt.tight_layout()
    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
    figure.savefig('confusion_matrix.png', bbox_inches="tight")

In [None]:
# https://towardsdatascience.com/how-to-easily-process-audio-on-your-gpu-with-tensorflow-2d9d91360f06
def plotMagnitudeSpectogram(freq, sampling_rate, fft_step):
    """Draw the magnitude spectrogram of ``freq`` and save it as 'magnitude_spectogram_librosa.png'.

    Args:
        freq: 2-D array of magnitudes; it is transposed before plotting.
            NOTE(review): presumably (frames, frequency bins) -- confirm.
        sampling_rate: passed to librosa as ``sr``.
        fft_step: passed to librosa as ``hop_length``.
    """
    spectrogram = freq.T
    axes_options = dict(y_axis='linear', x_axis='frames', sr=sampling_rate, hop_length=fft_step)

    plt.figure(figsize=(20,6))
    lr.display.specshow(spectrogram, **axes_options)
    plt.colorbar()
    plt.title('Magnitude Spectrogram')
    plt.tight_layout()
    plt.ylabel('Frequency (Hz)')
    plt.xlabel('Time (10ms)')
    plt.savefig('magnitude_spectogram_librosa.png', dpi=300)

In [None]:
# https://towardsdatascience.com/how-to-easily-process-audio-on-your-gpu-with-tensorflow-2d9d91360f06
def plotLogMagnitudeSpectogram(freq, sampling_rate, fft_step):
    """Draw the log-magnitude spectrogram of ``freq`` and save it as 'log_magnitude_spectogram_librosa.png'.

    The magnitudes are converted with ``librosa.amplitude_to_db`` using the
    maximum as reference.

    Args:
        freq: 2-D array of magnitudes; it is transposed before plotting.
            NOTE(review): presumably (frames, frequency bins) -- confirm.
        sampling_rate: passed to librosa as ``sr``.
        fft_step: passed to librosa as ``hop_length``.
    """
    spectrogram = freq.T
    axes_options = dict(y_axis='linear', x_axis='frames', sr=sampling_rate, hop_length=fft_step)

    plt.figure(figsize=(20,6))
    decibels = lr.amplitude_to_db(spectrogram, ref=np.max)
    lr.display.specshow(decibels, **axes_options)
    plt.colorbar(format='%+2.0f Db')
    plt.title('Log-Magnitude Spectrogram')
    plt.tight_layout()
    plt.ylabel('Frequency (Hz)')
    plt.xlabel('Time (10ms)')
    plt.savefig('log_magnitude_spectogram_librosa.png', dpi=300)

In [None]:
# NOTE(review): presumably a marker showing which version of this notebook was executed -- confirm.
print("V2")