# Test the Guitarist's Touch
Test a model trained on only one guitar and guitarist against new data of:
1. The same guitarist and guitar
2. A different guitarist (same guitar)

In [None]:
# Notebook-level switches, set before any other cell runs.
# NOTE(review): TRAIN_ONLY_ON_FULL_DATASET is not read in the visible part of the file - confirm it is still used.
TRAIN_ONLY_ON_FULL_DATASET = True
# When True, the dataset-selection cell skips every "not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET" branch
# and picks the feature files from its remaining branches.
MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET = True # TODO: in the future remove this flag and its uses

## Import modules and mount drive folder

In [None]:
# Choose ClassificationTask task
from enum import Enum
class ClassificationTask(Enum):
    """Classification problems that can be selected; each value is an (id, short tag) tuple."""
    FULL_8_CLASS_PROBLEM = (1, 'full')
    BINARY_PERCUSSIVE_PITCHED = (2, 'binary')
    PERCUSSIVE_4_ONLY = (3, 'perc')
    PITCHED_4_ONLY = (4, 'pitch')
    PERCUSSIVE_PLUS_PITCHED_CLASS = (5, 'perc+pitch')
    ONE_GUITARIST_FULL = (6, 'one-guit-full')
    LIMITED_DATASET_NUMBER_FULL_8 = (7, 'guit-touch')
class FeatureSelection(Enum):
    """Feature selection strategies that can be chosen in the configuration."""
    NONE = 0
    MANUAL_VARIABLES = 1
    MANUAL_LIST = 2
    AUTO_ANOVA = 3
    AUTO_RELIEF = 4
class FeatureWindowSize(Enum):
    """Feature-window variants; the configuration checks for 'windowed' in the member name."""
    s4800_SAMPLES_100ms = 1
    s704_Samples_14ms = 2
    _704windowed = 3
    _2112windowed = 4
    _3456windowed = 5
    _4800windowed = 6

class WindowedInputMode(Enum):
    """Input layout used when the features are windowed (1D or 2D)."""
    _1D = 1
    _2D = 2

In [None]:
"""
    Specify the classification task to run here.
    Names should be self-explanatory.
"""

classification_task = ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8 # Enable this to do a study where there is a limited number of guitars/guitarists in the dataset

# Any other task is rejected right away: this notebook only runs the limited-dataset study.
assert classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8 # Enable this to do a study where there is a limited number of guitars/guitarists in the dataset

# This is only used if classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8
# Each letter selects one guitar/player pair; the letter-to-pair mapping is printed by the dataset filtering cell.
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f'] # We have extra data for Guitarist 1 & Guitar 5.
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g']
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g','d']
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g','e','d']
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g','e','d','c']
# WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g','e','d','c','b']
WHICH_GUITAR_PLAYER_COMBOS_TO_USE = ['f','g','e','d','c','b','a']


"""
    Specify the feature selection method to use here.
"""
# FEATURE_SELECTION = FeatureSelection.MANUAL_VARIABLES
# FEATURE_SELECTION = FeatureSelection.MANUAL_LIST
FEATURE_SELECTION = FeatureSelection.AUTO_ANOVA #ANOVA: https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_classif.html
# FEATURE_SELECTION = FeatureSelection.AUTO_RELIEF

"""
    Specify the feature window size to use here.
"""
FEATURE_WINDOW_SIZE = FeatureWindowSize._704windowed

"""
    If the input is windowed, then specify the windowed input mode here.
"""
# WINDOWED_INPUT_MODE = WindowedInputMode._1D
WINDOWED_INPUT_MODE = WindowedInputMode._2D

"""
    Specify the scaler to use here.
"""
# The name chosen here is replaced by a scaler instance in the next cell.
# SCALER_TO_USE = 'StandardScaler'
SCALER_TO_USE = 'MinMaxScaler'


""" 
    Specify whether to train the final model on the full dataset or not.
    Might want to set this to False if you are doing a lot of testing.
    Final model can then be trained at any from the backup script in the output folders.
"""
TRAIN_FINAL_MODEL = False

# When True, the GPU cell asserts that at least one GPU is visible to TensorFlow.
REQUIRE_GPU = True

DO_SAVE_TENSORBOARD_LOGS = False 
DO_SAVE_FOLD_MODELS = False 
DROP_EXTRA_PERCUSSIVE_SOUNDS = True # If true, drop the data from files that have 'extra' in the filename, which otherwise make the dataset unbalanced
# --> Quantize (Dynamic) and test the TF Lite model obtained (quicker but lower accuracy)
DO_TEST_QUANTIZATION = True

# When True, the tensorboard notebook extension is loaded.
USE_TENSORBOARD = True

# When True, every pickle in the "augmented_data" folder is loaded as well.
USE_AUGMENTED_DATA = False
DROP_EXTRA_PERCUSSIVE_SOUNDS_FROMAUG = False
DO_NORMALIZE_DATA = True
DO_NORMALIZE_FOR_FEATURE_SELECTION = True

# Requires an extra test feature file to be configured for the chosen window size (asserted after loading).
TEST_WITH_EXTRA_DATA = True

# DROP_ADDITIONAL_CEPSTRUM_FROM_BIG_WINDOW = True # TODO: Might want to remove this in the future

In [None]:
# Configuration sanity check: the 2D input mode is only accepted together with
# a feature window size whose name contains 'windowed'.
if WINDOWED_INPUT_MODE == WindowedInputMode._2D:
    assert 'windowed' in FEATURE_WINDOW_SIZE.name, "WindowedInputMode._2D implies that 'windowed' is in FEATURE_WINDOW_SIZE.name"

from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Replace the scaler name chosen in the configuration with a scaler instance.
if SCALER_TO_USE == 'StandardScaler':
    SCALER_TO_USE = StandardScaler()
elif SCALER_TO_USE == 'MinMaxScaler':
    SCALER_TO_USE = MinMaxScaler()
elif isinstance(SCALER_TO_USE, str):
    # An unknown name used to be left in place as a plain string.
    # Only strings are rejected, so re-running this cell (when SCALER_TO_USE
    # already holds a scaler instance) keeps working.
    raise ValueError("Unknown scaler '%s': use 'StandardScaler' or 'MinMaxScaler'" % SCALER_TO_USE)


In [None]:

# Load the TensorBoard notebook extension
# (%load_ext is an IPython line magic, so this cell needs an IPython/Jupyter session)
if USE_TENSORBOARD:
    %load_ext tensorboard

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
from sys import executable as sys_executable
from sys import argv as sys_argv
import pandas as pd
import numpy as np
import tensorflow as tf
from time import strftime, time
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from bz2 import BZ2File # To open compressed data
import re
import shutil
import imblearn
from sklearn.metrics import confusion_matrix as sk_conf_matrix
from sklearn.metrics import classification_report as sk_class_report
from sklearn.metrics import ConfusionMatrixDisplay as sk_conf_matrix_disp
import requests
from skrebate import ReliefF
import subprocess
import sys
import warnings
import os, platform, subprocess, re
import matplotlib.pyplot as plt
import gc
import hashlib
from glob import glob
from typing import Tuple


# Report the versions of the main libraries in use
print("Tensorflow version: " + tf.version.VERSION)
print('Imblearn version:',imblearn.__version__)

# Seed passed to seed_everything() right after its definition
global_random_state = 43

def seed_everything(seed=42):
    """Seed the random number generators used by this notebook.

    Sets the PYTHONHASHSEED environment variable and seeds Python's built-in
    ``random`` module, NumPy and TensorFlow with the same value.

    Args:
        seed: integer seed applied to every generator (default: 42).
    """
    # Function-scope import: the file does not import `random` at the top.
    import random

    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so this
    # presumably only affects processes launched afterwards - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The built-in generator used to be left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
seed_everything(global_random_state)

# Set this so that long arrays are printed in full
np.set_printoptions(threshold=np.inf)

# True when the string form of the IPython shell mentions google.colab
COLAB = 'google.colab' in str(get_ipython())

if COLAB:
    print('Running on CoLab')    #Connect and mount the drive folder that contains the train dataset and the output folder
    from google.colab import drive
    drive.mount('/content/gdrive', force_remount=False)

    HOMEBASE = '/content/gdrive/MyDrive/dottorato/Publications/02-IEEE-RTEmbeddedTimbreClassification(submitted)/Classifier'
else:
    print('Not running on CoLab')
    HOMEBASE = "."

# Both folders must already exist under HOMEBASE; they are not created here.
DATAFOLDER = os.path.join(HOMEBASE,"data/phase3")
MODELFOLDER = os.path.join(HOMEBASE,"output")
assert os.path.exists(DATAFOLDER) and os.path.isdir(DATAFOLDER), "Data folder not found in HOMEBASE path: '%s'\nMake sure to change the HOMEBASE in the script if you are running from a strange folder." % HOMEBASE
assert os.path.exists(MODELFOLDER) and os.path.isdir(MODELFOLDER), "Model folder not found in HOMEBASE path: '%s'\nMake sure to change the HOMEBASE in the script if you are running from a strange folder." % HOMEBASE

RELIEF_CACHE_FILEPATH = os.path.join(DATAFOLDER,'relief_cache.pickle')


# The dataset metadata package is imported from inside the data folder,
# so its root is put on sys.path first.
metaset_root_path = os.path.abspath(os.path.join(DATAFOLDER, 'expressiveGuitarTechniquesDataset'))
sys.path.insert(1,metaset_root_path)
import ExpressiveGuitarTechniquesDataset as egtdataset


In [None]:
def is_notebook() -> bool:
    """Return True only when running under the 'ZMQInteractiveShell' IPython shell.

    That shell class is the one used by Jupyter notebooks and qtconsole.
    Any other shell, or no IPython at all (``get_ipython`` undefined),
    yields False.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably the standard Python interpreter
        return False
    # 'TerminalInteractiveShell' (terminal IPython) and unknown shells are not notebooks
    return shell_name == 'ZMQInteractiveShell'

## Enforce GPU usage

In [None]:
# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))
# List the GPUs TensorFlow can see
physical_devices = tf.config.list_physical_devices('GPU') 

# Turn on memory growth for every GPU found
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

print(physical_devices)
# Stop here when a GPU is required by the configuration but none is visible
if REQUIRE_GPU:
  assert len(tf.config.experimental.list_physical_devices('GPU')) >= 1, "No GPU found. Please enable GPU in the runtime settings."

## Check Real available GRAM

In [None]:
def pip_install(package):
    """Install *package* with pip, using the interpreter that runs this notebook.

    Args:
        package: requirement string handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # The bare name `executable` was never bound (the file imports it as
    # `sys_executable`), so the original call raised NameError.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

In [None]:
# Optional diagnostic: set to True to print free system and GPU memory
CHECK_GRAM = False

if CHECK_GRAM:
    # memory footprint support libraries/code
    # NOTE(review): os.symlink fails if the link already exists, e.g. when this cell is re-run - confirm
    os.symlink('/opt/bin/nvidia-smi','/usr/bin/nvidia-smi')
    pip_install('gputil')
    pip_install('psutil')
    pip_install('humanize')
    import psutil, humanize, os, GPUtil as GPU
    GPUs = GPU.getGPUs()
    # XXX: only one GPU on Colab and isn't guaranteed
    gpu = GPUs[0]
    def printm():
        # Print the memory available to this process and the state of the first GPU
        process = psutil.Process(os.getpid())
        print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
        print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
    printm()

# Import Feature set

In [None]:
# Pick the feature file for the configured window size.
# The first six branches apply only when MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET is False.
# When it is True, only the four windowed sizes have a file, and only the 704 window
# also sets an extra test file; the two non-windowed sizes end in the ValueError.
EXTRA_TEST_DATA_PATH = None
if not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize.s4800_SAMPLES_100ms:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20221206-165551_SUPERLONGdataset-phase3PROCESSED_FEATURES.pickle'
elif not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20221201-182312_REALTIMEdataset-phase3PROCESSED_FEATURES.pickle'
elif not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize._704windowed:
    DATASET_FILENAME = 'paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230119-141803_windowed704-mainset-phase3PROCESSED_FEATURES.pickle'
elif not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize._2112windowed:
    DATASET_FILENAME = 'paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230119-163834_windowed2112-mainset-phase3PROCESSED_FEATURES.pickle'
elif not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize._3456windowed:
    DATASET_FILENAME = 'paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230120-081628_windowed3456-mainset-phase3PROCESSED_FEATURES.pickle'
elif not MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET and FEATURE_WINDOW_SIZE == FeatureWindowSize._4800windowed:
    DATASET_FILENAME = 'paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230119-105343_windowed4800-mainset-phase3PROCESSED_FEATURES.pickle'
elif FEATURE_WINDOW_SIZE == FeatureWindowSize._704windowed:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230301-100318_windowed704-mainset-phase3PROCESSED_FEATURES.pickle'
    EXTRA_TEST_DATA_PATH =  '../extra-test-features/onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230306-172035_windowed704-EXTRATESTPROCESSED_FEATURES.pickle'
elif FEATURE_WINDOW_SIZE == FeatureWindowSize._2112windowed:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230302-094722_windowed2112-mainset-phase3PROCESSED_FEATURES.pickle'
elif FEATURE_WINDOW_SIZE == FeatureWindowSize._3456windowed:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230302-122447_windowed3456-mainset-phase3PROCESSED_FEATURES.pickle'
elif FEATURE_WINDOW_SIZE == FeatureWindowSize._4800windowed:
    DATASET_FILENAME = 'onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230302-163445_windowed4800-mainset-phase3PROCESSED_FEATURES.pickle'
else:
    raise ValueError('Invalid FeatureWindowSize "%s" with MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET==%s'%(FEATURE_WINDOW_SIZE.name,str(MAKE_SURE_THAT_MICROS_IS_IN_THE_DATASET)))

print('Loading dataset from file:',DATASET_FILENAME)


# Load the main feature dataset and, when one is configured, the extra test
# dataset. The file extension decides between a bz2-compressed and a plain pickle.
# NOTE: pickle.load executes code stored in the file - only load trusted feature files.
if os.path.splitext(DATASET_FILENAME)[1] == '.bz2':
    print("Reading dataset from compressed pickle...")
    DATASET_PATH = os.path.join(DATAFOLDER,DATASET_FILENAME)
    startime = time()
    # Context managers close the archives even when unpickling fails
    with BZ2File(DATASET_PATH,'rb') as ifile:
        featuredataset = pickle.load(ifile)
    if EXTRA_TEST_DATA_PATH is not None:
        with BZ2File(os.path.join(DATAFOLDER,EXTRA_TEST_DATA_PATH),'rb') as ifile:
            extratest_featuredataset = pickle.load(ifile)
    print('Successfully Loaded!\nIt took %.1fs to load from compressed pickle' % (time()-startime))
elif os.path.splitext(DATASET_FILENAME)[1] == '.pickle':
    print("Reading dataset from pickle...")
    DATASET_PATH = os.path.join(DATAFOLDER,DATASET_FILENAME)
    startime = time()
    with open(DATASET_PATH,'rb') as pf:
        featuredataset = pickle.load(pf)
    if EXTRA_TEST_DATA_PATH is not None:
        with open(os.path.join(DATAFOLDER,EXTRA_TEST_DATA_PATH),'rb') as pf:
            extratest_featuredataset = pickle.load(pf)
    print('Successfully Loaded!\nIt took %.1fs to load from regular pickle' % (time()-startime))
else:
    raise Exception("Extension %s not supported!" % os.path.splitext(DATASET_FILENAME)[1])
print('Dataset loaded!')
if EXTRA_TEST_DATA_PATH is not None:
    print('Extra test dataset loaded too!')
# display(featuredataset)
# The data counts as windowed when at least one column name starts with "0_"
DATA_IS_WINDOWED = featuredataset.columns.str.match('0_').any()
# Collect the distinct numeric prefixes of the "<digits>_..." columns.
# The pattern is a raw string: '\d' in a normal literal is an invalid escape sequence.
WINDOW_INDEXES = sorted({int(e.split('_')[0]) for e in featuredataset.columns[featuredataset.columns.str.match(r'\d+_')]})
print('Data is WINDOWED!' if DATA_IS_WINDOWED else '', '%d windows' % (len(WINDOW_INDEXES)))

In [None]:
# Testing with extra data needs an extra test file, which the dataset-selection cell sets for only some window sizes
assert not (TEST_WITH_EXTRA_DATA and EXTRA_TEST_DATA_PATH is None), 'TEST_WITH_EXTRA_DATA is True but EXTRA_TEST_DATA_PATH is None!'

## Importing also the dataset metadata

In [None]:
# Load the dataset metadata tables; later cells read egtdb['files_df'] to map audio files to guitar and player ids
egtdb = egtdataset.import_db(basepath = metaset_root_path,
                     import_private_tables = False, 
                     do_test_integrity = False, 
                     verbose = True)

In [None]:
# Optionally load every augmented-data pickle and merge them into one dataframe
augmented_featuredataset_list = []
if USE_AUGMENTED_DATA:
    augmented_data_paths = glob(os.path.join(DATAFOLDER,'augmented_data','*.pickle'))
    for augmented_data_path in augmented_data_paths:
        print("Loading file %s" % os.path.basename(augmented_data_path))
        with open(augmented_data_path,'rb') as pf:
            augmented_featuredataset_list.append(pickle.load(pf))
    # NOTE(review): pd.concat is called even when no pickle was found, i.e. on an empty list - confirm that case cannot happen
    augmented_featuredataset = pd.concat(augmented_featuredataset_list, ignore_index=True)
    print("Loaded %d augmented samples" % len(augmented_featuredataset))

### Drop features that we have found to be problematic with feature selection and training

In [None]:
def drop_unused_features_old(features_df: pd.DataFrame, is_windowed = False, inplace = False) -> pd.DataFrame:
    """Older variant of drop_unused_features that drops the columns window by window.

    Removes the 'attackTime_peaksamp', 'attackTime_attackStartIdx' and
    'peakSample_index' columns, prefixed with '<window index>_' for windowed
    data. A group of three is dropped only when all three columns exist;
    otherwise a warning is printed and nothing is dropped for that group.

    Args:
        features_df: dataframe to clean.
        is_windowed: when True, process one column triple per entry of WINDOW_INDEXES.
        inplace: when True, modify and return features_df itself instead of a copy.

    Returns:
        The cleaned dataframe.
    """
    if inplace:
        res_df = features_df
        print('Not Copied',flush=True)
    else:
        res_df = features_df.copy()
        print('Copied',flush=True)

    if not is_windowed:
        unwanted = ['attackTime_peaksamp', 'attackTime_attackStartIdx', 'peakSample_index']
        # The non-windowed warning reports presence flags, not names
        found = tuple(name in res_df.columns.to_list() for name in unwanted)
        if all(found):
            res_df.drop(columns=unwanted, inplace=True)
        else:
            # raise Exception("The features dataframe does not contain the required columns!")
            print("Warning! The features dataframe does not contain the required columns! (%s, %s, %s)" % found)
        return res_df

    for window_index in WINDOW_INDEXES:
        print('Dropping bad columns for index %d'%(window_index),flush=True)
        unwanted = ['%s_attackTime_peaksamp' % window_index,
                    '%s_attackTime_attackStartIdx' % window_index,
                    '%s_peakSample_index' % window_index]
        current_columns = res_df.columns.to_list()
        if all(name in current_columns for name in unwanted):
            res_df.drop(columns=unwanted, inplace=True)
        else:
            # Show warning instead of raising an exception
            print("Warning! The features dataframe does not contain the required columns! (%s, %s, %s)" % tuple(unwanted))
    return res_df

def drop_unused_features(features_df: pd.DataFrame, is_windowed = False, inplace = False) -> pd.DataFrame:
    """Drop the attack-time / peak-sample index columns from a feature dataframe.

    The columns removed are 'attackTime_peaksamp', 'attackTime_attackStartIdx'
    and 'peakSample_index', prefixed with '<window index>_' for windowed data.
    A group of three is dropped only when all three columns exist; otherwise a
    warning is printed and that group is left alone. All drops happen in a
    single DataFrame.drop call at the end.

    Args:
        features_df: dataframe to clean.
        is_windowed: when True, process one column triple per entry of WINDOW_INDEXES.
        inplace: when True, modify and return features_df itself instead of a copy.

    Returns:
        The cleaned dataframe.
    """
    if not inplace:
        res_df = features_df.copy()
        print('Copied',flush=True)
    else:
        res_df = features_df
        print('Not Copied',flush=True)

    # Build the column lookup once: the original rebuilt columns.to_list()
    # up to three times per window and scanned it linearly each time.
    available = set(res_df.columns)
    base_names = ('attackTime_peaksamp', 'attackTime_attackStartIdx', 'peakSample_index')

    todrop = []
    if is_windowed:
        for window_index in WINDOW_INDEXES:
            wanted = ['%s_%s' % (window_index, name) for name in base_names]
            if all(col in available for col in wanted):
                todrop.extend(wanted)
            else:
                print("Warning! The features dataframe does not contain the required columns! (%s, %s, %s)" % tuple(wanted))
    else:
        # The non-windowed warning reports presence flags, not names
        found = tuple(name in available for name in base_names)
        if all(found):
            todrop.extend(base_names)
        else:
            print("Warning! The features dataframe does not contain the required columns! (%s, %s, %s)" % found)

    res_df.drop(columns=todrop, inplace=True)
    return res_df


# Clean the main dataset (timed), then the extra test and augmented datasets when they were loaded
#measure time
startime = time()
featuredataset = drop_unused_features(featuredataset, is_windowed = DATA_IS_WINDOWED)
print('It took %.1fs to drop unused features' % (time()-startime))

if EXTRA_TEST_DATA_PATH:
    extratest_featuredataset = drop_unused_features(extratest_featuredataset, is_windowed = DATA_IS_WINDOWED)
if USE_AUGMENTED_DATA:
    augmented_featuredataset = drop_unused_features(augmented_featuredataset, is_windowed = DATA_IS_WINDOWED)

### If specified, drop extra percussive recorded data

In [None]:
# if FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms:
#     assert featuredataset.shape == (EXPECTED_DATASED_SIZE, 504)
# Remove the rows whose audio file path contains "additional-500" and renumber the index
if DROP_EXTRA_PERCUSSIVE_SOUNDS:
    to_drop_count = np.count_nonzero(featuredataset.meta_audiofilePath.str.contains("additional-500").values)
    # NOTE(review): a count is never negative, so this branch always runs (">= 0" was presumably meant to be "> 0").
    # Left as is because the index reset below then always happens - confirm before changing.
    if to_drop_count >= 0:
        print('Dropping %d additional percussive recordings because "DROP_EXTRA_PERCUSSIVE_SOUNDS" was specified.'%(to_drop_count))
        featuredataset = featuredataset[~featuredataset.meta_audiofilePath.str.contains("additional-500")].reset_index(drop=True)
        print('Dataset shape after dropping extra percussive recordings: %s'%(str(featuredataset.shape)))
    # if FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms:
    #     assert featuredataset.shape == (EXPECTED_DATASED_SIZE-2237, 504)


# Same filtering for the augmented data, keyed on the augmentation source; the result goes to a separate *_dr dataframe
if USE_AUGMENTED_DATA and DROP_EXTRA_PERCUSSIVE_SOUNDS_FROMAUG:
    augmented_featuredataset_dr = augmented_featuredataset.copy()
    to_drop_count_aug = np.count_nonzero(augmented_featuredataset.meta_augmentation_source.str.contains("additional-500").values)
    # NOTE(review): always true as well, see the note on the main dataset
    if to_drop_count_aug >= 0:
        print('Dropping %d additional percussive recordings because "DROP_EXTRA_PERCUSSIVE_SOUNDS" was specified.'%(to_drop_count_aug))
        augmented_featuredataset_dr = augmented_featuredataset[~augmented_featuredataset.meta_augmentation_source.str.contains("additional-500")].reset_index(drop=True)
        print('Dataset shape after dropping extra percussive recordings: %s'%(str(augmented_featuredataset_dr.shape)))

In [None]:
# Divide dataset into metadata, features and labels
def divide_dataset(features_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
    """Split a feature dataframe into metadata, features and labels.

    Metadata are the columns whose name starts with 'meta_', labels are the
    'meta_expressive_technique_id' column, and features are all remaining
    columns. Values are converted to numeric types where possible; anything
    that cannot be converted is kept unchanged.

    Args:
        features_df: dataframe holding the 'meta_*' columns and the feature columns.

    Returns:
        A (metadata, features, labels) tuple.
    """
    def _to_numeric_or_keep(values):
        # Stands in for pd.to_numeric(..., errors='ignore'), which pandas has
        # deprecated: convert when possible, otherwise return the input unchanged.
        try:
            return pd.to_numeric(values)
        except (ValueError, TypeError):
            return values

    metadata = features_df.filter(regex='^meta_',axis=1)
    labels = features_df.meta_expressive_technique_id
    features = features_df.loc[:,[col for col in features_df.columns if col not in metadata.columns]]
    # Convert to numeric formats where possible (somehow convert_dtypes doesn't work [https://stackoverflow.com/questions/65915048/pandas-convert-dtypes-not-working-on-numbers-marked-as-objects])
    # DataFrame.apply converts column by column, Series.apply element by element, as before
    metadata = metadata.apply(_to_numeric_or_keep)
    labels = labels.apply(_to_numeric_or_keep)
    features = features.apply(_to_numeric_or_keep)
    return metadata, features, labels

# Split the main dataset and check the expected column counts
metadata, features, labels = divide_dataset(featuredataset)
# Set to True further down, once the limited-study filtering has been applied
DATASET_ALREADY_FILTERED_FOR_LIMITED_STUDY = False
assert metadata.shape[1] == 9
# The feature count is only checked for the two non-windowed sizes
if FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms:
    assert features.shape[1] == 495
elif FEATURE_WINDOW_SIZE == FeatureWindowSize.s4800_SAMPLES_100ms:
    assert features.shape[1] == 2543

if USE_AUGMENTED_DATA:
    metadata_aug, features_aug, labels_aug = divide_dataset(augmented_featuredataset_dr if DROP_EXTRA_PERCUSSIVE_SOUNDS_FROMAUG else augmented_featuredataset)

In [None]:
# metadata, features, labels 
def filter_by_filenames(metadata, features, labels, filenames):
    """Keep only the samples whose audio file is listed in *filenames*.

    Args:
        metadata: dataframe with a 'meta_audiofilePath' column, one row per sample.
        features: feature dataframe with the same number of rows as metadata.
        labels: label series with the same number of entries as metadata.
        filenames: audio file paths to keep; each must occur in metadata.

    Returns:
        The filtered (metadata, features, labels), each with a fresh 0..n-1 index.

    Raises:
        AssertionError: if the three inputs differ in length, or if the files
            found in the filtered metadata are not exactly *filenames*.
    """
    # Check the inputs are aligned before one row mask is applied to all three
    assert len(metadata.values) == len(features.values) == len(labels.values), 'The number of samples in the metadata, features and labels do not match!'

    # A positional boolean mask selects the same rows in all three objects,
    # whatever their index labels are (the original fed index labels to iloc).
    keep = metadata.meta_audiofilePath.isin(filenames).to_numpy()
    _metadata = metadata[keep]
    _features = features[keep]
    _labels = labels[keep]

    kept_files = np.unique(_metadata.meta_audiofilePath.values)
    # The closing parenthesis used to sit after len(filenames), so the assert
    # tested the length of a comparison result instead of comparing two counts.
    assert len(kept_files) == len(filenames), 'The number of unique audiofiles in the filtered metadata does not match the number of unique audiofiles in filenames'
    assert sorted(kept_files) == sorted(filenames), 'The unique audiofiles in the filtered metadata do not match the unique audiofiles in filenames'
    return _metadata.reset_index(drop=True), _features.reset_index(drop=True), _labels.reset_index(drop=True)

# Restrict the dataset to the guitar/player pairs selected in WHICH_GUITAR_PLAYER_COMBOS_TO_USE.
# The first branch makes the cell a no-op when it is re-run after a successful filtering.
if classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8 and DATASET_ALREADY_FILTERED_FOR_LIMITED_STUDY:
    print('Dataset already filtered for limited study (Guitar/Player pairs: %s), skipping filtering step.'%(WHICH_GUITAR_PLAYER_COMBOS_TO_USE))
elif classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8:
    if USE_AUGMENTED_DATA:
        raise NotImplementedError('I did not implement the code for augmented data filtering with limited dataset number yet!')
    # First we get the guitar/player pairs that are in the dataset
    unique_filenames = metadata.meta_audiofilePath.unique() # This is from the current feature csv
    # Now we get from the full dataset, the data about the current feature files
    files_df = egtdb['files_df']
    phase3_files_df = files_df[files_df.index.isin(unique_filenames)]
    # Guitar/Player pairs in filtered dataset
    guitar_player_pairs = [(i[0],i[1]) for i in np.unique(list(zip(phase3_files_df['guitar_id'].values,phase3_files_df['player_id'].values)),axis=0)]
    # One lowercase letter per pair, starting at 'a' (chr(97)), in the order np.unique returned the pairs
    gppairs_char_indexes = [chr(97+ctr) for ctr in range(len(guitar_player_pairs))]
    print('Guitar/Player pairs in the dataset:\n%s'%'\n'.join(['-[%s] guit%s-play%s'%(gppairs_char_indexes[ctr],i,j) for (ctr,(i,j)) in enumerate(guitar_player_pairs)]))

    additional_info_to_save = ''
    if WHICH_GUITAR_PLAYER_COMBOS_TO_USE != None:
        # Validate the selection: no duplicates, not more than the available pairs, strings only, known letters only
        assert len(set(WHICH_GUITAR_PLAYER_COMBOS_TO_USE)) == len(WHICH_GUITAR_PLAYER_COMBOS_TO_USE), 'Please specify guitar/player pairs to use without duplicates (e.g. ["a","b","c"]'
        assert len(WHICH_GUITAR_PLAYER_COMBOS_TO_USE) <= len(guitar_player_pairs), 'You specified more guitar/player pairs to use than there are in the dataset!'
        assert all([isinstance(e,str) for e in WHICH_GUITAR_PLAYER_COMBOS_TO_USE]), 'Please specify guitar/player pairs to use as a list of strings'
        assert all([e in gppairs_char_indexes for e in WHICH_GUITAR_PLAYER_COMBOS_TO_USE]), '%s is/are not valid guitar/player pairs, please use one of the following: %s'%(str([e for e in WHICH_GUITAR_PLAYER_COMBOS_TO_USE if e not in gppairs_char_indexes]),str(gppairs_char_indexes))
        # Human-readable description of the selection, printed below
        additional_info_to_save = 'This is a study with LIMITED number of guitar/player pairs in the dataset. The guitar/player pairs used are: %s'%str(WHICH_GUITAR_PLAYER_COMBOS_TO_USE)
        additional_info_to_save += '\nThese correspond to:\n'+'\n'.join(['-[%s] guitar %s / player %s'%(gppairs_char_indexes[ctr],i,j) for (ctr,(i,j)) in enumerate(guitar_player_pairs) if gppairs_char_indexes[ctr] in WHICH_GUITAR_PLAYER_COMBOS_TO_USE])
        additional_info_to_save += '\nCheck correspondance at "%s" and "%s"'%(os.path.join(metaset_root_path,'metadata','instruments.csv'),os.path.join(metaset_root_path,'metadata','people.csv'))

        gp_pairs_selected = [guitar_player_pairs[i] for i,e in enumerate(gppairs_char_indexes) if e in WHICH_GUITAR_PLAYER_COMBOS_TO_USE]

        # Now we filter the dataset
        limited_filtered_filenames = []
        for (guitar_id,player_id) in gp_pairs_selected:
            limited_filtered_filenames += list(phase3_files_df[(phase3_files_df['guitar_id'] == guitar_id) & (phase3_files_df['player_id'] == player_id)].index)
        print(additional_info_to_save)

        # Cross-check: the distinct "_XxxYxx[d]_" tags found in the selected file names must be as many as the selected pairs
        # NOTE(review): presumably that tag encodes the player and guitar - confirm against the file naming scheme
        g_filenames_players = np.unique([re.findall(r'_[A-Z]\w\w[A-Z]\w\w\d?_',e) for e in limited_filtered_filenames])
        assert len(g_filenames_players) == len(gp_pairs_selected), 'The number of unique players in the filtered filenames (%d) does not match the number of unique players in the guitar/player pairs that should have been selected (%s) = %s != %s'% (len(g_filenames_players), len(gp_pairs_selected),str(g_filenames_players), str(gp_pairs_selected))
        # print(limited_filtered_filenames)
    else:
        raise NotImplementedError('Random Player selection not implemented yet, please specfy a lust of guitar/player pairs to use in WHICH_GUITAR_PLAYER_COMBOS_TO_USE. (chars from a to %s)'%(max(gppairs_char_indexes)))
    
    metadata, features, labels = filter_by_filenames(metadata, features, labels, limited_filtered_filenames)
    DATASET_ALREADY_FILTERED_FOR_LIMITED_STUDY = True

In [None]:
def get_classes_description(classftask: ClassificationTask):
    """Return the class ids, long names and short names for a classification task.

    Args:
        classftask: task to describe; only FULL_8_CLASS_PROBLEM and
            LIMITED_DATASET_NUMBER_FULL_8 are supported.

    Returns:
        A (classes, classes_desk, shortnames) tuple: the list of class ids,
        a dict from id to description and a dict from id to short name.

    Raises:
        Exception: if the task is not one of the supported ones.
    """
    # Decide on the task passed in: the original ignored the parameter and
    # read the module-level `classification_task` instead.
    if classftask == ClassificationTask.FULL_8_CLASS_PROBLEM or classftask == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8:
        classes_desk = {0:"Kick",1:"Snare 1",2:"Tom",3:"Snare 2",4:"Natural Harmonics",5:"Palm Mute",6:"Pick Near Bridge",7:"Pick Over the Soundhole"}
        shortnames = {0:"Kick",1:"Snare1",2:"Tom",3:"Snare2",4:"NatHarm",5:"PalmMute",6:"BridgeP",7:"SoundholeP"}
    else:
        raise Exception('The Classification Task selected is not supported')
    classes = list(classes_desk.keys())
    return classes,classes_desk,shortnames

def filter_dataset(tofilt_features,tofilt_labels,tofilt_metadata,classftask: ClassificationTask, hardcoded_sizes_test = False):
    """Check that the task is supported and reset the index of the three inputs.

    For the two supported tasks no rows are removed; the function only
    renumbers the index of each input in place.

    Args:
        tofilt_features: feature dataframe (its index is reset in place).
        tofilt_labels: label series (its index is reset in place).
        tofilt_metadata: metadata dataframe (its index is reset in place).
        classftask: task to filter for; only FULL_8_CLASS_PROBLEM and
            LIMITED_DATASET_NUMBER_FULL_8 are supported.
        hardcoded_sizes_test: accepted for compatibility with existing
            callers; not used by this function.

    Returns:
        The same (features, labels, metadata) objects that were passed in.

    Raises:
        Exception: if the task is not one of the supported ones.
    """
    # Decide on the task passed in: the original ignored the parameter and
    # read the module-level `classification_task` instead.
    if classftask == ClassificationTask.FULL_8_CLASS_PROBLEM or classftask == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8:
        pass
    else:
        raise Exception('The Classification Task selected is not supported')


    tofilt_features.reset_index(drop=True,inplace=True)
    tofilt_labels.reset_index(drop=True,inplace=True)
    tofilt_metadata.reset_index(drop=True,inplace=True)

    return tofilt_features, tofilt_labels, tofilt_metadata

# Work on copies so that the dataframes produced by the previous cells stay untouched
original_dataset_features = features.copy()
dataset_labels = labels.copy()
dataset_metadata = metadata.copy()

assert classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8
CLASSES,CLASSES_DESC,SHORTDESC = get_classes_description(classification_task)
original_dataset_features,dataset_labels,dataset_metadata = filter_dataset(original_dataset_features,dataset_labels,dataset_metadata,classification_task,hardcoded_sizes_test=True if FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms else False)
if USE_AUGMENTED_DATA:
    features_aug,labels_aug,metadata_aug = filter_dataset(features_aug,labels_aug,metadata_aug,classification_task,hardcoded_sizes_test=False)
    # The augmented labels must contain exactly the classes of the task
    assert len(np.sort(CLASSES)) == len(np.sort(pd.unique(labels_aug))) and np.equal(np.sort(CLASSES),np.sort(pd.unique(labels_aug))).all()

# The labels left in the dataset must be exactly the classes of the task
assert len(np.sort(CLASSES)) == len(np.sort(pd.unique(dataset_labels))) and np.equal(np.sort(CLASSES),np.sort(pd.unique(dataset_labels))).all(), "The classes in the dataset are not the same as the ones in the classification task"

In [None]:
# Print a short summary (entries, features, windows) for each dataset in use
toprint = [(original_dataset_features,'Main dataset')]
if USE_AUGMENTED_DATA:
    toprint.append((features_aug,'Augmented data'))

for dat,name in toprint:
    print('Dataset "'+name+'" read')
    print("├╴Entries: "+str(dat.shape[0]))
    if DATA_IS_WINDOWED:
        # Columns are spread over the windows, so divide to get the per-window count
        print('├╴Features per window: '+str(dat.shape[1]//len(WINDOW_INDEXES)))
        print("└╴Windows: "+str(len(WINDOW_INDEXES)))
    else:
        print('├╴Features: '+str(dat.shape[1]))

# Number of feature columns before any feature selection
original_feature_number = original_dataset_features.shape[1]

## Parse Command Line arguments

*_Important_*: If you are running this from a jupyter Notebook, change the run parameters at the end of the next cell

In [None]:
args = None
if not is_notebook() and not COLAB:
    import argparse
    parser = argparse.ArgumentParser(description='Train the expressive guitar technique classifier.')

    def featnum_type(x):
        """argparse type for --features: the literal 'all' or an integer in [1, 495].

        Raises argparse.ArgumentTypeError when the number is out of range;
        int() raises ValueError for text that is not an integer.
        """
        (MIN,MAX) = (1,495)
        if x == 'all':
            return x
        # int() already rejects non-integer text. The assert that used to follow
        # compared the converted int with its own float, so it could never catch
        # a non-integer and only misfired (AssertionError) on huge values.
        x = int(x)
        if x < MIN or x > MAX:
            raise argparse.ArgumentTypeError("Feature parameter must either 'all' or a number be between {} and {}".format(MIN, MAX))
        return x
    def netdepth_type(x):
        """argparse type for --net-depth: an integer in [0, 20].

        Raises argparse.ArgumentTypeError when out of range; int() raises
        ValueError for text that is not an integer.
        """
        (MIN,MAX) = (0,20)
        # The former post-conversion assert was vacuous and is dropped:
        # int() itself rejects non-integer text.
        x = int(x)
        if x < MIN or x > MAX:
            raise argparse.ArgumentTypeError("Network depth must be between {} and {}".format(MIN, MAX))
        return x
    def netwidth_type(x):
        """argparse type for --net-width: an integer in [1, 2000].

        Raises argparse.ArgumentTypeError when out of range; int() raises
        ValueError for text that is not an integer.
        """
        (MIN,MAX) = (1,2000)
        # The former post-conversion assert was vacuous and is dropped:
        # int() itself rejects non-integer text.
        x = int(x)
        if x < MIN or x > MAX:
            raise argparse.ArgumentTypeError("Network width must be between {} and {}".format(MIN, MAX))
        return x
    def dropout_type(x):
        """argparse type for --dropout: a float in [0, 1].

        Raises argparse.ArgumentTypeError when the value is out of range or NaN;
        float() raises ValueError for text that is not a number.
        """
        (MIN,MAX) = (0,1)
        x = float(x)
        # Written as "not in range" so that NaN, which fails every comparison,
        # is rejected too (the two separate comparisons let it through).
        if not (MIN <= x <= MAX):
            raise argparse.ArgumentTypeError("Dropout Rate must be between {} and {}".format(MIN, MAX))
        return x
    def aggressiveness_type(x):
        """argparse type for the oversampling aggressiveness: a float in [0, 1].

        Raises argparse.ArgumentTypeError when the value is out of range or NaN;
        float() raises ValueError for text that is not a number.
        """
        (MIN,MAX) = (0,1)
        x = float(x)
        # Written as "not in range" so that NaN, which fails every comparison,
        # is rejected too (the two separate comparisons let it through).
        if not (MIN <= x <= MAX):
            raise argparse.ArgumentTypeError("Oversampling aggressiveness value must be between {} and {}".format(MIN, MAX))
        return x
    def lr_type(x):
        """argparse type for --learning-rate: a float in [0, 1].

        Raises argparse.ArgumentTypeError when the value is out of range or NaN;
        float() raises ValueError for text that is not a number.
        """
        (MIN,MAX) = (0,1)
        x = float(x)
        # Written as "not in range" so that NaN, which fails every comparison,
        # is rejected too (the two separate comparisons let it through).
        if not (MIN <= x <= MAX):
            raise argparse.ArgumentTypeError("Learning rate must be between {} and {}".format(MIN, MAX))
        return x
    def batchsize_type(x):
        """argparse type for --batchsize: an integer in [1, 4096].

        Raises argparse.ArgumentTypeError when out of range; int() raises
        ValueError for text that is not an integer.
        """
        (MIN,MAX) = (1,4096)
        # The former post-conversion assert was vacuous and is dropped:
        # int() itself rejects non-integer text.
        x = int(x)
        if x < MIN or x > MAX:
            raise argparse.ArgumentTypeError("Batchsize must be between {} and {}".format(MIN, MAX))
        return x
    def epochs_type(x):
        """Validate the ``--epochs`` option value for argparse.

        Args:
            x: Raw option value, convertible with ``int()``.

        Returns:
            The value as an ``int`` in [1, 10000].

        Raises:
            ValueError: If ``int(x)`` fails (e.g. ``"3.5"``).
            argparse.ArgumentTypeError: If the integer is outside [1, 10000].
        """
        lower, upper = 1, 10000
        # int() already rejects non-integer text, so no extra integrality
        # check (and no float conversion that can overflow) is needed.
        epochs = int(x)
        if not lower <= epochs <= upper:
            raise argparse.ArgumentTypeError("Epoch number must be between {} and {}".format(lower, upper))
        return epochs
    def kfold_type(x):
        """Validate the ``--k-folds`` option value for argparse.

        Args:
            x: Raw option value, convertible with ``int()``.

        Returns:
            The value as an ``int`` in [1, 20].

        Raises:
            ValueError: If ``int(x)`` fails (e.g. ``"3.5"``).
            argparse.ArgumentTypeError: If the integer is outside [1, 20].
        """
        lower, upper = 1, 20
        # int() already rejects non-integer text, so no extra integrality
        # check (and no float conversion that can overflow) is needed.
        folds = int(x)
        if not lower <= folds <= upper:
            raise argparse.ArgumentTypeError("KFOLD size must be between {} and {}".format(lower, upper))
        return folds
    def c1d_type(x):
        """Validate the ``--conv`` option value for argparse.

        Args:
            x: Raw option value, convertible with ``int()``.

        Returns:
            The value as an ``int`` in [0, 5].

        Raises:
            ValueError: If ``int(x)`` fails (e.g. ``"3.5"``).
            argparse.ArgumentTypeError: If the integer is outside [0, 5].
        """
        lower, upper = 0, 5
        # int() already rejects non-integer text, so no extra integrality
        # check (and no float conversion that can overflow) is needed.
        layers = int(x)
        if not lower <= layers <= upper:
            raise argparse.ArgumentTypeError("Number of conv layers must be between {} and {}".format(lower, upper))
        return layers

    # Register the command-line options. Each `type=` callback converts the raw
    # string and rejects out-of-range values; the help texts state the range
    # accepted by the matching callback and the actual default.
    # NOTE(review): the hard-coded settings in the else-branch also define an
    # 'oversampling' key that has no CLI counterpart here -- confirm nothing
    # downstream reads args['oversampling'] when running from the command line.
    parser.add_argument('-f',  '--features',      default='all',  type=featnum_type,   help="Number of features to use for training [1-495] or 'all' (default: all)")
    parser.add_argument('-d',  '--net-depth',     default=3,      type=netdepth_type,  help='Number of dense layers in the network [0-20] (default: 3)')
    parser.add_argument('-w',  '--net-width',     default=100,    type=netwidth_type,  help='Width of the dense layers in the FFNN [1-2000] (default: 100)')
    parser.add_argument('-dr', '--dropout',       default=0.15,   type=dropout_type,   help='Dropout amount [0-1] (default: 0.15)')
    parser.add_argument('-lr', '--learning-rate', default=0.0001, type=lr_type,        help='Learning rate [0-1] (default: 0.0001)')
    parser.add_argument('-bs', '--batchsize',     default=256,    type=batchsize_type, help='Batch size [1-4096] (default: 256)')
    parser.add_argument('-e',  '--epochs',        default=1000,   type=epochs_type,    help='Number of epochs [1-10000] (default: 1000)')
    parser.add_argument('-k',  '--k-folds',       default=5,      type=kfold_type,     help='K of K-folds [1-20] (default: 5)')
    parser.add_argument('-osagg', '--oversampling-aggressiveness', default=0.2, type=aggressiveness_type, help='Oversampling aggressiveness [0-1] (default: 0.2)')
    parser.add_argument('-c1d',   '--conv',       default=0,      type=c1d_type,       help='Number of conv1D layers at the beginning [0-5] (default: 0)')
    # The list-valued options use the builtin `ascii` as converter, which
    # returns the repr of the text (i.e. wrapped in quotes); the quotes are
    # stripped from these six values right after this if/else.
    parser.add_argument('-ck',   '--conv-kernels',     default='', type=ascii, help='Comma-separated list of kernel sizes for conv1D layers (es: 3,5,7)')
    parser.add_argument('-cs',   '--conv-strides',     default='', type=ascii, help='Comma-separated list of strides for conv1D layers (es: 1,1,1)')
    parser.add_argument('-cf',   '--conv-filters',     default='', type=ascii, help='Comma-separated list of filters for conv1D layers (es: 32,64,128)')
    parser.add_argument('-cact', '--conv-activations', default='', type=ascii, help='Comma-separated list of activations for conv1D layers (es: relu,relu,relu)')
    parser.add_argument('-cp',   '--conv-padding',     default='', type=ascii, help='Comma-separated list of padding method layers. Use "same" or "valid" (es: same,same,same)')
    parser.add_argument('-pl',   '--pool-layers',      default='', type=ascii, help='Comma-separated list of pool layers. Use "N" for none, "M" for max-pooling and "A" for average pooling  (es: M,N,M)')
    parser.add_argument('-v', '--verbose',        action='store_true', help='increase output verbosity')
    # Work with a plain dict so both branches of this if/else produce the same structure.
    args = vars(parser.parse_args())
else:

 
    # These are the best parameters found by the hyperparameter optimization
    # for problem 704. You are not supposed to change them here.
    args = dict(
        features='all',
        net_depth=2,
        net_width=32,
        dropout=0.5,
        learning_rate=0.00008,
        batchsize=64,
        epochs=500,
        k_folds=5,
        oversampling=True,
        oversampling_aggressiveness=1.0,
        conv=2,
        conv_kernels='3,3',
        conv_strides='1,1',
        conv_filters='4,4',
        conv_activations='relu,relu',
        conv_padding='same,same',
        pool_layers='M,M',
        verbose=False,
    )


# Values that went through argparse's `type=ascii` arrive wrapped in single
# quotes (repr form); remove them so the lists below can be split and converted.
for _conv_key in ('conv_kernels', 'conv_filters', 'conv_activations', 'conv_strides', 'conv_padding', 'pool_layers'):
    args[_conv_key] = args[_conv_key].strip("'")


def _conv_option_list(key, convert=str):
    """Split the comma-separated option ``args[key]`` into a list.

    Args:
        key: Name of the entry of ``args`` holding the comma-separated text.
        convert: Callable applied to every (whitespace-stripped) item.

    Returns:
        An empty list when no conv layers are requested, otherwise one
        converted item per comma-separated entry. The text is always split,
        also for a single conv layer, so a list with too many entries is
        caught by the length checks below instead of being kept as one
        bogus item.
    """
    if args['conv'] < 1:
        return []
    return [convert(item.strip()) for item in args[key].split(',')]


KERNEL_SIZES = _conv_option_list('conv_kernels', int)
print('KERNEL_SIZES: ', KERNEL_SIZES)
FILTERS = _conv_option_list('conv_filters', int)
print('FILTERS: ', FILTERS)
# The literal text "none" (any case) selects no activation for that layer.
CONV_ACTIVATIONS = [None if e.lower() == 'none' else e for e in _conv_option_list('conv_activations')]
print('CONV_ACTIVATIONS: ', CONV_ACTIVATIONS)
STRIDES = _conv_option_list('conv_strides', int)
print('STRIDES: ', STRIDES)
PADDING = _conv_option_list('conv_padding')
print('PADDING: ', PADDING)
POOL_LAYERS = _conv_option_list('pool_layers')
# Pooling layers must be one of 'M', 'N', or 'A'
assert all([e in ['M','N','A'] for e in POOL_LAYERS]), "Pooling layers must be one of 'M', 'N', or 'A'"
print('POOL_LAYERS: ', POOL_LAYERS)

# Every conv layer gets the same pool size.
POOL_SIZES = [2]*args['conv']

# Each per-layer list must provide exactly one entry per conv layer.
for _label, _values in (
    ('kernel sizes', KERNEL_SIZES),
    ('filters', FILTERS),
    ('activations', CONV_ACTIVATIONS),
    ('strides', STRIDES),
    ('padding arguments', PADDING),
    ('pool layers', POOL_LAYERS),
):
    assert len(_values) == args['conv'], "The number of {} must be equal to the number of conv layers ({} != {})".format(_label, len(_values), args['conv'])

gc.collect() # call garbage collector to free up memory
tf.keras.backend.clear_session()

In [None]:
# Column names have the form '<window>_<descriptor>': for each of the six
# windows (0-5), the same 271 descriptors are listed in the same order.
selected_features = [
    '%d_%s' % (window, descriptor)
    for window in range(6)
    for descriptor in (
        ['attackTime_value', 'barkSpecBrightness']
        + ['barkSpec_%d' % i for i in range(1, 51)]
        + ['bfcc_%d' % i for i in range(1, 51)]
        + ['cepstrum_%d' % i for i in range(1, 130)]
        + ['mfcc_%d' % i for i in range(1, 39)]
        + ['peakSample_value', 'zeroCrossing']
    )
]

AUTO_FEATURE_NUMBER = len(selected_features)
if args['features'] == 'all':
    # With every feature requested, the list must have as many entries as the original feature set.
    assert len(selected_features) == original_feature_number

    if DATA_IS_WINDOWED:
        # Turn the total column count into a per-window count.
        AUTO_FEATURE_NUMBER //= len(WINDOW_INDEXES)

# Select the columns first and copy afterwards, so the columns that are
# dropped anyway are never duplicated in memory.
dataset_features = original_dataset_features.loc[:, selected_features].copy()

if FEATURE_SELECTION == FeatureSelection.NONE:
    print("No feature selection applied")
    assert AUTO_FEATURE_NUMBER * len(WINDOW_INDEXES) == dataset_features.shape[1], "ERROR: the number of features selected is not correct (%d * %d != %d)"%(AUTO_FEATURE_NUMBER,len(WINDOW_INDEXES),dataset_features.shape[1])
    print('%d features are kept'%(AUTO_FEATURE_NUMBER),('for each of the %d windows'%len(WINDOW_INDEXES) if DATA_IS_WINDOWED else ''))
else:
    is_manual = FEATURE_SELECTION in (FeatureSelection.MANUAL_LIST, FeatureSelection.MANUAL_VARIABLES)
    print("Features reduced "+('manually' if is_manual else 'automatically')+" ("+str(FEATURE_SELECTION)+") from "+str(original_feature_number)+" to : "+str(dataset_features.shape[1]))
    if DATA_IS_WINDOWED:
        # Count the distinct '<digit>_' prefixes among the original column names.
        # Raw string: '\d' in a normal string literal is an invalid escape sequence.
        windowed_columns = original_dataset_features.columns[original_dataset_features.columns.str.match(r'\d_')]
        window_prefixes = {name.split('_')[0] for name in windowed_columns.to_list()}
        print('%d features for each of the %d windows'%(AUTO_FEATURE_NUMBER, len(window_prefixes)))
    else:
        print('')


# Lift NumPy's print truncation threshold while listing the columns; the
# context manager restores the previous options even if printing fails.
with np.printoptions(threshold=np.inf):
    print("Features selected: "+str(dataset_features.columns.to_list()))

## Evaluate class support
(What percentage of dataset entries represent each class)

In [None]:
# Set to True to print the class support of the loaded labels when this cell runs.
DO_PRINT_SUPPORT = False


def printSupport(labels_ds):
    """Print, for every class index, the percentage of entries carrying that label.

    Args:
        labels_ds: NumPy array of non-negative integer class labels, of any
            shape (it is flattened before counting).
    """
    counts = np.bincount(np.reshape(labels_ds, labels_ds.size))
    # The total is the same for every class, so compute it once.
    total = counts.sum()
    for class_index, count in enumerate(counts):
        print("Class " + str(class_index) + " support: " + "{:.2f}".format(count / total * 100) + "%")


if DO_PRINT_SUPPORT:
    printSupport(dataset_labels.to_numpy())

# Define model architecture, loss, optimizer and compile model

In [None]:
def define_model_architecture(num_classes:int, _verbose = False):
    """Build the (uncompiled) Keras ``Sequential`` classifier for the current run.

    The network is assembled from module-level settings:

    * ``args['conv']`` convolutional stages (1D, or 2D when
      ``WINDOWED_INPUT_MODE`` is ``WindowedInputMode._2D``), each followed by
      batch normalization and an optional max ('M') or average ('A') pooling
      layer chosen by ``POOL_LAYERS``; if there is at least one stage the
      result is flattened and passed through dropout;
    * ``args['net_depth']`` blocks of Dense(relu, he_uniform) + batch
      normalization + dropout;
    * a final Dense layer with ``num_classes`` units and no activation.

    Layers are named ``<kind>_<n>`` with ``n`` counting from 0 per kind; the
    output layer is prefixed with ``OUT_``.

    Args:
        num_classes: number of units of the output layer.
        _verbose: accepted for interface compatibility; not used.

    Returns:
        The ``tf.keras.models.Sequential`` model, named with a timestamp.
    """
    tf.keras.backend.set_floatx('float32')

    keras_layers = tf.keras.layers
    if WINDOWED_INPUT_MODE == WindowedInputMode._2D:
        conv_cls = keras_layers.Conv2D
        pooling_classes = {'M': keras_layers.MaxPooling2D, 'A': keras_layers.AveragePooling2D}
        input_shape = (len(WINDOW_INDEXES), AUTO_FEATURE_NUMBER, 1)
    else:
        conv_cls = keras_layers.Conv1D
        pooling_classes = {'M': keras_layers.MaxPooling1D, 'A': keras_layers.AveragePooling1D}
        input_shape = (AUTO_FEATURE_NUMBER, 1)

    # How many names have been handed out so far for each layer kind.
    issued_names = {}

    def unique_name(kind):
        index = issued_names.get(kind, 0)
        issued_names[kind] = index + 1
        return '%s_%d'%(kind,index)

    layers = []
    conv_stages = args['conv']
    if conv_stages > 0:
        for stage in range(conv_stages):
            conv_kwargs = dict(filters=FILTERS[stage],
                               kernel_size=KERNEL_SIZES[stage],
                               strides=STRIDES[stage],
                               padding=PADDING[stage],
                               activation=CONV_ACTIVATIONS[stage])
            if stage == 0:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = input_shape
            layers.append(conv_cls(name=unique_name('conv'), **conv_kwargs))
            layers.append(keras_layers.BatchNormalization(name=unique_name('batchnorm')))

            pool_kind = POOL_LAYERS[stage]
            if pool_kind != 'N':
                # The name is reserved for any kind other than 'N', even if no layer is added.
                pool_name = unique_name(pool_kind+'pool')
                pool_cls = pooling_classes.get(pool_kind)
                if pool_cls is not None:
                    layers.append(pool_cls(pool_size=POOL_SIZES[stage],name=pool_name))

        layers.append(keras_layers.Flatten(name=unique_name('flatten')))
        layers.append(keras_layers.Dropout(args['dropout'],name=unique_name('dropout')))

    for _ in range(args['net_depth']):
        layers.append(keras_layers.Dense(args['net_width'],activation='relu',
                                         kernel_initializer='he_uniform',
                                         name=unique_name('dense')))
        layers.append(keras_layers.BatchNormalization(name=unique_name('batchnorm')))
        layers.append(keras_layers.Dropout(args['dropout'],name=unique_name('dropout')))

    # No activation on the output layer: it emits one raw score per class.
    layers.append(keras_layers.Dense(num_classes,name='OUT_'+unique_name('dense')))

    model = tf.keras.models.Sequential(layers)
    model._name = "guitar_timbre_classifier_" + strftime("%Y%m%d-%H%M%S")
    return model

def get_loss():
    """Return the training loss: sparse categorical cross-entropy computed on logits."""
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    return loss_fn

In [None]:
def compile_model(model,optimizer,loss_fn,_verbose = False):
    """Compile ``model`` in place with the requested optimizer, loss and accuracy metric.

    Args:
        model: Keras model to compile.
        optimizer: dict with a ``"method"`` key (``"sgd"`` or ``"adam"``) and a
            ``"learning_rate"`` key; ``"sgd"`` also requires ``"momentum"``.
        loss_fn: loss passed to ``model.compile``.
        _verbose: when true, print a confirmation message.

    Raises:
        Exception: if ``optimizer["method"]`` is neither ``"sgd"`` nor ``"adam"``.
    """
    method = optimizer["method"]
    if method not in ("sgd", "adam"):
        raise Exception("Optimizer method not supported")

    if method == "sgd":
        keras_optimizer = tf.keras.optimizers.SGD(learning_rate = optimizer["learning_rate"], momentum=optimizer["momentum"])
    else:
        keras_optimizer = tf.keras.optimizers.Adam(learning_rate = optimizer["learning_rate"])

    model.compile(optimizer=keras_optimizer,
                  loss=loss_fn,
                  metrics=['accuracy'])

    # Why is there no custom macro-average F1-score metric here?
    # Macro-average F1 (the non-weighted mean of the per-class F1-scores) is a
    # relevant metric for this problem because the classes are imbalanced.
    # However, TensorFlow seems to compute metrics per batch and then average
    # them, and custom metrics seem to require tensor operations: converting to
    # numpy arrays does not seem to be supported for the computation.
    # See https://stackoverflow.com/a/52659570/10930862

    if _verbose:
        print("Model compiled")

# Save Models and Info functions

In [None]:
def save_model_info(model,optimizer,final_cross_validation_results,folds,metrics,outpath, fold_zerobased = None, smote_strategy = None,_additional_info_to_save = None):
    """Write a plain-text report describing a training run into ``outpath``.

    The report goes to ``info.txt`` (or ``info_fold_<k>.txt`` when
    ``fold_zerobased`` is given) and lists, in order: how the run was launched,
    oversampling and augmentation settings, the model summary, the dataset and
    window configuration, the selected features, normalization, the run
    arguments, the optimizer settings, the training length and every entry of
    ``metrics``. Metrics whose name contains ``'DICT'`` are additionally written
    to their own ``<name without 'DICT'>.txt`` file. For whole-run reports (no
    fold index) the TensorBoard logs and a backup copy of the script/notebook
    are copied next to the report as well.

    Besides its arguments, the function reads several module-level settings
    (``DO_OVERSAMPLING``, ``USE_AUGMENTED_DATA``, ``FEATURE_WINDOW_SIZE``,
    ``selected_features``, ``args``, ...).

    Args:
        model: Keras model whose ``summary()`` is included in the report.
        optimizer: dict with ``"method"`` (``"sgd"``/``"adam"``),
            ``"learning_rate"`` and, for SGD, ``"momentum"``.
        final_cross_validation_results: true when the report describes the
            averaged results of a cross-validation; must not be combined with
            ``fold_zerobased``.
        folds: total number of folds (only used in the text).
        metrics: dict of metric name -> value; when ``fold_zerobased`` is given
            each value is indexed with it.
        outpath: existing directory that receives the files.
        fold_zerobased: zero-based fold index, or None for a whole-run report.
        smote_strategy: strategy to report; falls back to ``SMOTE_STRATEGY``.
        _additional_info_to_save: optional free text appended at the end.

    Raises:
        ValueError: if ``FEATURE_WINDOW_SIZE`` is not a known ``FeatureWindowSize``.
    """
    info_filename = '/info.txt' if fold_zerobased is None else '/info_fold_'+str(fold_zerobased+1)+'.txt'
    assert not (final_cross_validation_results and (fold_zerobased is not None))

    # Header lines of the window configurations that use overlapping windows.
    windowed_descriptions = {
        FeatureWindowSize._704windowed: 'Window size of 704 samples (~14ms) WITH WINDOWING\n',
        FeatureWindowSize._2112windowed: 'Window size of 2112 samples (~44ms) WITH WINDOWING\n',
        FeatureWindowSize._3456windowed: 'Window size of 3456 samples (~72ms) WITH WINDOWING\n',
        FeatureWindowSize._4800windowed: 'Window size of 4800 samples (~100ms) WITH WINDOWING\n',
    }

    with open(outpath + info_filename, "w") as f:
        # --- How the run was launched
        if not is_notebook():
            f.write('Execution command:\n')
            f.write(" ".join(sys_argv[:])+'\n')
        else:
            f.write('Trained with the jupyter notebook (not the script version)\n')
        f.write("\n\n")

        # --- Oversampling
        if DO_OVERSAMPLING:
            f.write("Oversampling (SMOTE) with aggressiveness: "+str(OVERSAMPLING_AGGRESSIVENESS)+'\n')
            reported_strategy = smote_strategy if smote_strategy is not None else SMOTE_STRATEGY
            # (the fallback line used to end with a stray ")")
            f.write("SMOTE strategy: "+str(reported_strategy)+'\n')
        else:
            f.write('NOT performing Oversampling'+'\n')

        # --- Data augmentation
        if USE_AUGMENTED_DATA:
            f.write('Using augmented data'+'\n')
            f.write('Augmented audio file paths: ' + ','.join([os.path.basename(x) for x in augmented_data_paths])+'\n')
            f.write('Loaded '+str(len(augmented_featuredataset))+' augmented samples'+'\n')
            f.write('Used '+str(len(features_aug))+' augmented samples after filtering the dataset'+'\n')

            if fold_zerobased is not None:
                f.write('Augmented data used in this fold: '+str(len(train_aug_indexes[fold_zerobased]))+'\n')
            else:
                f.write('Augmented data used for all splits: \n')
                for split_idx in train_aug_indexes:
                    f.write(str(len(train_aug_indexes[split_idx]))+'\n')


        f.write("\n\n")

        # --- Model summary
        if fold_zerobased is not None:
            f.write("FOLD ["+str(fold_zerobased+1)+"/"+str(folds)+"]\n\n")
        f.write("Summary:\n")
        model.summary(print_fn=lambda x: f.write(x + '\n'))
        f.write("\n\n")

        # --- Dataset and analysis window
        f.write('Data read from file '+DATASET_FILENAME+'\n')
        if FEATURE_WINDOW_SIZE == FeatureWindowSize.s4800_SAMPLES_100ms:
            f.write('Window size of 4800 samples (100ms)\n')
        elif FEATURE_WINDOW_SIZE == FeatureWindowSize.s704_Samples_14ms:
            f.write('Window size of 704 samples (~14ms)\n')
        elif FEATURE_WINDOW_SIZE in windowed_descriptions:
            f.write(windowed_descriptions[FEATURE_WINDOW_SIZE])
            f.write('%d overlapping windows'%(len(WINDOW_INDEXES)))
        else:
            # Report the offending value itself: the enum class has no usable ``name``.
            raise ValueError('Invalid FeatureWindowSize "%s"'%getattr(FEATURE_WINDOW_SIZE, 'name', FEATURE_WINDOW_SIZE))

        # --- Features
        f.write("\n\n")
        f.write("+--| Features: \n")
        f.write('Number of features selected: '+str(len(selected_features))+'\n')
        f.write('Selected features: '+str(selected_features)+'\n')
        f.write('Feature Selection method: '+str(FEATURE_SELECTION)+'\n')
        f.write("\n\n")
        if DO_NORMALIZE_DATA:
            f.write("Data was normalized. Find the parameters at the end of the file, and the scaler in the pickle file 'scaler.pickle'\n")
        else:
            f.write("Data was NOT normalized")
        f.write("\n\n")
        f.write('Run arguments: '+str(args)+'\n')
        f.write("\n\n")

        # --- Optimizer
        f.write("Optimizer: " + optimizer["method"])
        if optimizer["method"] == "sgd":
            f.write(" lr: " + str(optimizer["learning_rate"]) + " momentum: " + str(optimizer["momentum"]))
        elif optimizer["method"] == "adam":
            f.write(" lr: " + str(optimizer["learning_rate"]))
        else:
            assert False # If triggered check new optimizer and add case
        f.write("\n\n")

        # --- Training length
        if final_cross_validation_results:
            f.write("Trained for " + str(args['epochs']) + " epochs and with_batch size '" + str(args['batchsize']) + "'" + " epochs for each fold ("+str(folds)+"-foldCrossValidation)\n")
            f.write("Single results in the folds directories\n")
            f.write('\n\n-------- Average results --------\n\n')
        else:
            f.write("Trained for " + str(args['epochs']) + " epochs and with_batch size '" + str(args['batchsize']) + "'" + " epochs\n")

            if fold_zerobased is not None:
                f.write('(K-Fold cross validation run (fold '+str(fold_zerobased+1)+'/' +str(folds)+ '))\n')
            else:
                f.write('(Single run, NO k-fold cross validation)\n')

        # --- Metrics
        for metric, stored in metrics.items():
            value = stored if fold_zerobased is None else stored[fold_zerobased]
            f.write(str(metric) + ":\n" + str(value) + "\n\n")

            # Dictionary-style metrics also get a file of their own.
            if 'DICT' in metric:
                with open(os.path.join(outpath, metric.replace('DICT','')+'.txt'), 'w') as extra_mfile:
                    extra_mfile.write(str(value))

        if _additional_info_to_save:
            f.write("\n\n")
            f.write("Additional info:\n")
            f.write(_additional_info_to_save)
        # The file is closed by the ``with`` statement.

    # Copy Tensorboard Logs
    if fold_zerobased is None and DO_SAVE_TENSORBOARD_LOGS:
        LOGPATH=outpath+"/tensorboardlogs"
        shutil.copytree('./logs', LOGPATH)

    if not COLAB and fold_zerobased is None:
        # Copy script or notebook depending on the execution environment
        if is_notebook():
            # NOTE(review): hard-coded notebook name (original TODO: "make this
            # work"); the copy fails if the file is not in the working directory.
            script_path = 'expressive-technique-classifier-phase3.ipynb'
        else:
            script_path = os.path.realpath(__file__)
        shutil.copyfile(script_path, os.path.join(outpath, 'backup_'+os.path.basename(script_path)))

# Prepare Logs

In [None]:
def plot_history(train_metric, validation_metric, title, xlabel, ylabel, filename=None, show = False):
    """Plot a training curve and a validation curve on the same axes.

    When ``filename`` is given the figure is saved as ``<filename>.pdf`` and
    then closed; otherwise the figure is only created. ``show`` is accepted
    but currently not used.
    """
    fig, ax = plt.subplots(figsize=(5, 3))
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    for curve in (train_metric, validation_metric):
        ax.plot(curve)
    ax.legend(['Training','Validation'])

    if filename is None:
        return

    plt.savefig(filename+".pdf",bbox_inches='tight')
    plt.close()
    plt.ioff()

F1-Score on Test dataset

In [None]:
def macroweighted_f1(y_true,y_pred):
    """Return the support-weighted average of the per-class F1-scores.

    The F1-score is computed for every class listed in the module-level
    ``CLASSES`` and the scores are averaged using the number of true samples
    of each class as weight.

    A class with no true positives gets an F1-score of 0. This covers classes
    that are never predicted or that do not occur in ``y_true``, which would
    otherwise produce a 0/0 division and turn the whole result into NaN.

    Args:
        y_true: array-like of ground-truth class labels.
        y_pred: array-like of predicted class labels, same length as ``y_true``.

    Returns:
        The weighted F1-score, or 0.0 when none of the classes occurs in ``y_true``.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    correct = (y_true == y_pred)

    f1scores = []
    numSamples = []
    for selclass in CLASSES:
        classSelection = (y_true == selclass)
        classPrediction = (y_pred == selclass)
        class_support = np.sum(classSelection)
        true_positives = np.sum(np.logical_and(classSelection,correct))
        numSamples.append(class_support)

        if true_positives == 0:
            # Precision and/or recall are 0 (or undefined): the F1-score is 0.
            f1scores.append(0.0)
            continue

        # true_positives > 0 guarantees that both denominators are positive.
        precision = 1.0 * true_positives / np.sum(classPrediction)
        recall = 1.0 * true_positives / class_support
        f1scores.append(2 /((1/precision)+(1/recall)))

    total_samples = sum(numSamples)
    if total_samples == 0:
        return 0.0
    macroWeightedF1 = sum(np.array(f1scores) * np.array(numSamples)) / total_samples
    return macroWeightedF1

In [None]:
def compute_metrics(y_true, y_pred,_verbose = False):
    """Compute accuracy, weighted F1, confusion matrix and classification reports.

    The inputs are validated before anything is computed, so a length mismatch
    or a predicted class that never occurs in ``y_true`` is reported with the
    intended message instead of failing somewhere inside the computations.

    Args:
        y_true: array of ground-truth class labels.
        y_pred: array of predicted class labels, same length as ``y_true``.
        _verbose: when true, print the metrics.

    Returns:
        Tuple ``(accuracy, f1mw, confusion_matrix, classification_report,
        printable_classification_report)`` where ``classification_report`` is
        the dict form and the last element is the text form of the report.
    """
    assert len(y_true) == len(y_pred), 'The "y_true" and "y_pred" arrays have a different length'

    # The reports use the names in CLASSES_DESC: every predicted class must
    # also be present in the ground truth.
    assert len(np.unique(y_true)) >= len(np.unique(y_pred)), 'y_pred contains more distinct classes than y_true'
    assert np.isin(np.unique(y_pred),np.unique(y_true)).all(), 'Some classes in y_pred are not in y_true ('+str(np.setdiff1d(y_pred,y_true))+')'

    accuracy = np.sum(y_pred == y_true)/np.shape(y_true)[0]
    f1mw = macroweighted_f1(y_true,y_pred)
    confusion_matrix = sk_conf_matrix(y_true, y_pred)

    class_names = list(CLASSES_DESC.values())
    classification_report = sk_class_report(y_true, y_pred, digits=6,target_names = class_names,output_dict=True)
    printable_classification_report = sk_class_report(y_true, y_pred, digits=4,target_names = class_names)

    if _verbose:
        print("Test Accuracy: " + str(accuracy) + "\nTest macro_weighted_avg f1-score: " + str(f1mw)+'\n'+str(confusion_matrix)+'\n'+str(printable_classification_report))

    return accuracy, f1mw, confusion_matrix, classification_report, printable_classification_report


def pretty_confusion_matrix(confusion_matrix,savepath,labels):
    """Render ``confusion_matrix`` with the given class ``labels`` and save it to ``savepath``.

    The directory part of ``savepath`` must already exist. A bare filename
    (empty directory part) refers to the current working directory and is
    accepted.
    """
    # os.path.dirname() returns '' for a bare filename; treat it as the current directory.
    target_dir = os.path.dirname(savepath) or '.'
    assert os.path.exists(target_dir), 'The path to save the confusion matrix does not exist'
    sk_conf_matrix_disp(confusion_matrix,display_labels=labels).plot()
    plt.xticks(rotation = 45)
    plt.savefig(savepath,bbox_inches='tight')
    # plt.show()
    plt.close(); plt.ioff()

# Prepare TFLite conversion and evaluation

In [None]:
# TFLite conversion function
def convert2tflite(tf_model_dir,tflite_model_dir = None,model_name="model",quantization=None,dataset=None):
    """Convert a TensorFlow SavedModel to a ``.tflite`` file.

    Args:
        tf_model_dir: directory of the SavedModel to convert.
        tflite_model_dir: output directory; defaults to ``tf_model_dir``.
        model_name: output file name without the ``.tflite`` extension.
        quantization: ``None`` (no quantization), ``"dynamic"``,
            ``"float-fallback"`` or ``"full"`` (int8 inputs and outputs).
        dataset: representative samples, required for ``"float-fallback"`` and
            ``"full"`` (at most the first 100 are used) and expected to be
            ``None`` for ``"dynamic"``.
    """
    assert quantization in (None, "dynamic", "float-fallback", "full")

    # Convert the model saved in the previous step.
    converter = tf.lite.TFLiteConverter.from_saved_model(tf_model_dir)

    if quantization is not None:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        if quantization in ("full", "float-fallback"):
            assert dataset is not None

            def calibration_samples():
                samples = tf.data.Dataset.from_tensor_slices((dataset)).batch(1).take(100)
                for sample in samples:
                    yield [tf.dtypes.cast(sample, tf.float32)]

            converter.representative_dataset = calibration_samples

        if quantization == "full":
            converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
            converter.inference_input_type = tf.int8  # or tf.uint8
            converter.inference_output_type = tf.int8  # or tf.uint8

        if quantization == "dynamic":
            assert dataset is None

    converted_model = converter.convert()

    # Save the TF Lite model.
    output_dir = tf_model_dir if tflite_model_dir is None else tflite_model_dir
    output_path = output_dir + "/" + model_name + '.tflite'

    with tf.io.gfile.GFile(output_path, 'wb') as out_file:
        out_file.write(converted_model)

## USAGE
# model_path = MODELFOLDER + "/" + RUN_NAME + "/fold_1"
# convert2tflite(model_path)

In [None]:
def test_tflite_model(model_path,X_test,y_test,first_layer_is_conv,verbose_test = False):
    """Run a TFLite model on every row of ``X_test`` and return the predicted classes.

    Samples are fed one at a time. For a model with int8 input the sample is
    quantized with the input scale/zero point; for int8 output the result is
    dequantized before taking the arg-max.

    Args:
        model_path: path of the ``.tflite`` file.
        X_test: 2-D array-like of samples (one row per sample).
        y_test: not used; kept for signature parity with ``test_regulartf_model``.
        first_layer_is_conv: when true a trailing channel axis is added to each sample.
        verbose_test: print tensor details and every prediction vector.

    Returns:
        List with the predicted class index of each sample.
    """
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    input_details = tflite_interpreter.get_input_details()[0]
    output_details = tflite_interpreter.get_output_details()[0]

    if verbose_test:
        print("+--------------------------------------------+\n| Testing the TF lite model saved            |\n+--------------------------------------------+\n")
        print("[Model loaded]\n")
        print("\n== Input details ==\nname:"+ str(input_details['name']) + "\nshape:"+str(input_details['shape']) +  "\ntype:"+str(input_details['dtype']))
        print("\n== Output details ==\nname:"+str(output_details['name']) + "\nshape:"+str(output_details['shape']) + "\ntype:"+str(output_details['dtype']))
        print("+--------------------------------------------+\n| Testing on TEST set...                     |\n+--------------------------------------------+\n")

    tflite_interpreter.allocate_tensors()

    # Compare dtypes by equality: identity comparison depends on the exact object returned.
    input_is_int8 = input_details['dtype'] == np.int8
    output_is_int8 = output_details['dtype'] == np.int8

    y_pred = list()
    for i in range(X_test.shape[0]):
        extracted_test_sample = np.array(X_test[i:i+1]).astype(np.float32)

        # Quantize inputs if necessary (fully quantized int8 model)
        if input_is_int8:
            input_scale, input_zero_point = input_details["quantization"]
            # NOTE(review): values are neither rounded nor clipped to the int8 range before the cast.
            extracted_test_sample = (extracted_test_sample / input_scale + input_zero_point).astype(np.int8)

        if first_layer_is_conv:
            input_tensor = np.expand_dims(extracted_test_sample,axis=2).astype(input_details["dtype"])
        else:
            input_tensor = extracted_test_sample

        if verbose_test:
            print("Setting "+str(input_tensor.shape)+" "+str(input_tensor.dtype)+" as input")

        tflite_interpreter.set_tensor(input_details['index'], input_tensor)
        tflite_interpreter.invoke()
        prediction_vec = tflite_interpreter.get_tensor(output_details['index'])

        if verbose_test:
            print("Getting "+str(prediction_vec.shape)+" "+str(prediction_vec.dtype)+" as output")

        if output_is_int8:
            output_scale, output_zero_point = output_details["quantization"]
            # real_value = (quantized_value - zero_point) * scale.
            # Convert to float first: int8 arithmetic can overflow and wrap
            # around, which would change the arg-max below.
            prediction_vec = (prediction_vec.astype(np.float32) - output_zero_point) * output_scale

        if verbose_test:
            print(prediction_vec)
        y_pred.append(np.argmax(prediction_vec))
    return y_pred

def test_regulartf_model(model_path,X_test,y_test,first_layer_is_conv,verbose_test = False):
    """Load a saved Keras model and return its accuracy on the given test set.

    When ``first_layer_is_conv`` is true a channel axis is inserted at
    position 2 of ``X_test`` before evaluation. ``verbose_test`` is not used.
    """
    loaded_model = tf.keras.models.load_model(model_path)
    model_input = np.expand_dims(X_test,axis=2) if first_layer_is_conv else X_test
    # evaluate() returns (loss, accuracy); only the accuracy is of interest here.
    evaluation = loaded_model.evaluate(model_input,  y_test, verbose=2)
    _, accuracy = evaluation
    return accuracy

# k-Fold Cross Validation


In [None]:
# Run configuration for training/evaluation. Values come from the parsed run
# arguments (`args`) where possible and are module-level constants otherwise.

# --> Epochs / Batches
print('Using training epochs: ', args['epochs'])
print('Using batch size: ', args['batchsize'])

# --> Early Stopping
use_early_stopping = False

# --> OVERSAMPLING ################################################
DO_OVERSAMPLING = args['oversampling_aggressiveness'] > 0.0       # oversample only for a positive aggressiveness
OVERSAMPLING_AGGRESSIVENESS = args['oversampling_aggressiveness'] # factor applied to the largest class count (see oversample())
VERBOSE_OVERSAMPLING = True                                       #
SMOTE_STRATEGY = {} # Do not set this variable by hand: it is only the fallback written to the info file
###################################################################

# --> KFOLD RUN #################################################
K_SPLITS = args['k_folds']
USE_CROSS_VALIDATION = K_SPLITS > 1 # Activate K-Fold Cross Validation only if K_SPLITS > 1
VAL_SPLIT_SIZE = 0.1                                            # fraction of total entries going into the validation set
# TODO: this is something to fix.
# The custom splitter that keeps guitarists separate needs to take the
# validation set from the test set and not from the train set. For this
# reason there are currently two fractions.
# TODO: change all the code to always take the validation fraction from the
# test set, so that a single constant is enough.
VAL_SPLIT_SIZE_TESTPERC = 0.3                                   # fraction of test entries going into the validation set
random_state = global_random_state                              # seed for pseudo random generator
#################################################################

# --> SINGLE RUN ################################################
SAVE_MODEL_INFO = True                                          # write the info report at the end of the run
test_split_size = 0.2                                           #
#################################################################

DO_TEST = False

# Optimizer settings consumed by compile_model() and save_model_info().
# optimizer = { "method" : "sgd", "learning_rate" : args['learning_rate'], "momentum" : 0.7 }
optimizer = { "method" : "adam", "learning_rate" : args['learning_rate'] }

In [None]:
def oversample(features: list, labels: list, aggressiveness = 1, verbose: bool = False):
    """Oversample the minority classes with SMOTE.

    Every class is brought up to ``round(largest class count * aggressiveness)``
    samples; classes that already have more samples keep their count.
    Warnings raised while resampling are silenced.

    Args:
        features: feature matrix (must expose ``.shape``).
        labels: class label of each row of ``features``.
        aggressiveness: factor applied to the largest class count to obtain
            the target number of samples per class.
        verbose: print progress and the class support after resampling.

    Returns:
        Tuple ``(resampled features, resampled labels, sampling strategy)``
        where the strategy maps each class to its requested sample count.
    """
    if verbose:
        print("Oversampling...")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        class_values, class_counts = np.unique(labels, return_counts=True)
        target_count = int(round(max(class_counts) * aggressiveness,0))
        print('Label count in fold:',dict(zip(class_values, class_counts)))

        # Requested number of samples per class: never less than the target.
        smote_strategy = {}
        for class_value, class_count in zip(class_values, class_counts):
            if class_count > target_count:
                smote_strategy[class_value] = class_count
            else:
                smote_strategy[class_value] = target_count

        print('sampling_strategy:',smote_strategy)

        resampler = imblearn.over_sampling.SMOTE(sampling_strategy=smote_strategy)
        ovs_features, ovs_labels = resampler.fit_resample(features, labels)

        if verbose:
            print("Increased training samples from " + str(features.shape[0]) + " to " + str(ovs_features.shape[0]))
            printSupport(ovs_labels)

    return ovs_features, ovs_labels, smote_strategy

In [None]:
# Reshaping helpers that prepare the data for a first convolutional layer (1D or 2D)
def dataTo1DConv(x):
    """Append a trailing channel axis: (samples, features) -> (samples, features, 1)."""
    return np.expand_dims(x,axis = -1)

def dataTo2DConv(x):
    """Reshape flat rows to (samples, windows, features per window, 1).

    The number of windows is ``len(WINDOW_INDEXES)`` and the number of features
    per window is ``AUTO_FEATURE_NUMBER``.
    """
    per_window = x.reshape((len(x),len(WINDOW_INDEXES),AUTO_FEATURE_NUMBER))
    return np.expand_dims(per_window,axis = -1)

In [None]:
# Name the run after its mode (cross-validated or single) plus a timestamp,
# and create the folder that collects all the outputs of this run.
prefix = "guittouch_CrossValidated" if USE_CROSS_VALIDATION else "Single"
RUN_NAME = prefix + "Run_"+strftime("%Y%m%d-%H%M%S")
OUTPUT_DIR = os.path.join(MODELFOLDER,RUN_NAME)
os.makedirs(OUTPUT_DIR,exist_ok = True) 


In [None]:
'''Call the main routine for each fold'''
# Containers for the results of the run.
result_model = []
# Augmented-sample indexes keyed by split (read by save_model_info()).
train_aug_indexes = {}
# Plain numpy views of the selected features and of the labels.
X,y = dataset_features.to_numpy(), dataset_labels.to_numpy()


# Cross Validation average results

## Utilities for reports and metrics

In [None]:
def report_average(reports):
    """Average a list of classification reports entry by entry.

    Each report is a dict as returned by scikit-learn's
    ``classification_report(..., output_dict=True)``: every label maps to a
    dict of metrics, except ``'accuracy'`` which maps to a single number.
    The labels and metric names of the first report are used for all of them.

    Args:
        reports: non-empty list of report dicts sharing the same structure.

    Returns:
        A dict with the same structure holding the arithmetic means.
    """
    n_reports = len(reports)
    mean_dict = dict()
    for label in reports[0].keys():
        # Exact comparison: a substring test would also catch class labels
        # such as 'a' or 'acc', whose entries are dicts and not numbers.
        if label == 'accuracy':
            mean_dict[label] = sum(d[label] for d in reports) / n_reports
            continue

        mean_dict[label] = {
            key: sum(d[label][key] for d in reports) / n_reports
            for key in reports[0][label].keys()
        }

    return mean_dict

def classification_report_dict2print(report):
    """Format a classification-report dict as a text table.

    ``report`` is expected to list the per-class entries first, followed by
    exactly three summary entries of which the first is ``'accuracy'`` (a
    number) and the other two are dicts with the same metrics as the classes.
    The layout is: header, one row per class, a blank line, the accuracy row
    and the remaining summary rows, followed by a blank line.

    Returns:
        The table as a single string.
    """
    METRIC_DECIMAL_DIGITS = 4
    metric_width = METRIC_DECIMAL_DIGITS + 2 # add 0 and dot

    row_labels = list(report.keys())
    class_labels, summary_labels = row_labels[0:-3], row_labels[-3:]
    label_width = max([len(label) for label in row_labels])

    metric_names = list(report[class_labels[0]].keys())
    column_width = max([len(name) for name in metric_names]) + 1

    def metric_cell(value):
        # Padding assumes the "0.xxxx" form, i.e. metric_width characters.
        return ' ' * (column_width - metric_width) + "%.4f" % round(value,METRIC_DECIMAL_DIGITS)

    def support_cell(value):
        # Padding is based on the truncated integer, the text on the rounded one.
        return ' ' * (column_width - len(str(int(value)))) + "%d" % round(value,0)

    def table_row(label):
        cells = [label.rjust(label_width), ' ']
        for name in metric_names:
            value = report[label][name]
            cells.append(support_cell(value) if name == "support" else metric_cell(value))
        cells.append('\n')
        return ''.join(cells)

    header = ' ' * label_width + '  precision    recall  f1-score   support\n\n'

    # Accuracy: the precision and recall columns stay empty, the support is
    # taken from the last summary entry.
    accuracy_row = ''.join([
        summary_labels[0].rjust(label_width), ' ',
        ' ' * (2 * column_width),
        metric_cell(report["accuracy"]),
        support_cell(report[summary_labels[-1]]['support']),
        '\n',
    ])

    sections = [header]
    sections.extend(table_row(label) for label in class_labels)
    sections.append('\n')
    sections.append(accuracy_row)
    sections.extend(table_row(label) for label in summary_labels[1:])
    sections.append('\n')
    return ''.join(sections)

## Compute average

In [None]:
# Build `metrics_to_save`: the averages over the folds for a cross-validated
# run, or the metrics of the only split for a single run.
if not TRAIN_ONLY_ON_FULL_DATASET:
    if USE_CROSS_VALIDATION:
        assert len(evaluation_metrics['accuracy']) == K_SPLITS, "The number of accuracy values does not match the number of folds ({} != {})".format(len(evaluation_metrics['accuracy']),K_SPLITS)

        averaged_classification_reports = report_average(evaluation_metrics["classification_report"])
        macro_avg_f1_score = averaged_classification_reports["macro avg"]["f1-score"]
        average_fold_accuracy = averaged_classification_reports["accuracy"]
        printable_avg_report = classification_report_dict2print(averaged_classification_reports)

        # Placeholders for the quantized-model reports, so that the dictionary
        # below can always be built even when quantization was not tested.
        qm_avg_report = "Not performed"
        qm_printable_avg_report = "Not performed"
        fullqm_avg_report = "Not performed"
        fullqm_printable_avg_report = "Not performed"
        if DO_TEST_QUANTIZATION:
            qm_avg_report = report_average(quantized_model_evaluation_metrics["classification_report"])
            qm_printable_avg_report = classification_report_dict2print(qm_avg_report)
            fullqm_avg_report = report_average(fullquantized_model_evaluation_metrics["classification_report"])
            fullqm_printable_avg_report = classification_report_dict2print(fullqm_avg_report)

        # Entries whose key contains 'DICT' are also dumped to their own file by save_model_info().
        metrics_to_save = {'macro_avg_f1score' : macro_avg_f1_score,
                           'average_fold_accuracy' : average_fold_accuracy,
                           'DICTavg_classification_report' : averaged_classification_reports,
                           'avg_classification_report' : printable_avg_report,
                           'DICTavg_classification_report_for_quantized_model' : qm_avg_report,
                           'avg_classification_report_for_quantized_model' : qm_printable_avg_report,
                           'DICTavg_classification_report_for_fullquantized_model' : fullqm_avg_report,
                           'avg_classification_report_for_fullquantized_model' : fullqm_printable_avg_report}
    else:
        # Single run: every metric list holds exactly one value.
        assert len(evaluation_metrics['accuracy']) == 1
        metrics_to_save = {}
        for metric in evaluation_metrics.keys():
            metrics_to_save[metric] = evaluation_metrics[metric][0]
        # NOTE(review): assumes the quantized-model metrics were collected for the single run as well.
        for metric in quantized_model_evaluation_metrics.keys():
            metrics_to_save['quantizedmod_'+str(metric)] = quantized_model_evaluation_metrics[metric][0]


# Save Model Info

In [None]:
# Write the run report to disk, or print the metrics when saving is disabled.
if not TRAIN_ONLY_ON_FULL_DATASET:
    if SAVE_MODEL_INFO:
        current_dir = MODELFOLDER + "/" + RUN_NAME
        # %mkdir -p "$current_dir"
        os.makedirs(current_dir,exist_ok = True)

        # The summary in the report is taken from the first model when several were trained.
        save_model_info(result_model[0] if isinstance(result_model, list) else result_model,
                        optimizer,
                        USE_CROSS_VALIDATION,K_SPLITS,
                        metrics_to_save,
                        current_dir,
                        _additional_info_to_save = additional_info_to_save,)
    else:
        # metrics_to_save is a dict: it must be converted before the concatenation.
        print("RESULTS\n\n" + str(metrics_to_save))

# Train final model on the entire dataset

In [None]:
# Output folder of the current run; it must already exist at this point.
run_dir = os.path.join(MODELFOLDER,RUN_NAME)
assert os.path.exists(run_dir)

In [None]:
if TRAIN_ONLY_ON_FULL_DATASET:
    TRAIN_FINAL_MODEL = True

if TRAIN_FINAL_MODEL:
    use_early_stopping = False

    X_finmod,y_finmod = np.copy(X),np.copy(y)

    if USE_AUGMENTED_DATA:
        X_aug,y_aug = features_aug.to_numpy(), labels_aug.to_numpy()
        assert np.shape(X)[1] == np.shape(X_aug)[1], "The number of features in the augmented data does not match the number of features in the original data"
        print('Using augmented data to TRAIN THE FINAL MODEL')
        X_finmod = np.concatenate((X_finmod,X_aug),axis=0)
        y_finmod = np.concatenate((y_finmod,y_aug),axis=0)
        assert len(X_finmod) > len(X), "The number of samples in the augmented data is not greater than the number of samples in the original data"
        assert len(X_finmod) == len(y_finmod), "The number of samples in the augmented data does not match the number of labels in the original data"
        assert np.shape(X_finmod)[1] == np.shape(X_aug)[1], "The number of features in the augmented data does not match the number of features in the original data"

    ### DEFINE MODEL
    final_model = define_model_architecture(len(CLASSES),_verbose = True)
    loss_fn = get_loss()

    ### Normalize data if needed
    if DO_NORMALIZE_DATA:
        scaler = SCALER_TO_USE
        X_finmod = scaler.fit_transform(X_finmod)

        # Save final scaler and parameters
        with open(os.path.join(run_dir,'scaler.pickle'),'wb') as sf:
            pickle.dump(scaler,sf)
        with open(os.path.join(run_dir,'info.txt'),'a') as infof:
            if type(SCALER_TO_USE) == MinMaxScaler:
                infof.write("The scaler used was sklearn.preprocessing MinMaxScaler\nScaler parameters:\n")

                infof.write('MinMaxScaler().data_min_: '        + str(SCALER_TO_USE.data_min_)+'\n')
                infof.write('MinMaxScaler().data_max_: '        + str(SCALER_TO_USE.data_max_)+'\n')
                infof.write('MinMaxScaler().data_range_: '      + str(SCALER_TO_USE.data_range_)+'\n')
                infof.write('MinMaxScaler().scale_: '           + str(SCALER_TO_USE.scale_)+'\n')
                infof.write('MinMaxScaler().n_samples_seen_: '  + str(SCALER_TO_USE.n_samples_seen_)+'\n')
            elif type(SCALER_TO_USE) == StandardScaler:
                infof.write("The scaler used was sklearn.preprocessing StandardScaler\nScaler parameters:\n")
                infof.write('StandardScaler().mean_: '          + str(SCALER_TO_USE.mean_)+'\n')
                infof.write('StandardScaler().var_: '           + str(SCALER_TO_USE.var_)+'\n')
                infof.write('StandardScaler().scale_: '         + str(SCALER_TO_USE.scale_)+'\n')
                infof.write('StandardScaler().n_samples_seen_: '+ str(SCALER_TO_USE.n_samples_seen_)+'\n')
            else:
                raise Exception("\"%s\" scaler not supported"%(SCALER_TO_USE))

    ### PREPARE DATA IN CASE OF A FIRST CONV LAYER IN THE NET (1D or 2D)
    if type(final_model.layers[0]) == tf.keras.layers.Conv1D and not WINDOWED_INPUT_MODE == WindowedInputMode._2D:
        X_all = dataTo1DConv(X_finmod)
    elif WINDOWED_INPUT_MODE == WindowedInputMode._2D:
        print(np.shape(X_finmod))
        # raise Exception("2D Convolutional layers not supported yet")
        X_all = dataTo2DConv(X_finmod)
    else:
        X_all = X_finmod

    
    with open(os.path.join(run_dir,'check.txt'),'w') as sf:
        print('Training final model',file=sf)
        print(np.shape(X_all),file=sf)

    ### COMPILE MODEL
    compile_model(final_model,optimizer,loss_fn,_verbose = True)

    ### SETUP TENSORBOARD
    # tensorboard_callback = start_tensorboard(tb_dir,None)
    # callbacks=[tensorboard_callback,]
    callbacks=[]

    ### SETUP EARLY STOPPING (only if not in K-fold mode)
    if use_early_stopping:
        # Accuracy is a "higher is better" metric, so the callback has to track its
        # maximum: with mode='min' a *drop* in accuracy would count as an improvement.
        # NOTE(review): the fit() call below passes no validation data, so
        # 'val_accuracy' may never be logged - confirm which metric should be monitored.
        callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=200))

    # * FIT MODEL *
    final_model.fit(X_all, y, epochs=args['epochs'],
                    callbacks=callbacks,
                    batch_size=args['batchsize'])

In [None]:
if TRAIN_FINAL_MODEL:
    # Folder that receives the saved Keras model and its TFLite conversions
    final_model_dir = "/".join((MODELFOLDER, RUN_NAME, "finalModel"))
    os.makedirs(final_model_dir, exist_ok=True)

    final_model.save(final_model_dir)

    # Convert and save two lite models: a non-quantized one first,
    # then a dynamically quantized one
    for lite_model_name, lite_quantization in (('final_model', None),
                                               ('final_model_dynquant', "dynamic")):
        convert2tflite(final_model_dir, model_name=lite_model_name, quantization=lite_quantization)

In [None]:
# if TEST_WITH_EXTRA_DATA:
#     # extratest_featuredataset
#     ex_metadata, ex_features, ex_labels = divide_dataset(extratest_featuredataset)



## Move the current output folder into its window/task folder and prefix its name with the run configuration

In [None]:
# Runs are grouped by feature window size first and by classification task second
window_folder = os.path.join(os.path.dirname(run_dir),FEATURE_WINDOW_SIZE.name)
problem_folder = os.path.join(window_folder,classification_task.value[1])
# makedirs(exist_ok=True) creates both levels at once and, unlike the
# "check, then mkdir" pattern, does not fail if the folder already exists
os.makedirs(problem_folder,exist_ok=True)

# if not GUITAR_GROUP_K_FOLD_VALIDATION and USE_CROSS_VALIDATION:
#     problem_folder = os.path.join(problem_folder,'NonSeparatedGuitars')
#     if not os.path.exists(problem_folder):
#         os.mkdir(problem_folder)

# Rename folder: a run that produced 'info.txt' is moved under problem_folder with a
# descriptive name, any other run is flagged in place with an 'ERR_' prefix
run_basename = os.path.basename(run_dir)
if os.path.exists(os.path.join(run_dir,'info.txt')):
    metrics_prefix = 'c_' if 'CrossValidated' in run_basename else ''

    # metrics_prefix += 'maf1_%.4f_acc_%.4f_'%(round(metrics_to_save['macro_avg_f1score'],4),round(metrics_to_save['average_fold_accuracy'],4))

    window_prefix = FEATURE_WINDOW_SIZE.name

    problem_prefix = classification_task.value[1]+'_'

    # Only the limited-dataset study records how many guitar/player pairs were used
    limited_study_prefix = ''
    if classification_task == ClassificationTask.LIMITED_DATASET_NUMBER_FULL_8:
        limited_study_prefix = str(len(WHICH_GUITAR_PLAYER_COMBOS_TO_USE))+'GPpairs'

    newfoldername = os.path.join(problem_folder,'_'.join(WHICH_GUITAR_PLAYER_COMBOS_TO_USE)+'_'+limited_study_prefix+window_prefix+problem_prefix+metrics_prefix+run_basename)
    # print('Renaming "'+run_dir+'" to "'+newfoldername+'"')
    os.rename(run_dir,newfoldername)
    run_dir = newfoldername
else:
    errfoldername = os.path.join(os.path.dirname(run_dir),'ERR_'+run_basename)
    os.rename(run_dir,errfoldername)
    run_dir = errfoldername

print("Renamed folder to \""+run_dir+"\"")

In [None]:
if TRAIN_FINAL_MODEL and TEST_WITH_EXTRA_DATA:
    # Divide the extra-test data into metadata, features and labels (like the train/test dataset)
    test_metadata, test_features, test_labels = divide_dataset(extratest_featuredataset)
    assert test_metadata.shape[1] == 9

    # Filter the dataset according to the task
    # This might mean removing samples or renaming classes
    test_features,test_labels,test_metadata = filter_dataset(test_features.copy(),test_labels.copy(),test_metadata.copy(),classification_task)

    # Extract all features, labels and metadata of the rows whose
    # 'meta_audiofilePath' contains 'luctur'
    luctur_indexes = test_metadata['meta_audiofilePath'].str.contains('luctur')
    # The mask always has one entry per row, so its length says nothing about
    # matches: check that at least one entry is True instead
    assert luctur_indexes.any(), 'No extra-test rows found with "luctur" in meta_audiofilePath'
    luctur_test_features = test_features.loc[luctur_indexes,:].reset_index(drop=True)
    luctur_test_labels = test_labels.loc[luctur_indexes].reset_index(drop=True)
    luctur_test_metadata = test_metadata.loc[luctur_indexes,:].reset_index(drop=True)

    # Same extraction for the rows whose 'meta_audiofilePath' contains 'thobor'
    thobor_indexes = test_metadata['meta_audiofilePath'].str.contains('thobor')
    assert thobor_indexes.any(), 'No extra-test rows found with "thobor" in meta_audiofilePath'
    thobor_test_features = test_features.loc[thobor_indexes,:].reset_index(drop=True)
    thobor_test_labels = test_labels.loc[thobor_indexes].reset_index(drop=True)
    thobor_test_metadata = test_metadata.loc[thobor_indexes,:].reset_index(drop=True)

    # Apply the feature selection computed for the train/test set (like the train/test dataset)
    # test_features = test_features.copy().loc[:,selected_features]
    thobor_test_features = thobor_test_features.loc[:,selected_features]
    luctur_test_features = luctur_test_features.loc[:,selected_features]

    # With windowed data the expected feature count is multiplied by the number of window indexes
    MULT = 1 if not DATA_IS_WINDOWED else len(WINDOW_INDEXES)
    expected_feature_number = AUTO_FEATURE_NUMBER*MULT
    if len(selected_features) != expected_feature_number:
        raise Exception('The number of selected_features ('+str(len(selected_features))+') is not the same as AUTO_FEATURE_NUMBER ('+str(expected_feature_number)+'). Check the code.')

    if (thobor_test_features.shape[1] != expected_feature_number) or (luctur_test_features.shape[1] != expected_feature_number):
        # Report the widths of the subsets that were actually checked, not the
        # width of the unselected test_features frame
        raise Exception('The number of features in the test dataset (thobor: '+str(thobor_test_features.shape[1])+', luctur: '+str(luctur_test_features.shape[1])+') is different from the number of features in the train/test dataset ('+str(expected_feature_number)+')')
    del test_features

In [None]:
if TRAIN_FINAL_MODEL and TEST_WITH_EXTRA_DATA:
    # One (name, features, labels) entry per extra-test subset; each subset is
    # evaluated with the final model and gets its own results file
    extra_test_sets = (
        ('thobor', thobor_test_features, thobor_test_labels),
        ('luctur', luctur_test_features, luctur_test_labels),
    )
    for name, features, labels in extra_test_sets:
        extra_test_x = features.to_numpy()
        extra_test_y = labels.to_numpy()

        if DO_NORMALIZE_DATA:
            # Reuse the already fitted scaler: transform only, no re-fit on the extra-test data
            extra_test_x = scaler.transform(extra_test_x)

        # Reshape the input the same way as for training when the net expects conv input
        first_layer_is_conv1d = type(final_model.layers[0]) == tf.keras.layers.Conv1D
        if WINDOWED_INPUT_MODE == WindowedInputMode._2D:
            print(np.shape(extra_test_x))
            extra_test_x = dataTo2DConv(extra_test_x)
        elif first_layer_is_conv1d:
            extra_test_x = dataTo1DConv(extra_test_x)

        # Predicted class = index of the largest model output for each sample
        y_true = np.squeeze(extra_test_y)
        y_pred = np.argmax(final_model(extra_test_x),axis=1)
        cm_acc, f1mw, cm_conf_matrix, cm_classf_report, cm_printable_classf_report = compute_metrics(y_true, y_pred, _verbose=False)

        outfilename = os.path.join(run_dir,name+'_results.txt')
        with open(outfilename,'w') as infof:
            infof.writelines([
                '______________________________________________________________________________________________________________________________________________________\n\n\n',
                '+----------------------------------------------------------------+\n',
                '| Results obtained on extra test recordings with the FINAL MODEL |\n',
                f'| With guitarist: {name} \n',
                '+----------------------------------------------------------------+\n\n',
                f'Extra-test-Accuracy: {cm_acc!s}\n\n',
                f'Extra-test-F1 Score (weighted average): {f1mw!s}\n\n',
                f'Extra-test-ConfusionMatrix: \n{cm_conf_matrix!s}\n\n',
                f'Extra-test-Report: \n{cm_printable_classf_report!s}\n\n',
                f'DICTExtra-test-Report:\n{cm_classf_report!s}\n\n',
            ])
        print(f'Resuts wrote to "{outfilename}"')

In [None]:
# Final console summary: completion banner followed by the output folder of this run
print('*--* Training successfully completed. *--*', f"Data at {run_dir}", sep='\n')

In [None]:
def send_to_telegram(message):
    """Send `message` to a Telegram chat through the Bot API (best effort).

    The bot token and the chat id are read from the files
    'TELEGRAM_API_TOKEN.key' and 'TELEGRAM_BOT_CHATID.key' in the current
    working directory. If either file is missing, a hint is printed and
    nothing is sent.

    Args:
        message: Text of the message to send.

    Returns:
        None. Failures are printed, never raised, so that a notification
        problem cannot interrupt the rest of the run.
    """
    token_file = 'TELEGRAM_API_TOKEN.key'
    chat_id_file = 'TELEGRAM_BOT_CHATID.key'
    if not (os.path.exists(token_file) and os.path.exists(chat_id_file)):
        print('No Telegram API token or chat ID found. Please create a file named TELEGRAM_API_TOKEN.key and TELEGRAM_BOT_CHATID.key in the root of your project.')
        return

    with open(token_file) as tatf, open(chat_id_file) as tbcf:
        api_token = tatf.read().strip()
        chat_id = tbcf.read().strip()
    api_url = f'https://api.telegram.org/bot{api_token}/sendMessage'

    try:
        # Without a timeout an unreachable server could block the notebook forever
        response = requests.post(api_url, json={'chat_id': chat_id, 'text': message}, timeout=10)
        # Turn HTTP error statuses (e.g. invalid token or chat id) into a visible error
        response.raise_for_status()
    except Exception as e:
        # Deliberately broad: notifications are best effort.
        # The request URL embeds the bot token and may appear in the error text,
        # so redact it before printing.
        error_text = str(e)
        if api_token:
            error_text = error_text.replace(api_token, '<TELEGRAM_API_TOKEN>')
        print(error_text)


# Notify via Telegram that this run finished, including the folder holding its outputs
send_to_telegram('!Training completed in folder: "' + run_dir+'"!')


def save_and_notify_if_greater(newvalue, metricfilepath, metric_name, problem_name,window_name):
    """Keep track of the best value of a metric and notify when it improves.

    The first line of `metricfilepath` stores the best value notified so far.
    If the file exists, that value is read and compared with `newvalue`; if
    the file is missing, or its first line is not a number, the best value so
    far is taken to be 0.0. When `newvalue` is strictly greater, the file is
    overwritten with the new value (followed by the global `run_dir`) and a
    Telegram message is sent.

    Args:
        newvalue: Metric value obtained in the current run.
        metricfilepath: Path of the text file that stores the best value.
        metric_name: Metric name used in the printed and sent messages.
        problem_name: Classification problem name used in the sent message.
        window_name: Feature window name used in the sent message.

    Returns:
        The greater of `newvalue` and the previously stored best value.
    """
    best_notified_metric_yet = 0.0
    if os.path.exists(metricfilepath):
        print('Reading best notified '+metric_name+' from file '+metricfilepath)
        with open(metricfilepath) as f:
            stored_value = f.readline().strip()
        try:
            best_notified_metric_yet = float(stored_value)
        except ValueError:
            # An empty or corrupted file must not abort the run: start over from 0.0
            print('Could not read best notified '+metric_name+' from file '+metricfilepath+'. Assuming 0.0.')
    else:
        print('No best notified '+metric_name+' file found. Creating new one.')

    if not newvalue > best_notified_metric_yet:
        return best_notified_metric_yet

    print('New best '+metric_name+' found: '+str(newvalue)+ ' (old best: '+str(best_notified_metric_yet)+')')
    # The value must stay alone on the first line: that is what gets parsed on the next run
    message = str(newvalue) + '\nfrom run in folder ' + run_dir
    with open(metricfilepath,'w') as f:
        f.write(message)
    # send_to_telegram('New best %s: %.2f%%\nfrom run in folder %s'%(metric_name,round(newvalue,4)*100,run_dir))
    send_to_telegram('FOR PROBLEM: %s\nWITH WINDOW:%s\nNew best %s: %.2f%%'%(problem_name,window_name,metric_name,round(newvalue,4)*100))
    return newvalue

if not TRAIN_ONLY_ON_FULL_DATASET:
    # Both best-value files live in problem_folder and are prefixed with the window name
    _window_tag = FEATURE_WINDOW_SIZE.name
    _task_tag = classification_task.value[1]

    best_afa = save_and_notify_if_greater(
        metrics_to_save['average_fold_accuracy'],
        os.path.join(problem_folder, _window_tag+'best_accuracy_yet_notified.txt'),
        'Accuracy',
        _task_tag,
        _window_tag)
    best_maf1 = save_and_notify_if_greater(
        metrics_to_save['macro_avg_f1score'],
        os.path.join(problem_folder, _window_tag+'best_maf1_yet_notified.txt'),
        'Macro Average F1-Score',
        _task_tag,
        _window_tag)

### Write results to file
to quickly preview them without scrolling

In [None]:
if not TRAIN_ONLY_ON_FULL_DATASET:
    last_metrics_filepath = './last_metrics.txt'

    # Raw-string pattern (no invalid '\d' escapes) that accepts any number of
    # digits, so values such as 5.23% or 100.00% are parsed correctly too
    percentage_pattern = re.compile(r'(\d+(?:\.\d+)?)%')

    # Values of the previous run: the first percentage found on the third and
    # fourth line of the summary written below; 0.0 when they cannot be read
    prev_avg_acc = 0.0
    prev_mavg_f1 = 0.0
    if os.path.exists(last_metrics_filepath):
        with open(last_metrics_filepath,'r') as f:
            lines = f.readlines()
        if len(lines) > 3 and lines[0].strip() != '':
            prev_acc_match = percentage_pattern.search(lines[2])
            prev_f1_match = percentage_pattern.search(lines[3])
            if prev_acc_match and prev_f1_match:
                prev_avg_acc = float(prev_acc_match.group(1))
                prev_mavg_f1 = float(prev_f1_match.group(1))

    afa =  round(metrics_to_save['average_fold_accuracy']*100,2)
    maf1 = round(metrics_to_save['macro_avg_f1score']*100,2)
    # Rewrite the summary only when at least one metric changed, so that the
    # "prev" column keeps referring to a run with different results
    if afa != prev_avg_acc or maf1 != prev_mavg_f1:
        with open(last_metrics_filepath,'w') as f:
            print('| Time:  '+strftime("%H:%M - %Y/%m/%d")+'                |', file = f)
            print('|                  last  |  prev  |   best |', file = f)
            print('| Avg. Accuracy:  %.2f%% | %.2f%% | %.2f%% |'%(afa,prev_avg_acc,round(best_afa*100,2)), file = f)
            print('| MAvg. F1Score:  %.2f%% | %.2f%% | %.2f%% |'%(maf1,prev_mavg_f1,round(best_maf1*100,2)), file = f)