<a href='https://colab.research.google.com/github/domenicostefani/timbre-classifier/blob/main/expressive-technique-classifier-phase3.ipynb' target='_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Phase3 - Model Tester
Test with extra recording data


In [63]:
MODEL_DIR = 'output/c_acc0.5784_CrossValidatedRun_20221010-162601'
import os
assert os.path.exists(MODEL_DIR)

## Import modules and mount drive folder

In [64]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import sys
import datetime
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow import keras
print("Tensorflow version: " + tf.version.VERSION)
from packaging import version
python_version = re.findall('\d+\.\d+\.\d+',sys.version)[0]
if version.parse(python_version) <= version.parse("3.8.3"):
    print("Python version ('"+python_version+"') is less than 3.8.3")
    import pickle5 as pickle
else: 
    import pickle
import shutil
from sklearn.metrics import f1_score
from sklearn import metrics
from typing import Tuple

global_random_state = 42
np.random.seed(global_random_state)
tf.random.set_seed(global_random_state)

COLAB = 'google.colab' in str(get_ipython())

DATAFOLDER = 'data'

Tensorflow version: 2.4.1


In [65]:
physical_devices = tf.config.list_physical_devices('GPU') 

for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

print(physical_devices)
assert len(tf.config.experimental.list_physical_devices('GPU')) >= 1

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [66]:
def drop_unused_features(features_df: pd.DataFrame, inplace = False) -> pd.DataFrame:
    if not inplace:
        res_df = features_df.copy()
    else:
        res_df = features_df
    if 'attackTime_peaksamp'       not in res_df.columns.to_list() or\
       'attackTime_attackStartIdx' not in res_df.columns.to_list() or\
       'peakSample_index'          not in res_df.columns.to_list():
       raise Exception("The features dataframe does not contain the required columns!")

    res_df.drop(columns=['attackTime_peaksamp',\
                                'attackTime_attackStartIdx',\
                                'peakSample_index'], inplace=True)
    return res_df

In [67]:
# Extract separate DFs
# Divide dataset into metadata, features and labels
def divide_dataset(features_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    metadata = features_df.filter(regex='^meta_',axis=1)
    labels = features_df.meta_expressive_technique_id
    features = features_df.loc[:,[col for col in features_df.columns if col not in metadata.columns]]
    # Convert to numeric formats where possible (somehow convert_dtypes doesn't work [https://stackoverflow.com/questions/65915048/pandas-convert-dtypes-not-working-on-numbers-marked-as-objects])
    metadata = metadata.apply(pd.to_numeric, errors='ignore')
    labels = labels.apply(pd.to_numeric, errors='ignore')
    features = features.apply(pd.to_numeric, errors='ignore')
    return metadata, features, labels

## Subset features

In [68]:
selected_features = None
assert os.path.exists(os.path.join(MODEL_DIR,'info.txt'))
with open(os.path.join(MODEL_DIR,'info.txt')) as infofile:
    for idx,line in enumerate(infofile.readlines()):
        if 'Selected features: ' in line:
            s = line
            s = re.findall('(?<=Selected features: ).*',s)[0]
            s = re.findall('(?<=\[).*(?=\])',s)[0].replace('\'','').replace(' ','').split(',')
            selected_features = s
assert selected_features is not None

print('Read features list from file: \n'+str(selected_features))

Read features list from file: 
['peakSample_value', 'attackTime_value', 'mfcc_7', 'bfcc_7', 'bfcc_3', 'bfcc_4', 'mfcc_3', 'cepstrum_1', 'mfcc_4', 'bfcc_32', 'barkSpec_3', 'cepstrum_5', 'mfcc_5', 'bfcc_5', 'bfcc_6', 'barkSpec_2', 'mfcc_8', 'bfcc_9', 'bfcc_17', 'cepstrum_2', 'barkSpec_7', 'mfcc_10', 'barkSpec_17', 'bfcc_20', 'barkSpec_6', 'barkSpec_8', 'bfcc_30', 'mfcc_34', 'mfcc_6', 'bfcc_10', 'bfcc_34', 'bfcc_8', 'bfcc_18', 'bfcc_22', 'mfcc_9', 'barkSpecBrightness', 'barkSpec_4', 'mfcc_26', 'mfcc_25', 'mfcc_11', 'mfcc_23', 'barkSpec_18', 'barkSpec_5', 'bfcc_16', 'mfcc_27', 'barkSpec_10', 'mfcc_21', 'bfcc_12', 'bfcc_21', 'mfcc_22']


In [69]:
def macroweighted_f1(y_true,y_pred):
    f1scores = []
    numSamples = []
    for selclass in CLASSES:
        classSelection = (y_true == (np.ones(np.shape(y_true)[0])*selclass))
        numSamples.append(sum(classSelection))
        classPrediction = (y_pred == (np.ones(np.shape(y_true)[0])*selclass))
        true_positives = np.sum(np.logical_and(classSelection,(y_true == y_pred)))

        precision = 1.0 * true_positives / np.sum(classPrediction)
        recall = 1.0 * true_positives / np.sum(classSelection)
        f1score = 2 /((1/precision)+(1/recall))
        f1scores.append(f1score)
    macroWeightedF1 = sum(np.array(f1scores) * np.array(numSamples)) / sum(numSamples)
    return macroWeightedF1

def compute_metrics(y_true, y_pred,_verbose = False):
    accuracy = np.sum(y_pred == y_true)/np.shape(y_true)[0]
    f1mw = macroweighted_f1(y_true,y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    
    classification_report = metrics.classification_report(y_true, y_pred, digits=6,target_names = CLASSES_DESC.values(),output_dict=True)
    printable_classification_report = metrics.classification_report(y_true, y_pred, digits=4,target_names = CLASSES_DESC.values())

    if _verbose:
        print("Test Accuracy: " + str(accuracy) + "\nTest macro_weighted_avg f1-score: " + str(f1mw)+'\n'+str(confusion_matrix)+'\n'+str(printable_classification_report))

    return accuracy, f1mw, confusion_matrix, classification_report, printable_classification_report

# Load the model

In [70]:
model_path = os.path.join(MODEL_DIR,'finalModel')
assert os.path.exists(model_path)
final_model = tf.keras.models.load_model(model_path)

print(final_model.summary())

Model: "guitar_timbre_classifier_20221010-170918"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_50 (Dense)             (None, 80)                4080      
_________________________________________________________________
batch_normalization_40 (Batc (None, 80)                320       
_________________________________________________________________
dropout_40 (Dropout)         (None, 80)                0         
_________________________________________________________________
dense_51 (Dense)             (None, 80)                6480      
_________________________________________________________________
batch_normalization_41 (Batc (None, 80)                320       
_________________________________________________________________
dropout_41 (Dropout)         (None, 80)                0         
_________________________________________________________________
dense_52 (Dense)          

## Testing with extra test data
This data was extracted from extra recordings, made to test the system in a real life scenario.  
Here we test only to veryfy that everything is working here, before making a shift to the real life test

In [71]:
""" Load the test data """

TEST_DATA_FILE_PATH = os.path.join(DATAFOLDER,'phase3','20221011_110715_test_onlycorrectdetections.pickle')
print("Loading test data from pickle...")
with open(TEST_DATA_FILE_PATH,'rb') as pf:
    testdataset = pickle.load(pf)
testdataset.sort_values(['meta_expressive_technique_id','meta_audiofilePath'],inplace = True)
print('Done.')
# If this fails, the dataset has changed from the last time the program was run successfully (CHECK THE DATA!!!)
assert testdataset.shape == (754,507)
# display(testdataset)


""" Drop unused features (like the train/test dataset) """

drop_unused_features(testdataset,inplace=True)
assert testdataset.shape == (754,504)


""" Divide the test data into metadata, features and labels (like the train/test dataset) """

test_metadata, test_features, test_labels = divide_dataset(testdataset)
assert test_metadata.shape[0] == test_features.shape[0] == test_labels.shape[0] == 754
assert test_metadata.shape[1] == 9
assert test_features.shape[1] == 495


""" Apply the feature selection computed for the train/test set (like the train/test dataset) """

test_features = test_features.copy().loc[:,selected_features]

assert len(selected_features) == final_model.layers[0].get_input_at(0).get_shape().as_list()[1]

Loading test data from pickle...
Done.


In [72]:
CLASSES_DESC = {0:"Kick",
                1:"Snare 1",
                2:"Tom",
                3:"Snare 2",
                4:"Natural Harmonics",
                5:"Palm Mute",
                6:"Pick Near Bridge",
                7:"Pick Over the Soundhole"}
CLASSES = list(CLASSES_DESC.keys())

assert np.equal(np.sort(CLASSES),np.sort(pd.unique(test_labels))).all()

In [73]:
DO_WRITE_TO_FILE = True

extra_test_x = test_features.to_numpy()
extra_test_y = test_labels.to_numpy()


y_true = np.squeeze(extra_test_y)
y_pred = np.argmax(final_model(extra_test_x),axis=1)
cm_acc, f1mw, cm_conf_matrix, cm_classf_report, cm_printable_classf_report = compute_metrics(y_true, \
                                                                                             y_pred, \
                                                                                             _verbose=False)
if DO_WRITE_TO_FILE:
    infof = open(os.path.join(MODEL_DIR,'info.txt'),'a')
    writefunc = infof.write
else:
    writefunc = print

writefunc('______________________________________________________________________________________________________________________________________________________\n\n\n')
writefunc('+----------------------------------------------------------------+\n')
writefunc('| Results obtained on extra test recordings with the FINAL MODEL |\n')
writefunc('+----------------------------------------------------------------+\n\n')
writefunc('Extra-test-Accuracy: '+str(cm_acc)+'\n\n')
writefunc('Extra-test-F1 Score (weighted average): '+str(f1mw)+'\n\n')
writefunc('Extra-test-ConfusionMatrix: \n'+str(cm_conf_matrix)+'\n\n')
writefunc('Extra-test-Report: \n'+str(cm_printable_classf_report)+'\n\n')


if DO_WRITE_TO_FILE:
    infof.close()

  precision = 1.0 * true_positives / np.sum(classPrediction)
  f1score = 2 /((1/precision)+(1/recall))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [74]:
!code $MODEL_DIR/info.txt