# Load functions

In [1]:
import os

# Keras picks its backend from KERAS_BACKEND once, when the package is first
# imported, so the variable has to be set before `import keras` (and before
# anything that imports keras itself, such as keras_tuner).
os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import pandas as pd
import keras
from keras import layers
import keras_tuner as kt
import datetime
import tensorflow as tf
import gc
import glob
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import warnings

from prepData import prepData_STM, prepData_VGG, prepData_YAM, prepData_AST, prepData_melspectrogram, mask_STMmatrix



def eval_model(model, test_dataset):
    """Evaluate `model` on `test_dataset` and summarise the result in one row.

    The model is re-compiled (metrics only; no loss or optimizer is passed)
    with ROC-AUC, PR-AUC, accuracy and one macro-averaged F1 metric per
    decision threshold in 0.05, 0.10, ..., 0.95. The threshold with the
    highest macro F1 is then handed to `eval_model_classF1` to obtain the
    per-class F1 scores at that threshold.

    Note that this calls `model.compile(...)`, so the metrics attached to the
    passed-in model are replaced.

    Args:
        model: a Keras model exposing `compile` and `evaluate`.
        test_dataset: whatever `model.evaluate` accepts as its data argument.

    Returns:
        A one-row `pandas.DataFrame` with the columns 'loss', 'ROC-AUC',
        'PR-AUC', 'max_macro_f1', 'accuracy', 'max_f1_threshold', followed by
        the columns returned by `eval_model_classF1`.
    """
    # Keep threshold values and metric names side by side so the winning
    # threshold is looked up exactly as it was passed to the metric, instead
    # of being rebuilt as k * 0.05 (which yields e.g. 0.15000000000000002).
    percent_steps = list(range(5, 100, 5))
    thresholds = [step / 100 for step in percent_steps]
    f1_names = ["macro_f1_score_" + str(step) for step in percent_steps]
    macroF1_list = [
        keras.metrics.F1Score(average="macro", threshold=thr, name=name, dtype=None)
        for thr, name in zip(thresholds, f1_names)
    ]

    ROCAUC = keras.metrics.AUC(curve="ROC", name="ROC-AUC")
    PRAUC = keras.metrics.AUC(curve="PR", name="PR-AUC")

    model.compile(metrics=[ROCAUC, PRAUC, 'accuracy'] + macroF1_list)
    # Ask for a dict so each value is fetched by metric name rather than by
    # its position in the returned list.
    results = model.evaluate(test_dataset, return_dict=True)

    # Search the max F1 score across thresholds (first maximum wins on ties).
    f1_scores = [results[name] for name in f1_names]
    best = int(np.argmax(f1_scores))
    max_threshold = thresholds[best]

    df_f1 = eval_model_classF1(model, test_dataset, max_threshold)

    columns = ['loss', 'ROC-AUC', 'PR-AUC', 'max_macro_f1', 'accuracy', 'max_f1_threshold']
    flat_data = [
        results['loss'],
        results['ROC-AUC'],
        results['PR-AUC'],
        f1_scores[best],
        results['accuracy'],
        max_threshold,
    ]
    df = pd.DataFrame([flat_data], columns=columns)
    df_all = pd.concat([df, df_f1], axis=1)
    return df_all

def eval_model_classF1(model, test_dataset, threshold):
    """Return the per-class F1 scores of `model` at a fixed decision threshold.

    The model is re-compiled with a single un-averaged (`average=None`)
    `F1Score` metric and evaluated on `test_dataset`. The second element of
    the evaluation result is converted with `.numpy()` and laid out as one
    row under the six hard-coded class labels below.

    Args:
        model: a Keras model exposing `compile` and `evaluate`.
        test_dataset: whatever `model.evaluate` accepts as its data argument.
        threshold: decision threshold forwarded to `keras.metrics.F1Score`.

    Returns:
        A one-row `pandas.DataFrame` with one column per class label.
    """
    per_class_f1 = keras.metrics.F1Score(average=None, threshold=threshold)
    model.compile(metrics=[per_class_f1])

    # Index 1 is the F1 metric; index 0 is presumably the loss entry.
    class_scores = model.evaluate(test_dataset)[1].numpy()

    class_labels = [
        'speech: nontonal',
        'speech: tonal',
        'music: vocal',
        'music: nonvocal',
        'env: urban',
        'env: wildlife',
    ]
    return pd.DataFrame([list(class_scores)], columns=class_labels)

def path_finder(base_path):
    """Locate the `best_model0.keras` file of an `MLP_*` run under `base_path`.

    Searches for `<base_path>/MLP_*/best_model0.keras`.

    Args:
        base_path: directory that contains the `MLP_*` run folder(s).

    Returns:
        The matching path. If several paths match, a warning is issued and
        the first one in sorted order is returned.

    Raises:
        FileNotFoundError: if nothing matches the pattern.
    """
    pattern = os.path.join(base_path, 'MLP_*/best_model0.keras')

    # glob returns matches in arbitrary order; sort so that "the first one"
    # is the same path on every run.
    matching_paths = sorted(glob.glob(pattern))

    if not matching_paths:
        raise FileNotFoundError('No matching paths found.')
    if len(matching_paths) > 1:
        # Previously this branch only warned and fell through, returning None.
        warnings.warn('Multiple paths found. Using the first one.')
    return matching_paths[0]

2025-01-11 08:57:57.770249: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# STM (PCA)

In [2]:
# Build the STM data twice with n_pca=1024: once with the default settings and
# once with ds_nontonal_speech=True. The first two return values of each call
# are discarded; n_feat_STM and n_target are overwritten by the second call.
_, _, test_dataset_STM, n_feat_STM, n_target = prepData_STM(n_pca=1024)
_, _, test_dataset_STM_ds, n_feat_STM, n_target = prepData_STM(ds_nontonal_speech=True, n_pca=1024)

# Load the saved STM models. The directory names distinguish Dropout vs.
# LayerNormalization and macroF1 vs. ROC-AUC (presumably the regularisation
# layer and the model-selection objective -- confirm against the training code).
model_STM_dropout_F1 = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/Dropout/macroF1"))
model_STM_dropout_AUC = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/Dropout/ROC-AUC"))
model_STM_LN_F1 = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/LayerNormalization/macroF1"))
model_STM_LN_AUC = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/LayerNormalization/ROC-AUC"))

# Same four variants, loaded from the "downsample" sub-directories
# (presumably trained on the ds_nontonal_speech=True data -- TODO confirm).
model_STM_dropout_F1_ds = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/Dropout/macroF1/downsample"))
model_STM_dropout_AUC_ds = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/Dropout/ROC-AUC/downsample"))
model_STM_LN_F1_ds = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/LayerNormalization/macroF1/downsample"))
model_STM_LN_AUC_ds = keras.saving.load_model(path_finder("model/STM/MLP_corpora_categories/PCA/LayerNormalization/ROC-AUC/downsample"))


STM_output/corpSTMnpy/BibleTTS-akuapem-twi_STMall.npy
STM_output/corpSTMnpy/BibleTTS-asante-twi_STMall.npy
STM_output/corpSTMnpy/BibleTTS-ewe_STMall.npy
STM_output/corpSTMnpy/BibleTTS-hausa_STMall.npy
STM_output/corpSTMnpy/BibleTTS-lingala_STMall.npy
STM_output/corpSTMnpy/BibleTTS-yoruba_STMall.npy
STM_output/corpSTMnpy/Buckeye_STMall.npy
STM_output/corpSTMnpy/EUROM_STMall.npy
STM_output/corpSTMnpy/HiltonMoser2022_speech_STMall.npy
STM_output/corpSTMnpy/LibriSpeech_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-AR_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-ES_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-FR_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-TR_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ab_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ar_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ba_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-be_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-bg_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-bn_STMall.n

  target.replace({


Good to go!
STM_output/corpSTMnpy/BibleTTS-akuapem-twi_STMall.npy
STM_output/corpSTMnpy/BibleTTS-asante-twi_STMall.npy
STM_output/corpSTMnpy/BibleTTS-ewe_STMall.npy
STM_output/corpSTMnpy/BibleTTS-hausa_STMall.npy
STM_output/corpSTMnpy/BibleTTS-lingala_STMall.npy
STM_output/corpSTMnpy/BibleTTS-yoruba_STMall.npy
STM_output/corpSTMnpy/Buckeye_STMall.npy
STM_output/corpSTMnpy/EUROM_STMall.npy
STM_output/corpSTMnpy/HiltonMoser2022_speech_STMall.npy
STM_output/corpSTMnpy/LibriSpeech_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-AR_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-ES_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-FR_STMall.npy
STM_output/corpSTMnpy/MediaSpeech-TR_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ab_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ar_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-ba_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-be_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice-bg_STMall.npy
STM_output/corpSTMnpy/MozillaCommonVoice

  target.replace({


STM data and meta data mismatched!


In [3]:
# Score every STM model on its matching test set and tag each one-row result
# with the model's name.
eval_STM_dropout_F1 = eval_model(model_STM_dropout_F1, test_dataset_STM).assign(model='STM_dropout_F1')
eval_STM_dropout_AUC = eval_model(model_STM_dropout_AUC, test_dataset_STM).assign(model='STM_dropout_AUC')
eval_STM_LN_F1 = eval_model(model_STM_LN_F1, test_dataset_STM).assign(model='STM_LN_F1')
eval_STM_LN_AUC = eval_model(model_STM_LN_AUC, test_dataset_STM).assign(model='STM_LN_AUC')

# Models from the "downsample" directories are scored on the *_ds test set.
eval_STM_dropout_F1_ds = eval_model(model_STM_dropout_F1_ds, test_dataset_STM_ds).assign(model='STM_dropout_F1_ds')
eval_STM_dropout_AUC_ds = eval_model(model_STM_dropout_AUC_ds, test_dataset_STM_ds).assign(model='STM_dropout_AUC_ds')
eval_STM_LN_F1_ds = eval_model(model_STM_LN_F1_ds, test_dataset_STM_ds).assign(model='STM_LN_F1_ds')
eval_STM_LN_AUC_ds = eval_model(model_STM_LN_AUC_ds, test_dataset_STM_ds).assign(model='STM_LN_AUC_ds')

# Stack the eight rows into one summary table.
df_STM_eval = pd.concat(
    [
        eval_STM_dropout_F1,
        eval_STM_dropout_AUC,
        eval_STM_LN_F1,
        eval_STM_LN_AUC,
        eval_STM_dropout_F1_ds,
        eval_STM_dropout_AUC_ds,
        eval_STM_LN_F1_ds,
        eval_STM_LN_AUC_ds,
    ],
    ignore_index=True,
)

# Write the summary to a CSV whose name carries a minute-resolution timestamp.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
df_STM_eval.to_csv("model/MLP_summary_STM_"+time_stamp+".csv", index=False)

[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - PR-AUC: 0.9372 - ROC-AUC: 0.9875 - accuracy: 0.8743 - loss: 0.0333 - macro_f1_score_10: 0.6612 - macro_f1_score_15: 0.6904 - macro_f1_score_20: 0.7195 - macro_f1_score_25: 0.7480 - macro_f1_score_30: 0.7737 - macro_f1_score_35: 0.7933 - macro_f1_score_40: 0.8038 - macro_f1_score_45: 0.8009 - macro_f1_score_5: 0.6181 - macro_f1_score_50: 0.7792 - macro_f1_score_55: 0.7399 - macro_f1_score_60: 0.6839 - macro_f1_score_65: 0.6060 - macro_f1_score_70: 0.5110 - macro_f1_score_75: 0.4090 - macro_f1_score_80: 0.3072 - macro_f1_score_85: 0.2037 - macro_f1_score_90: 0.1010 - macro_f1_score_95: 0.0321
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 16ms/step - f1_score: 0.8087 - loss: 0.0333
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - PR-AUC: 0.9354 - ROC-AUC: 0.9874 - accuracy: 0.8754 - loss: 0.0218 - macro_f1_score_10: 0.6506 - macro_f1_score_15: 0.6799 - macro_f1_

# VGGish

In [4]:
# Build the VGGish data twice: once with the default settings and once with
# ds_nontonal_speech=True. The first two return values of each call are
# discarded; n_feat_VGG and n_target are overwritten by the second call.
_, _, test_dataset_VGG, n_feat_VGG, n_target = prepData_VGG()
_, _, test_dataset_VGG_ds, n_feat_VGG, n_target = prepData_VGG(ds_nontonal_speech=True)


# Load the saved VGGish models. The directory names distinguish Dropout vs.
# LayerNormalization and macroF1 vs. ROC-AUC (presumably the regularisation
# layer and the model-selection objective -- confirm against the training code).
model_VGG_dropout_F1 = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/Dropout/macroF1"))
model_VGG_dropout_AUC = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/Dropout/ROC-AUC"))
model_VGG_LN_F1 = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/LayerNormalization/macroF1"))
model_VGG_LN_AUC = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/LayerNormalization/ROC-AUC"))

# Same four variants, loaded from the "downsample" sub-directories
# (presumably trained on the ds_nontonal_speech=True data -- TODO confirm).
model_VGG_dropout_F1_ds = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/Dropout/macroF1/downsample"))
model_VGG_dropout_AUC_ds = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/Dropout/ROC-AUC/downsample"))
model_VGG_LN_F1_ds = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/LayerNormalization/macroF1/downsample"))
model_VGG_LN_AUC_ds = keras.saving.load_model(path_finder("model/VGGish/MLP_corpora_categories/LayerNormalization/ROC-AUC/downsample"))


vggish_output/embeddings/BibleTTS-akuapem-twi_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-asante-twi_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-ewe_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-hausa_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-lingala_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-yoruba_vggishEmbeddings.npy
vggish_output/embeddings/Buckeye_vggishEmbeddings.npy
vggish_output/embeddings/EUROM_vggishEmbeddings.npy
vggish_output/embeddings/HiltonMoser2022_speech_vggishEmbeddings.npy
vggish_output/embeddings/LibriSpeech_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-AR_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-ES_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-FR_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-TR_vggishEmbeddings.npy
vggish_output/embeddings/MozillaCommonVoice-ab_vggishEmbeddings.npy
vggish_output/embeddings/MozillaCommonVoice-ar_vggishEmbeddings.npy
vggish_ou

  target.replace({


Good to go!
vggish_output/embeddings/BibleTTS-akuapem-twi_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-asante-twi_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-ewe_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-hausa_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-lingala_vggishEmbeddings.npy
vggish_output/embeddings/BibleTTS-yoruba_vggishEmbeddings.npy
vggish_output/embeddings/Buckeye_vggishEmbeddings.npy
vggish_output/embeddings/EUROM_vggishEmbeddings.npy
vggish_output/embeddings/HiltonMoser2022_speech_vggishEmbeddings.npy
vggish_output/embeddings/LibriSpeech_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-AR_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-ES_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-FR_vggishEmbeddings.npy
vggish_output/embeddings/MediaSpeech-TR_vggishEmbeddings.npy
vggish_output/embeddings/MozillaCommonVoice-ab_vggishEmbeddings.npy
vggish_output/embeddings/MozillaCommonVoice-ar_vggishEmbeddings.n

  target.replace({


Embedding data and meta data mismatched!


In [5]:
# Score every VGGish model on its matching test set and tag each one-row
# result with the model's name.
eval_VGG_dropout_F1 = eval_model(model_VGG_dropout_F1, test_dataset_VGG).assign(model='VGG_dropout_F1')
eval_VGG_dropout_AUC = eval_model(model_VGG_dropout_AUC, test_dataset_VGG).assign(model='VGG_dropout_AUC')
eval_VGG_LN_F1 = eval_model(model_VGG_LN_F1, test_dataset_VGG).assign(model='VGG_LN_F1')
eval_VGG_LN_AUC = eval_model(model_VGG_LN_AUC, test_dataset_VGG).assign(model='VGG_LN_AUC')

# Models from the "downsample" directories are scored on the *_ds test set.
eval_VGG_dropout_F1_ds = eval_model(model_VGG_dropout_F1_ds, test_dataset_VGG_ds).assign(model='VGG_dropout_F1_ds')
eval_VGG_dropout_AUC_ds = eval_model(model_VGG_dropout_AUC_ds, test_dataset_VGG_ds).assign(model='VGG_dropout_AUC_ds')
eval_VGG_LN_F1_ds = eval_model(model_VGG_LN_F1_ds, test_dataset_VGG_ds).assign(model='VGG_LN_F1_ds')
eval_VGG_LN_AUC_ds = eval_model(model_VGG_LN_AUC_ds, test_dataset_VGG_ds).assign(model='VGG_LN_AUC_ds')

# Stack the eight rows into one summary table.
df_VGG_eval = pd.concat(
    [
        eval_VGG_dropout_F1,
        eval_VGG_dropout_AUC,
        eval_VGG_LN_F1,
        eval_VGG_LN_AUC,
        eval_VGG_dropout_F1_ds,
        eval_VGG_dropout_AUC_ds,
        eval_VGG_LN_F1_ds,
        eval_VGG_LN_AUC_ds,
    ],
    ignore_index=True,
)

# Write the summary to a CSV whose name carries a minute-resolution timestamp.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
df_VGG_eval.to_csv("model/MLP_summary_VGG_"+time_stamp+".csv", index=False)

[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - PR-AUC: 0.9660 - ROC-AUC: 0.9930 - accuracy: 0.9107 - loss: 0.0107 - macro_f1_score_10: 0.7305 - macro_f1_score_15: 0.7622 - macro_f1_score_20: 0.7883 - macro_f1_score_25: 0.8117 - macro_f1_score_30: 0.8341 - macro_f1_score_35: 0.8538 - macro_f1_score_40: 0.8652 - macro_f1_score_45: 0.8677 - macro_f1_score_5: 0.6853 - macro_f1_score_50: 0.8586 - macro_f1_score_55: 0.8378 - macro_f1_score_60: 0.8067 - macro_f1_score_65: 0.7598 - macro_f1_score_70: 0.7077 - macro_f1_score_75: 0.6448 - macro_f1_score_80: 0.5584 - macro_f1_score_85: 0.4424 - macro_f1_score_90: 0.3028 - macro_f1_score_95: 0.1697
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - f1_score: 0.8691 - loss: 0.0107
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - PR-AUC: 0.9635 - ROC-AUC: 0.9925 - accuracy: 0.9055 - loss: 5.2401e-05 - macro_f1_score_10: 0.7272 - macro_f1_score_15: 0.7599 - macro_f1_s

# YAMNet

In [6]:
# Build the YAMNet data twice: once with the default settings and once with
# ds_nontonal_speech=True. The first two return values of each call are
# discarded; n_feat_YAM and n_target are overwritten by the second call.
_, _, test_dataset_YAM, n_feat_YAM, n_target = prepData_YAM()
_, _, test_dataset_YAM_ds, n_feat_YAM, n_target = prepData_YAM(ds_nontonal_speech=True)

# Load the saved YAMNet models. The directory names distinguish Dropout vs.
# LayerNormalization and macroF1 vs. ROC-AUC (presumably the regularisation
# layer and the model-selection objective -- confirm against the training code).
model_YAM_dropout_F1 = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/Dropout/macroF1"))
model_YAM_dropout_AUC = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/Dropout/ROC-AUC"))
model_YAM_LN_F1 = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/LayerNormalization/macroF1"))
model_YAM_LN_AUC = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/LayerNormalization/ROC-AUC"))

# Same four variants, loaded from the "downsample" sub-directories
# (presumably trained on the ds_nontonal_speech=True data -- TODO confirm).
model_YAM_dropout_F1_ds = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/Dropout/macroF1/downsample"))
model_YAM_dropout_AUC_ds = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/Dropout/ROC-AUC/downsample"))
model_YAM_LN_F1_ds = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/LayerNormalization/macroF1/downsample"))
model_YAM_LN_AUC_ds = keras.saving.load_model(path_finder("model/YAMNet/MLP_corpora_categories/LayerNormalization/ROC-AUC/downsample"))


yamnet_output/embeddings/BibleTTS-akuapem-twi_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-asante-twi_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-ewe_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-hausa_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-lingala_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-yoruba_yamnetEmbeddings.npy
yamnet_output/embeddings/Buckeye_yamnetEmbeddings.npy
yamnet_output/embeddings/EUROM_yamnetEmbeddings.npy
yamnet_output/embeddings/HiltonMoser2022_speech_yamnetEmbeddings.npy
yamnet_output/embeddings/LibriSpeech_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-AR_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-ES_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-FR_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-TR_yamnetEmbeddings.npy
yamnet_output/embeddings/MozillaCommonVoice-ab_yamnetEmbeddings.npy
yamnet_output/embeddings/MozillaCommonVoice-ar_yamnetEmbeddings.npy
yamnet_ou

  target.replace({


Good to go!
yamnet_output/embeddings/BibleTTS-akuapem-twi_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-asante-twi_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-ewe_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-hausa_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-lingala_yamnetEmbeddings.npy
yamnet_output/embeddings/BibleTTS-yoruba_yamnetEmbeddings.npy
yamnet_output/embeddings/Buckeye_yamnetEmbeddings.npy
yamnet_output/embeddings/EUROM_yamnetEmbeddings.npy
yamnet_output/embeddings/HiltonMoser2022_speech_yamnetEmbeddings.npy
yamnet_output/embeddings/LibriSpeech_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-AR_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-ES_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-FR_yamnetEmbeddings.npy
yamnet_output/embeddings/MediaSpeech-TR_yamnetEmbeddings.npy
yamnet_output/embeddings/MozillaCommonVoice-ab_yamnetEmbeddings.npy
yamnet_output/embeddings/MozillaCommonVoice-ar_yamnetEmbeddings.n

  target.replace({


Embedding data and meta data mismatched!


In [7]:
# Score every YAMNet model on its matching test set and tag each one-row
# result with the model's name.
eval_YAM_dropout_F1 = eval_model(model_YAM_dropout_F1, test_dataset_YAM).assign(model='YAM_dropout_F1')
eval_YAM_dropout_AUC = eval_model(model_YAM_dropout_AUC, test_dataset_YAM).assign(model='YAM_dropout_AUC')
eval_YAM_LN_F1 = eval_model(model_YAM_LN_F1, test_dataset_YAM).assign(model='YAM_LN_F1')
eval_YAM_LN_AUC = eval_model(model_YAM_LN_AUC, test_dataset_YAM).assign(model='YAM_LN_AUC')

# Models from the "downsample" directories are scored on the *_ds test set.
eval_YAM_dropout_F1_ds = eval_model(model_YAM_dropout_F1_ds, test_dataset_YAM_ds).assign(model='YAM_dropout_F1_ds')
eval_YAM_dropout_AUC_ds = eval_model(model_YAM_dropout_AUC_ds, test_dataset_YAM_ds).assign(model='YAM_dropout_AUC_ds')
eval_YAM_LN_F1_ds = eval_model(model_YAM_LN_F1_ds, test_dataset_YAM_ds).assign(model='YAM_LN_F1_ds')
eval_YAM_LN_AUC_ds = eval_model(model_YAM_LN_AUC_ds, test_dataset_YAM_ds).assign(model='YAM_LN_AUC_ds')

# Stack the eight rows into one summary table.
df_YAM_eval = pd.concat(
    [
        eval_YAM_dropout_F1,
        eval_YAM_dropout_AUC,
        eval_YAM_LN_F1,
        eval_YAM_LN_AUC,
        eval_YAM_dropout_F1_ds,
        eval_YAM_dropout_AUC_ds,
        eval_YAM_LN_F1_ds,
        eval_YAM_LN_AUC_ds,
    ],
    ignore_index=True,
)

# Write the summary to a CSV whose name carries a minute-resolution timestamp.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
df_YAM_eval.to_csv("model/MLP_summary_YAM_"+time_stamp+".csv", index=False)

[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - PR-AUC: 0.9516 - ROC-AUC: 0.9903 - accuracy: 0.8940 - loss: 2.8396e-08 - macro_f1_score_10: 0.7171 - macro_f1_score_15: 0.7400 - macro_f1_score_20: 0.7633 - macro_f1_score_25: 0.7869 - macro_f1_score_30: 0.8107 - macro_f1_score_35: 0.8308 - macro_f1_score_40: 0.8445 - macro_f1_score_45: 0.8476 - macro_f1_score_5: 0.6791 - macro_f1_score_50: 0.8390 - macro_f1_score_55: 0.8029 - macro_f1_score_60: 0.7555 - macro_f1_score_65: 0.7003 - macro_f1_score_70: 0.6328 - macro_f1_score_75: 0.5534 - macro_f1_score_80: 0.4561 - macro_f1_score_85: 0.3417 - macro_f1_score_90: 0.2317 - macro_f1_score_95: 0.1322
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - f1_score: 0.8425 - loss: 2.8396e-08
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - PR-AUC: 0.9535 - ROC-AUC: 0.9906 - accuracy: 0.9004 - loss: 6.1258e-05 - macro_f1_score_10: 0.7064 - macro_f1_score_15: 0.7374 - m

# Mel-Spectrogram (PCA)

In [8]:
# Build the mel-spectrogram data twice with n_pca=1024: once with the default
# settings and once with ds_nontonal_speech=True. The first two return values
# of each call are discarded; n_feat_mel and n_target are overwritten by the
# second call.
_, _, test_dataset_mel, n_feat_mel, n_target = prepData_melspectrogram(n_pca=1024)
_, _, test_dataset_mel_ds, n_feat_mel, n_target = prepData_melspectrogram(ds_nontonal_speech=True, n_pca=1024)

# Load the saved mel-spectrogram models. The directory names distinguish
# Dropout vs. LayerNormalization and macroF1 vs. ROC-AUC (presumably the
# regularisation layer and the model-selection objective -- confirm against
# the training code).
model_mel_dropout_F1 = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/Dropout/macroF1"))
model_mel_dropout_AUC = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/Dropout/ROC-AUC"))
model_mel_LN_F1 = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/LayerNormalization/macroF1"))
model_mel_LN_AUC = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/LayerNormalization/ROC-AUC"))

# Same four variants, loaded from the "downsample" sub-directories
# (presumably trained on the ds_nontonal_speech=True data -- TODO confirm).
model_mel_dropout_F1_ds = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/Dropout/macroF1/downsample"))
model_mel_dropout_AUC_ds = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/Dropout/ROC-AUC/downsample"))
model_mel_LN_F1_ds = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/LayerNormalization/macroF1/downsample"))
model_mel_LN_AUC_ds = keras.saving.load_model(path_finder("model/melspectrogram_norm_nan/MLP_corpora_categories/PCA/LayerNormalization/ROC-AUC/downsample"))


melspectrogram_norm_output/BibleTTS-akuapem-twi_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-asante-twi_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-ewe_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-hausa_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-lingala_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-yoruba_melspectrogram.npy
melspectrogram_norm_output/Buckeye_melspectrogram.npy
melspectrogram_norm_output/EUROM_melspectrogram.npy
melspectrogram_norm_output/HiltonMoser2022_speech_melspectrogram.npy
melspectrogram_norm_output/LibriSpeech_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-AR_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-ES_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-FR_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-TR_melspectrogram.npy
melspectrogram_norm_output/MozillaCommonVoice-ab_melspectrogram.npy
melspectrogram_norm_output/MozillaCommonVoice-ar_melspectrogram.npy
melspectr

  target.replace({


Good to go!
melspectrogram_norm_output/BibleTTS-akuapem-twi_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-asante-twi_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-ewe_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-hausa_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-lingala_melspectrogram.npy
melspectrogram_norm_output/BibleTTS-yoruba_melspectrogram.npy
melspectrogram_norm_output/Buckeye_melspectrogram.npy
melspectrogram_norm_output/EUROM_melspectrogram.npy
melspectrogram_norm_output/HiltonMoser2022_speech_melspectrogram.npy
melspectrogram_norm_output/LibriSpeech_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-AR_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-ES_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-FR_melspectrogram.npy
melspectrogram_norm_output/MediaSpeech-TR_melspectrogram.npy
melspectrogram_norm_output/MozillaCommonVoice-ab_melspectrogram.npy
melspectrogram_norm_output/MozillaCommonVoice-ar_melspectrogram.n

  target.replace({


Embedding data and meta data mismatched!


In [9]:
# Score every mel-spectrogram model on its matching test set and tag each
# one-row result with the model's name.
eval_mel_dropout_F1 = eval_model(model_mel_dropout_F1, test_dataset_mel).assign(model='mel_dropout_F1')
eval_mel_dropout_AUC = eval_model(model_mel_dropout_AUC, test_dataset_mel).assign(model='mel_dropout_AUC')
eval_mel_LN_F1 = eval_model(model_mel_LN_F1, test_dataset_mel).assign(model='mel_LN_F1')
eval_mel_LN_AUC = eval_model(model_mel_LN_AUC, test_dataset_mel).assign(model='mel_LN_AUC')

# Models from the "downsample" directories are scored on the *_ds test set.
eval_mel_dropout_F1_ds = eval_model(model_mel_dropout_F1_ds, test_dataset_mel_ds).assign(model='mel_dropout_F1_ds')
eval_mel_dropout_AUC_ds = eval_model(model_mel_dropout_AUC_ds, test_dataset_mel_ds).assign(model='mel_dropout_AUC_ds')
eval_mel_LN_F1_ds = eval_model(model_mel_LN_F1_ds, test_dataset_mel_ds).assign(model='mel_LN_F1_ds')
eval_mel_LN_AUC_ds = eval_model(model_mel_LN_AUC_ds, test_dataset_mel_ds).assign(model='mel_LN_AUC_ds')

# Stack the eight rows into one summary table.
df_mel_eval = pd.concat(
    [
        eval_mel_dropout_F1,
        eval_mel_dropout_AUC,
        eval_mel_LN_F1,
        eval_mel_LN_AUC,
        eval_mel_dropout_F1_ds,
        eval_mel_dropout_AUC_ds,
        eval_mel_LN_F1_ds,
        eval_mel_LN_AUC_ds,
    ],
    ignore_index=True,
)

# Write the summary to a CSV whose name carries a minute-resolution timestamp.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
df_mel_eval.to_csv("model/MLP_summary_mel_"+time_stamp+".csv", index=False)

[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - PR-AUC: 0.7403 - ROC-AUC: 0.9437 - accuracy: 0.7590 - loss: 0.0441 - macro_f1_score_10: 0.4373 - macro_f1_score_15: 0.4508 - macro_f1_score_20: 0.4644 - macro_f1_score_25: 0.4723 - macro_f1_score_30: 0.4776 - macro_f1_score_35: 0.4767 - macro_f1_score_40: 0.4583 - macro_f1_score_45: 0.4080 - macro_f1_score_5: 0.3991 - macro_f1_score_50: 0.3717 - macro_f1_score_55: 0.3305 - macro_f1_score_60: 0.2767 - macro_f1_score_65: 0.2105 - macro_f1_score_70: 0.1565 - macro_f1_score_75: 0.1254 - macro_f1_score_80: 0.1107 - macro_f1_score_85: 0.1003 - macro_f1_score_90: 0.0922 - macro_f1_score_95: 0.0813
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - f1_score: 0.4776 - loss: 0.0441
[1m414/414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - PR-AUC: 0.7455 - ROC-AUC: 0.9458 - accuracy: 0.7638 - loss: 0.0172 - macro_f1_score_10: 0.4318 - macro_f1_score_15: 0.4525 - macro_f1_scor

# AST

In [None]:
# Build the AST data twice: once with the default settings and once with
# ds_nontonal_speech=True. The first two return values of each call are
# discarded; n_feat_AST and n_target are overwritten by the second call.
_, _, test_dataset_AST, n_feat_AST, n_target = prepData_AST()
_, _, test_dataset_AST_ds, n_feat_AST, n_target = prepData_AST(ds_nontonal_speech=True)

# Load the saved AST models. The directory names distinguish Dropout vs.
# LayerNormalization and macroF1 vs. ROC-AUC (presumably the regularisation
# layer and the model-selection objective -- confirm against the training code).
model_AST_dropout_F1 = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/Dropout/macroF1"))
model_AST_dropout_AUC = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/Dropout/ROC-AUC"))
model_AST_LN_F1 = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/LayerNormalization/macroF1"))
model_AST_LN_AUC = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/LayerNormalization/ROC-AUC"))

# Same four variants, loaded from the "downsample" sub-directories
# (presumably trained on the ds_nontonal_speech=True data -- TODO confirm).
model_AST_dropout_F1_ds = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/Dropout/macroF1/downsample"))
model_AST_dropout_AUC_ds = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/Dropout/ROC-AUC/downsample"))
model_AST_LN_F1_ds = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/LayerNormalization/macroF1/downsample"))
model_AST_LN_AUC_ds = keras.saving.load_model(path_finder("model/AST/MLP_corpora_categories/LayerNormalization/ROC-AUC/downsample"))

In [None]:
# Score every AST model on its matching test set and tag each one-row result
# with the model's name.
eval_AST_dropout_F1 = eval_model(model_AST_dropout_F1, test_dataset_AST).assign(model='AST_dropout_F1')
eval_AST_dropout_AUC = eval_model(model_AST_dropout_AUC, test_dataset_AST).assign(model='AST_dropout_AUC')
eval_AST_LN_F1 = eval_model(model_AST_LN_F1, test_dataset_AST).assign(model='AST_LN_F1')
eval_AST_LN_AUC = eval_model(model_AST_LN_AUC, test_dataset_AST).assign(model='AST_LN_AUC')

# Models from the "downsample" directories are scored on the *_ds test set.
eval_AST_dropout_F1_ds = eval_model(model_AST_dropout_F1_ds, test_dataset_AST_ds).assign(model='AST_dropout_F1_ds')
eval_AST_dropout_AUC_ds = eval_model(model_AST_dropout_AUC_ds, test_dataset_AST_ds).assign(model='AST_dropout_AUC_ds')
eval_AST_LN_F1_ds = eval_model(model_AST_LN_F1_ds, test_dataset_AST_ds).assign(model='AST_LN_F1_ds')
eval_AST_LN_AUC_ds = eval_model(model_AST_LN_AUC_ds, test_dataset_AST_ds).assign(model='AST_LN_AUC_ds')

# Stack the eight rows into one summary table.
df_AST_eval = pd.concat(
    [
        eval_AST_dropout_F1,
        eval_AST_dropout_AUC,
        eval_AST_LN_F1,
        eval_AST_LN_AUC,
        eval_AST_dropout_F1_ds,
        eval_AST_dropout_AUC_ds,
        eval_AST_LN_F1_ds,
        eval_AST_LN_AUC_ds,
    ],
    ignore_index=True,
)

# Write the summary to a CSV whose name carries a minute-resolution timestamp.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
df_AST_eval.to_csv("model/MLP_summary_AST_"+time_stamp+".csv", index=False)