# SAVEE and ESD testing

Here, the model that performed best on the SAVEE validation subset is tested first on the SAVEE test subset and then on the ESD test subset.

The reverse experiment is also run: the best model trained on ESD is tested on the SAVEE test data.

In [1]:
import sys
# Put the parent directory first on the import path (presumably so that the
# `libs` package imported in the next cell resolves).
sys.path.insert(0, "../")

In [2]:
from tensorflow import keras

import libs.data_loader as data_loader
import libs.data_operations as data_ops

import re

# Root directories of the two corpora (relative paths); each is passed to
# data_loader.get_dataset_information together with its path parsers.
ESD_data_dir = "../data/ESD/"
SAVEE_data_dir = "../data/SAVEE/"

# SAVEE - specific
def get_SAVEE_label(file_path):
  """Extract the label code from a SAVEE file path.

  Everything up to and including the last underscore is removed, and so is a
  trailing ``<digits>.wav``; whatever is left is returned.

  Example: ``"../data/SAVEE/DC_a01.wav"`` -> ``"a"``.
  (Assumes file names shaped like ``<speaker>_<label><digits>.wav`` --
  TODO confirm against the dataset on disk.)

  Args:
    file_path: Path (or bare file name) of a SAVEE ``.wav`` file.

  Returns:
    The part of ``file_path`` that survives the two removals.
  """
  # Raw string: "\_" inside a normal literal is an invalid escape sequence.
  # The dot is escaped so that only a literal ".wav" suffix is stripped.
  return re.sub(r'.+_|[0-9]+\.wav', '', file_path)

def get_SAVEE_speaker_name(file_path):
  """Extract the speaker identifier from a SAVEE file path.

  Three things are removed from ``file_path``: the directory part (everything
  up to the last ``/``), every underscore, and a
  ``<lowercase letters><digits>.wav`` tail. Whatever is left is returned.

  Example: ``"../data/SAVEE/DC_a01.wav"`` -> ``"DC"``.
  (Assumes file names shaped like ``<speaker>_<label><digits>.wav`` --
  TODO confirm against the dataset on disk.)

  Args:
    file_path: Path (or bare file name) of a SAVEE ``.wav`` file.

  Returns:
    The part of ``file_path`` that survives the removals.
  """
  # Raw string: "\_" inside a normal literal is an invalid escape sequence.
  # The dot is escaped so that only a literal ".wav" suffix is stripped.
  return re.sub(r'.*[/]+|_|[a-z]+[0-9]+\.wav', '', file_path)

# ESD - specific
def get_ESD_label(file_path):
  """Extract the label code from an ESD file path.

  Two things are removed from ``file_path``: the directory part together with
  a single lowercase letter and the underscore that follows it, and a
  trailing ``_<digits>.wav``. Whatever is left is returned.

  Example: ``"../data/ESD/x_h_0001.wav"`` -> ``"h"``.
  (Assumes file names shaped like ``<letter>_<label>_<digits>.wav`` --
  TODO confirm against the dataset on disk.)

  Note that the prefix is only stripped when the path contains a ``/``
  directly before the single-letter prefix.

  Args:
    file_path: Path of an ESD ``.wav`` file, including its directory.

  Returns:
    The part of ``file_path`` that survives the removals.
  """
  # Raw string: "\_" inside a normal literal is an invalid escape sequence.
  # The dot is escaped so that only a literal ".wav" suffix is stripped.
  return re.sub(r'.*[/]+[a-z]_|_[0-9]+\.wav', '', file_path)

def get_ESD_speaker_name(file_path):
  """Extract the speaker identifier from an ESD file path.

  Three things are removed from ``file_path``: the directory part (everything
  up to the last ``/``), every underscore, and a
  ``<lowercase letters>_<digits>.wav`` tail. Whatever is left is returned.

  Example: ``"../data/ESD/x_h_0001.wav"`` -> ``"x"``.
  (Assumes file names shaped like ``<speaker>_<label>_<digits>.wav`` --
  TODO confirm against the dataset on disk.)

  Args:
    file_path: Path (or bare file name) of an ESD ``.wav`` file.

  Returns:
    The part of ``file_path`` that survives the removals.
  """
  # Raw string: "\_" inside a normal literal is an invalid escape sequence.
  # The dot is escaped so that only a literal ".wav" suffix is stripped.
  return re.sub(r'.*[/]+|_|[a-z]+_[0-9]+\.wav', '', file_path)

In [2]:
# Index the SAVEE directory using the SAVEE-specific label and speaker parsers.
# The second return value is not needed in this notebook and is discarded.
SAVEE_df, _, SAVEE_max_sample_rate = data_loader.get_dataset_information(SAVEE_data_dir, get_SAVEE_label, get_SAVEE_speaker_name)

In [3]:
# Index the ESD directory using the ESD-specific label and speaker parsers.
# The second return value is not needed in this notebook and is discarded.
ESD_df, _, ESD_max_sample_rate = data_loader.get_dataset_information(ESD_data_dir, get_ESD_label, get_ESD_speaker_name)

In [4]:
# Show the sample rate reported for each corpus. They differ, so each
# experiment below passes the rate of the corpus its model was trained on.
SAVEE_max_sample_rate, ESD_max_sample_rate

(44100, 16000)

## Best SAVEE on ESD

In [5]:
# Load the SAVEE-trained model. The directory name encodes the training setup
# as m{model}_s{audio_seconds}_b{batch_size}_d{dropout}_p{patience}_o_{data_ops}_sz{split}:
# here "PaperModel", 3 s of audio, batch size 16, dropout 0, patience 80,
# crop + fade15 preprocessing and a 62.5/20.833/16.666 split.
SAVEE_model = keras.models.load_model("../saved_models/mPaperModel_s3_b16_d0_p80_o_crop_fade15_sz62.5,20.833,16.666/")
# Print the architecture of the loaded model.
SAVEE_model.summary()

Model: "sequential_79"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_553 (Conv1D)         (None, 132280, 32)        704       
                                                                 
 batch_normalization_553 (Ba  (None, 132280, 32)       128       
 tchNormalization)                                               
                                                                 
 max_pooling1d_474 (MaxPooli  (None, 66140, 32)        0         
 ng1D)                                                           
                                                                 
 conv1d_554 (Conv1D)         (None, 66122, 64)         38976     
                                                                 
 batch_normalization_554 (Ba  (None, 66122, 64)        256       
 tchNormalization)                                               
                                                     

In [6]:
def data_preprocessing(total_audio_frames):
  """Build the 'crop_fade15' preprocessing steps used by the SAVEE model.

  Args:
    total_audio_frames: Number of audio frames per sample; 15 % of it is
      passed as both arguments of ``data_ops.Fade``.

  Returns:
    A list with a ``data_ops.Crop`` step followed by a ``data_ops.Fade`` step.
  """
  # NOTE(review): this is a float; the two Fade arguments are presumably the
  # fade-in and fade-out lengths -- confirm in libs.data_operations that
  # non-integer values are accepted.
  fade_frames = total_audio_frames * 0.15
  return [
    data_ops.Crop(),
    data_ops.Fade(fade_frames, fade_frames),
  ]

In [7]:
# SAVEE has the labels "f" and "d" but ESD does not. Rather than dropping them
# from SAVEE, add all-zero "label_f"/"label_d" columns to ESD so that ESD_df
# has a label column for every SAVEE label.
# NOTE(review): new columns are appended at the end of ESD_df, so their
# position may differ from the label-column order of SAVEE_df -- confirm that
# data_loader.load_datasets matches label columns by name; otherwise the ESD
# targets will not line up with the outputs of the SAVEE-trained model.
ESD_df["label_f"] = 0
ESD_df["label_d"] = 0

In [8]:
# Build the SAVEE test subset with the settings encoded in the SAVEE model's
# directory name: 3 (presumably seconds of audio), the crop + fade pipeline and
# the 62.5/20.833/16.666 split (presumably train/val/test percentages --
# confirm in libs.data_loader). Only the third returned dataset is kept.
_, _, SAVEE_test_ds, _ = data_loader.load_datasets(SAVEE_df, SAVEE_max_sample_rate, 3, data_preprocessing, [62.5, 20.833, 16.666])













In [9]:
# ESD is loaded with SAVEE's sample rate and the same 3 / crop + fade settings
# because it is fed to the SAVEE-trained model. The [5, 5, 90] split
# presumably puts 90 % of ESD into the test subset -- confirm in
# libs.data_loader. Only the third returned dataset is kept.
_, _, ESD_test_ds, _ = data_loader.load_datasets(ESD_df, SAVEE_max_sample_rate, 3, data_preprocessing, [5, 5, 90])













In [10]:
# List the distinct labels present in SAVEE.
SAVEE_df["label"].unique()

array(['a', 'd', 'f', 'h', 'n', 'sa', 'su'], dtype=object)

In [11]:
# List the distinct labels present in ESD, for comparison with SAVEE.
ESD_df["label"].unique()

array(['a', 'h', 'n', 'sa', 'su'], dtype=object)

In [12]:
# Batch both test datasets before evaluation; 16 matches the "b16" in the
# SAVEE model's directory name.
ESD_test_ds = ESD_test_ds.batch(16)
SAVEE_test_ds = SAVEE_test_ds.batch(16)

In [13]:
# In-corpus check: SAVEE-trained model on the SAVEE test subset.
# The printed list is the loss followed by the compiled metric(s); the second
# value is presumably accuracy -- confirm against the model's compile settings.
SAVEE_model.evaluate(SAVEE_test_ds)



[5.684537887573242, 0.6299999952316284]

In [14]:
# Cross-corpus check: SAVEE-trained model on the ESD test subset.
# The printed list is the loss followed by the compiled metric(s); the second
# value is presumably accuracy -- confirm against the model's compile settings.
SAVEE_model.evaluate(ESD_test_ds)



[6.133326530456543, 0.19824999570846558]

## Best ESD on SAVEE

Load the ESD and SAVEE test subsets, applying the same preprocessing that the model requires.

In this specific case, the best model trained on ESD requires the audio to be split into 4-second pieces, followed by cropping and normalization.

In [5]:
# 'crop_norm'
def data_preprocessing(_):
  """Build the 'crop_norm' preprocessing steps used by the ESD model.

  Args:
    _: Ignored; accepted only so the call signature matches what
      ``data_loader.load_datasets`` passes in.

  Returns:
    A list with a ``data_ops.Crop`` step followed by a ``data_ops.Normalize``
    step.
  """
  return [
    data_ops.Crop(),
    data_ops.Normalize(),
  ]

In [6]:
# The labels "f" and "d" exist in SAVEE but not in ESD, so strip them from
# SAVEE: first the two per-label columns, then every row carrying either label.
SAVEE_df = SAVEE_df.drop(columns=["label_f", "label_d"])
SAVEE_df = SAVEE_df.loc[~SAVEE_df["label"].isin(["f", "d"])]

In [7]:
# Check which labels remain in SAVEE after the filtering above.
SAVEE_df["label"].unique()

array(['a', 'h', 'n', 'sa', 'su'], dtype=object)

In [8]:
# Build the ESD test subset with the settings encoded in the ESD model's
# directory name: 4 (presumably seconds of audio), crop + normalize, and a
# 70/15/15 split. Only the third returned dataset is kept; it is a tensor
# dataset of (feats, target) tuples.
# NOTE(review): the "Best SAVEE on ESD" section adds "label_f"/"label_d"
# columns to ESD_df in place. The restarted cell counters suggest this section
# was run in a fresh session without them -- confirm before running the
# notebook top to bottom.
_, _, ESD_test_ds, _ = data_loader.load_datasets(ESD_df, ESD_max_sample_rate, 4, data_preprocessing, [70, 15, 15])













In [10]:
# SAVEE is loaded with ESD's sample rate and the same 4 / crop + normalize
# settings because it is fed to the ESD-trained model. The [0, 0, 100] split
# presumably assigns every remaining SAVEE sample to the test subset --
# confirm in libs.data_loader. Only the third returned dataset is kept.
_, _, SAVEE_test_ds, _ = data_loader.load_datasets(SAVEE_df, ESD_max_sample_rate, 4, data_preprocessing, [0, 0, 100])













In [12]:
# Batch both test datasets before evaluation; 64 matches the "b64" in the
# ESD model's directory name.
ESD_test_ds = ESD_test_ds.batch(64)
SAVEE_test_ds = SAVEE_test_ds.batch(64)

In [13]:
# Saved-model directories follow the naming scheme used at training time:
#   model_name = f"m{model_factory.get_model_name()}_s{audio_seconds}_b{batch_size}_d{dropout}_p{patience}_o_{data_ops_name}_sz{str(train_val_tests_percentage).replace(' ', '')[1:-1]}"
# For the ESD model loaded here that means 4 s of audio, batch size 64,
# dropout 0.5, patience 80, 'crop_norm' preprocessing and a 70/15/15 split.
ESD_model = keras.models.load_model("../saved_models/mModel_convl5_fcl3_fcsize20_filters32_fsize9_poolsize4_act_relu_drop0.2_lr001_b19_b2999____s4_b64_d0.5_p80_o_crop_norm_sz70,15,15/")
# Print the architecture of the loaded model.
ESD_model.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_85 (Conv1D)          (None, 63992, 32)         320       
                                                                 
 batch_normalization_85 (Bat  (None, 63992, 32)        128       
 chNormalization)                                                
                                                                 
 max_pooling1d_68 (MaxPoolin  (None, 15998, 32)        0         
 g1D)                                                            
                                                                 
 conv1d_86 (Conv1D)          (None, 15990, 32)         9248      
                                                                 
 batch_normalization_86 (Bat  (None, 15990, 32)        128       
 chNormalization)                                                
                                                     

In [14]:
# In-corpus check: ESD-trained model on the ESD test subset.
# The printed list is the loss followed by the compiled metric(s); the second
# value is presumably accuracy -- confirm against the model's compile settings.
ESD_model.evaluate(ESD_test_ds)



[2.055967092514038, 0.7171755433082581]

In [15]:
# Cross-corpus check: ESD-trained model on the SAVEE test subset.
# The printed list is the loss followed by the compiled metric(s); the second
# value is presumably accuracy -- confirm against the model's compile settings.
ESD_model.evaluate(SAVEE_test_ds)



[38.42875289916992, 0.18581080436706543]