In [11]:
import glob
import os
import pickle

import librosa
import numpy as np
from tensorflow.python.keras.utils import to_categorical

from data import FeatureType
from legacy.convert_iemocap_dataset_to_pkl import load_wav, split_audio, remove_silent, _get_mfcc


In [12]:
# Root folder of the SAVEE recordings; it holds one sub-folder per speaker.
# Set the SAVEE_DATA_DIR environment variable to point at a local copy of the
# dataset; the original author's path is kept as the fallback.
data_dir = os.environ.get('SAVEE_DATA_DIR') or '/Volumes/Kingston/datasets/audio/SAVEE/AudioData'

# Speaker sub-folders that are scanned for .wav files.
SPEAKERS = ['DC', 'JE', 'JK', 'KL']
# Filename prefixes of the emotions that are kept; the position in this list is
# the class index returned by get_emotion.
# NOTE(review): presumably happy / sad / angry / neutral — confirm against the dataset docs.
EMOTIONS = ['h', 'sa', 'a', 'n']

In [13]:
def get_emotion(filepath:str):
    """Return the class index of the emotion encoded in a recording's file name.

    The emotion code is whatever precedes the last six characters of the file
    name (e.g. ``'sa'`` for ``'sa01.wav'``), i.e. the name is assumed to end in
    a two-digit number followed by a four-character extension.

    Args:
        filepath: Path to the audio file; only its final component is inspected.

    Returns:
        The position of the emotion code in ``EMOTIONS``, or ``None`` when the
        code is not one of the selected emotions.
    """
    # os.path.basename understands the platform's separators, so paths that
    # glob returns with backslashes on Windows are handled as well.
    filename = os.path.basename(filepath)
    emo = filename[0:-6]
    if emo not in EMOTIONS:
        return None
    return EMOTIONS.index(emo)

In [14]:
# Collect every recording whose file name carries one of the selected emotions.
filtered_files = []
for S in SPEAKERS:
    speaker_dir = os.path.join(data_dir, S)
    # glob returns matches in filesystem-dependent order; sorting makes the
    # order of the resulting dataset reproducible between runs and machines.
    file_list = sorted(glob.glob(os.path.join(speaker_dir, "*.wav")))
    for f in file_list:
        emotion = get_emotion(f)
        if emotion is not None:
            filtered_files.append(f)


In [15]:
# Report the mean duration (seconds) and the sampling rate of the selected files.
file_lengths = []
sample_rates = set()
sr = None
for f in filtered_files:
    # sr=None keeps each file's native sampling rate. Passing the previous
    # file's rate back in would silently resample every file after the first
    # and hide any mismatch between recordings.
    audio, sr = librosa.load(f, sr=None)
    sample_rates.add(sr)
    file_lengths.append(len(audio) / sr)

file_lengths = np.array(file_lengths)
if file_lengths.size == 0:
    # Avoid a NaN mean (and a RuntimeWarning) when nothing matched.
    print("No audio files found")
else:
    print("Mean Audio File Length : {}".format(np.mean(file_lengths)))
    print("SR: {}".format(sr))
    if len(sample_rates) > 1:
        print("WARNING: mixed sampling rates found: {}".format(sorted(sample_rates)))

Mean Audio File Length : 3.841959259259259
SR: 44100


In [16]:
def get_mfcc(filename, duration):
    """Compute MFCC features for the first frame of a recording.

    The file is loaded with ``load_wav``, passed through ``remove_silent``,
    cut with ``split_audio`` and the first resulting piece is handed to
    ``_get_mfcc``.

    Args:
        filename: Path of the audio file to load.
        duration: Frame length forwarded to ``split_audio``
            (presumably in seconds — TODO confirm against the helper).

    Returns:
        Whatever ``_get_mfcc`` returns for the first frame.
    """
    waveform, sample_rate = load_wav(filename)
    frames = split_audio(remove_silent(waveform), sample_rate, duration)
    # Only the first frame is used; any remaining frames are discarded.
    return _get_mfcc(frames[0], sample_rate)



In [17]:
# Build one record per selected file: MFCC features, raw signal, one-hot label
# and a short '<speaker>/<file>' identifier.
data_list = []

for f in filtered_files:
    # os.path copes with the platform separator that glob may return, while the
    # stored identifier always uses '/' so the pickle content stays portable.
    relative_name = os.path.basename(os.path.dirname(f)) + '/' + os.path.basename(f)
    datum = {
        # 3 is the frame duration passed to get_mfcc; the output pickle name
        # also says "3sec" — keep the two in sync.
        FeatureType.MFCC.name: get_mfcc(f, 3),
        # NOTE(review): get_mfcc unpacks load_wav's result as (audio, sr), so
        # this entry appears to hold that pair rather than a bare waveform —
        # confirm that consumers of the pickle expect it.
        'signal': load_wav(f),
        'y_emo': to_categorical(get_emotion(f), num_classes=len(EMOTIONS),dtype='int'),
        'filename': relative_name
    }

    data_list.append(datum)

# Array of dicts (object dtype) so the pickle keeps the same container type.
data_list = np.array(data_list)
print("end")

end


In [18]:
# Persist the assembled dataset.
# NOTE(review): "44k" and "3sec" in the name are hard-coded — keep them in sync
# with the sampling rate and frame duration actually used when building data_list.
pkl_filename = "../pkl/savee_sr_44k_3sec_{}-classes.pkl".format(len(EMOTIONS))
# open() does not create missing folders; make sure the target directory exists.
os.makedirs(os.path.dirname(pkl_filename), exist_ok=True)
with open(pkl_filename, 'wb') as pkl_file:
    pickle.dump(data_list, pkl_file)
print("saved: ", pkl_filename)

saved:  ../pkl/savee_sr_44k_3sec_4-classes.pkl
