In [1]:
import librosa
import librosa.display as librosa_display
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import os
import time
from natsort import natsorted

In [2]:

# Maps the single-letter emotion code taken from a file name to its integer
# class index (the position of the letter in the string below).
# NOTE(review): despite the name, the direction is code -> label id.
# The letters look like the German EMO-DB emotion codes (L boredom, A fear,
# E disgust, F happiness, T sadness, W anger, N neutral) — confirm against
# the dataset documentation.
label2name = {code: index for index, code in enumerate("LAEFTWN")}

def prepare_EMODB(data_root):
    """Collect file paths, emotion labels and speaker ids from a dataset folder.

    File names are parsed positionally: the first two characters are taken as
    the speaker id and the character at index 5 as the emotion code, which is
    translated to an integer through ``label2name``.

    Args:
        data_root: Directory that directly contains the ``.wav`` recordings.

    Returns:
        Three parallel lists ``(paths, emotions, actors)`` ordered by the
        natural sort of the file names: the path of each recording
        (``data_root`` joined with the file name), its integer emotion label,
        and its two-character speaker id.

    Raises:
        KeyError: A ``.wav`` file has a character at index 5 that is not a
            key of ``label2name``.
        IndexError: A ``.wav`` file name is shorter than six characters.
    """
    paths, emotions, actors = [], [], []

    for file_name in natsorted(os.listdir(data_root)):
        # os.listdir returns every entry in the folder. Stray files such as
        # .DS_Store or a README do not follow the naming scheme and would
        # break the positional parsing below, so only recordings are kept.
        if not file_name.lower().endswith('.wav'):
            continue

        paths.append(os.path.join(data_root, file_name))
        emotions.append(label2name[file_name[5]])
        actors.append(file_name[:2])

    return paths, emotions, actors

In [3]:
# Index the 'EMO-DB' folder: one path, one integer emotion label and one
# speaker id per entry, kept as three parallel lists.
wav_paths, emotions, actors = prepare_EMODB(data_root='EMO-DB')

In [4]:
# Every signal is padded or cropped to exactly this many samples before the
# MFCCs are computed, so all feature matrices end up with the same shape.
mean_signal_length = 100000


def _fit_to_length(signal, target_len):
    """Zero-pad or center-crop a 1-D signal to exactly ``target_len`` samples.

    Returns ``(fitted_signal, was_padded)``; ``was_padded`` is False when the
    signal was already long enough (including an exact match).
    """
    current_len = len(signal)

    if current_len < target_len:
        missing = target_len - current_len
        left = missing // 2
        # When the amount of padding is odd, the extra sample goes on the right.
        right = missing - left
        padded = np.pad(signal, (left, right), 'constant', constant_values=0)
        return padded, True

    # Keep the central window of the recording.
    start = (current_len - target_len) // 2
    return signal[start:start + target_len], False


def get_feature(paths: list, mfcc_len: int = 39, flatten: bool = False):
    """Compute a fixed-size MFCC representation for each audio file.

    Each file is loaded with ``librosa.load`` (default settings), brought to
    ``mean_signal_length`` samples by symmetric zero-padding or center
    cropping, and converted to MFCCs.

    Args:
        paths: Collection of audio file paths to process.
        mfcc_len: Number of MFCC coefficients to compute per frame.
        flatten: When True, each feature matrix is flattened to 1-D.

    Returns:
        ``(features, pad_cnt, non_pad_cnt)`` where ``features`` is a list with
        one array per input path, shaped ``(frames, mfcc_len)`` (or 1-D when
        ``flatten`` is set), ``pad_cnt`` is the number of signals that had to
        be zero-padded and ``non_pad_cnt`` the number that were cropped or
        already had the target length.
    """
    features = []
    pad_cnt, non_pad_cnt = 0, 0

    # Iterating the collection directly (rather than enumerate) lets tqdm
    # pick up the total and show real progress.
    for path in tqdm(paths, desc='get features.....'):
        signal, fs = librosa.load(path)

        signal, was_padded = _fit_to_length(signal, mean_signal_length)
        if was_padded:
            pad_cnt += 1
        else:
            non_pad_cnt += 1

        # librosa returns (coefficients, frames); transpose to time-major.
        mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=mfcc_len).T
        if flatten:
            mfcc = mfcc.ravel()

        features.append(mfcc)

    return features, pad_cnt, non_pad_cnt

In [5]:
# Turn every recording into an MFCC matrix and keep track of how many
# signals were zero-padded versus not padded.
features, pad_cnt, non_pad_cnt = get_feature(paths=wav_paths)

get features.....: 535it [00:22, 24.15it/s]


In [6]:
# Sanity check: (signals that were zero-padded, signals that were cropped or
# already had the target length).
pad_cnt, non_pad_cnt

(507, 28)

In [6]:
# Shape of the first feature matrix: (frames, MFCC coefficients).
features[0].shape

(196, 39)

In [7]:
# Combine the per-recording matrices into one array and turn the integer
# emotion labels into an array aligned with it.
X, y = np.asarray(features), np.asarray(emotions)

In [8]:
# Report the final array shapes before writing them to disk.
print(X.shape, y.shape)

# Persist features and labels.
# NOTE: both arrays are written back-to-back into the SAME file. To read them
# again, open the file once and call np.load(f) twice (first X, then y);
# np.load('TIMNET-dataset/EMODB.npy') by path only returns X.
os.makedirs('TIMNET-dataset', exist_ok=True)
with open('TIMNET-dataset/EMODB.npy', 'wb') as f:
    np.save(f, X)
    np.save(f, y)

(535, 196, 39) (535,)
