In [None]:
import librosa
import librosa.display as librosa_display
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import os
import time

In [None]:
def prepare_CREMA_DS(path_audios):
    """Collect CREMA-D wav files with their emotion labels and actor indices.

    Every ``*.wav`` file directly inside ``path_audios`` is expected to have an
    underscore-separated name whose first field is a numeric actor id and
    whose third field is an emotion code (e.g. ``1001_DFA_ANG_XX.wav``).

    Args:
        path_audios: Directory containing the wav files (sub-directories are
            not searched).

    Returns:
        Three parallel lists ``(wav_paths, emotions, actors)``:
        ``wav_paths`` holds ``pathlib.Path`` objects in sorted order,
        ``emotions`` holds the integer label (SAD=0, ANG=1, DIS=2, FEA=3,
        HAP=4, NEU=5) or ``None`` for an unrecognised code, and ``actors``
        holds the actor id minus 1001.

    Raises:
        IndexError: A file name has fewer than three underscore-separated fields.
        ValueError: The first field of a file name is not an integer.
    """
    emotion_ids = {'SAD': 0, 'ANG': 1, 'DIS': 2, 'FEA': 3, 'HAP': 4, 'NEU': 5}
    wav_paths, emotions, actors = [], [], []

    # Sorting makes the sample order reproducible (glob order depends on the
    # filesystem) and hands tqdm a sized sequence so it can show a total.
    for path in tqdm(sorted(Path(path_audios).glob("*.wav"))):
        # Parse the file name only: splitting the whole path string would shift
        # the field indices whenever a parent directory contains an underscore.
        fields = path.stem.split('_')

        wav_paths.append(path)
        emotions.append(emotion_ids.get(fields[2]))
        actors.append(int(fields[0]) - 1001)

    return wav_paths, emotions, actors

In [None]:
# Index the recordings; 'CREMA-D' is a relative path, so it is resolved against
# the notebook's current working directory.
wav_paths, emotions, actors = prepare_CREMA_DS('CREMA-D')

In [None]:
# Sanity check: range of the actor indices (file-name actor id minus 1001).
max(actors), min(actors)

In [None]:
# Number of wav files that were found.
len(wav_paths)

In [None]:
# Audio / framing settings.
# NOTE(review): none of these three values is referenced by the cells visible
# here (get_feature calls librosa.load without `sr`) -- confirm whether they
# are still needed or should be wired into the feature extraction.
sample_rate = 16000  # presumably Hz -- TODO confirm

frame_length = 0.05  # presumably seconds -- TODO confirm
frame_stride = 0.0125  # presumably seconds -- TODO confirm

In [None]:
# Every clip is padded or cropped to exactly this many samples before MFCC extraction.
mean_signal_length = 100000


def get_feature(paths, mfcc_len: int = 39, flatten: bool = False):
    """Extract a fixed-size MFCC matrix for every audio file in ``paths``.

    Each file is loaded with ``librosa.load`` (called without ``sr``, so
    librosa's default sample rate is used), centre-padded with zeros or
    centre-cropped to ``mean_signal_length`` samples, and converted to MFCCs.

    Args:
        paths: Iterable of audio file paths accepted by ``librosa.load``.
        mfcc_len: Number of MFCC coefficients computed per frame.
        flatten: If true, each feature matrix is flattened to one dimension.

    Returns:
        A list with one array per input path, of shape ``(frames, mfcc_len)``,
        or ``(frames * mfcc_len,)`` when ``flatten`` is true.
    """
    features = []

    # Iterate over the paths directly (no enumerate) so tqdm can read len()
    # from a sized input and display a total and an ETA.
    for path in tqdm(paths, desc='get features.....'):
        signal, fs = librosa.load(path)
        s_len = len(signal)

        if s_len < mean_signal_length:
            # Too short: zero-pad both ends; when the gap is odd the extra
            # sample goes on the right.
            pad_len = mean_signal_length - s_len
            pad_rem = pad_len % 2
            pad_len //= 2
            signal = np.pad(signal, (pad_len, pad_len + pad_rem), 'constant', constant_values=0)
        else:
            # Long enough: keep the central window of the target length.
            start = (s_len - mean_signal_length) // 2
            signal = signal[start:start + mean_signal_length]

        # Transpose so that rows are frames and columns are coefficients.
        mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=mfcc_len).T
        features.append(mfcc.ravel() if flatten else mfcc)

    return features

In [11]:
# Extract MFCC features for every recording. This is the slow step: the
# recorded run took about five minutes for 7442 files.
features = get_feature(wav_paths)

get features.....: 7442it [05:04, 24.46it/s]


In [15]:
# Shape of one feature matrix as (frames, coefficients); the recorded output is (196, 39).
features[0].shape

(196, 39)

In [20]:
# Stack the per-file feature matrices into one array and the labels into a vector.
# Any None label (unrecognised emotion code) would turn y into an object array.
X = np.array(features)
y = np.array(emotions)

In [21]:
# Report the final shapes, then persist features and labels.
print(X.shape, y.shape)

# Both arrays are written back to back into the same file, features first and
# labels second.
out_dir = 'TIMNET-dataset'
os.makedirs(out_dir, exist_ok=True)
with open('TIMNET-dataset/CREMA.npy', 'wb') as out_file:
    for array in (X, y):
        np.save(out_file, array)

(7442, 196, 39) (7442,)
