In [1]:
from pathlib import Path
from tqdm import tqdm

def prepare_RAVDESS_DS(path_audios):
    """
    Collect the path, emotion label and actor ID of every RAVDESS audio file.

    File names are hyphen-separated numeric fields, e.g. ``01-01-01-01-01-01-01.wav``.
    The third field is the emotion code and the last field is the actor ID.

    Emotion labels are shifted to start at 0 (file-name code minus one):
        0: Neutral, 1: Calm, 2: Happy, 3: Sad, 4: Angry, 5: Fear, 6: Disgust, 7: Surprise

    :param path_audios: Path to the folder that contains all the audios in .wav format
        (described as 16kHz, single-channel/mono; this is not checked here). Files are
        searched exactly one directory level below it (``<path_audios>/*/*.wav``).
    :return: Tuple ``(wav_paths, emotions, names)`` of three parallel lists:
        ``wav_paths`` holds ``pathlib.Path`` objects, ``emotions`` the zero-based integer
        emotion labels and ``names`` the integer actor IDs. Entries follow the order in
        which ``Path.glob`` yields the files; no sorting is applied.
    :raises ValueError: If a file name does not contain the expected numeric fields.
    """
    wav_paths, emotions, names = [], [], []
    for path in tqdm(Path(path_audios).glob("*/*.wav")):
        # path.name is separator-agnostic, unlike splitting str(path) on '/'.
        fields = path.name.split('.')[0].split("-")
        try:
            actor = int(fields[-1])
            label = int(fields[2]) - 1  # Start emotions in 0
        except (ValueError, IndexError) as exc:
            # Fail loudly and say which file is malformed instead of a bare int() error.
            raise ValueError(f"Unexpected RAVDESS file name: {path}") from exc

        wav_paths.append(path)
        emotions.append(label)
        names.append(actor)

    return wav_paths, emotions, names

In [2]:
import numpy as np
import csv

# Scan the 'dataset' folder: wav_paths are the audio files, emotions the zero-based
# emotion labels and names the actor IDs, all in the same order.
wav_paths, emotions, names = prepare_RAVDESS_DS('dataset')
# Turn the list of paths into a NumPy array.
wav_paths = np.asarray(wav_paths)

1440it [00:00, 65454.32it/s]


In [3]:
import pandas as pd
import natsort

def make_df(image_path, emotion_labels=None, actor_ids=None):
    """
    Build a DataFrame that pairs every file in ``image_path`` with an emotion and an actor.

    The directory entries are sorted in natural order and matched *by position* with the
    labels: the i-th file gets the i-th emotion and the i-th actor.
    NOTE(review): this assumes the files in ``image_path`` were produced in the same order
    as the label lists -- confirm against the code that generated them.

    :param image_path: Directory whose entries become the ``path`` column.
    :param emotion_labels: Sequence of emotion labels, one per file. Defaults to the
        notebook-level ``emotions`` list.
    :param actor_ids: Sequence of actor IDs, one per file. Defaults to the notebook-level
        ``names`` list.
    :return: ``pandas.DataFrame`` with the columns ``path``, ``emotion`` and ``actor``.
    :raises ValueError: If the number of files differs from the number of labels or actors;
        positional pairing would otherwise fail or silently drop files.
    """
    # Local import so the function does not rely on a later cell having imported os.
    import os

    # Fall back to the notebook globals to stay compatible with make_df(image_path).
    if emotion_labels is None:
        emotion_labels = emotions
    if actor_ids is None:
        actor_ids = names

    paths = [
        os.path.join(image_path, entry)
        for entry in natsort.natsorted(os.listdir(image_path))
    ]

    if not (len(paths) == len(emotion_labels) == len(actor_ids)):
        raise ValueError(
            f"Cannot pair files with labels: {len(paths)} files in {image_path!r}, "
            f"{len(emotion_labels)} emotions and {len(actor_ids)} actors"
        )

    return pd.DataFrame({
        "path": paths,
        "emotion": list(emotion_labels),
        "actor": list(actor_ids),
    })

In [5]:
import os
# Fold index -> actor IDs whose samples form the test split of that fold;
# every other actor goes to the training split.
actors_per_fold = {
    0: [2, 5, 14, 15, 16],
    1: [3, 6, 7, 13, 18],
    2: [10, 11, 12, 19, 20],
    3: [8, 17, 21, 23, 24],
    4: [1, 4, 9, 22],
}

save_roots = ['chroma_stft', 'spectral_contrast', 'tonnetz', 'mfcc']

# Options shared by every CSV written below (no index column, no header row).
csv_kwargs = dict(sep=',', encoding='utf-8', index=False, header=False)

for save_root in save_roots:
    # '<feature>_fold' receives the CSVs, '<feature>_images' holds the inputs.
    save_path = save_root + '_fold'
    save_root = save_root + '_images'
    os.makedirs(save_root, exist_ok=True)

    df = make_df(save_root)

    for i, fold_actors in actors_per_fold.items():
        # Boolean mask of the rows belonging to this fold's test actors.
        in_test = df['actor'].isin(fold_actors)
        test_df = df.loc[in_test].reset_index(drop=True)
        train_df = df.loc[~in_test].reset_index(drop=True)

        fold_path = os.path.join(save_path, str(i))
        os.makedirs(fold_path, exist_ok=True)

        train_df.to_csv(f'{fold_path}/train.csv', **csv_kwargs)
        test_df.to_csv(f'{fold_path}/test.csv', **csv_kwargs)