In [12]:
from pathlib import Path
from tqdm import tqdm

def prepare_RAVDESS_DS(path_audios):
    """
    Index the RAVDESS audio files and decode emotion label and actor id from each file name.

    Every ``<path_audios>/<sub-folder>/*.wav`` file is expected to be named with
    dash-separated numeric fields, e.g. ``03-01-05-01-01-01-12.wav``. The third field
    is the 1-based emotion code and the last field is the actor id.

    Emotion labels are shifted so that they start at 0:
        0 Neutral, 1 Calm, 2 Happy, 3 Sad, 4 Angry, 5 Fear, 6 Disgust, 7 Surprise

    :param path_audios: Folder whose immediate sub-folders contain the .wav audios
    :return: Tuple ``(wav_paths, emotions, names)`` of three equally long lists:
             the ``pathlib.Path`` of every audio, its 0-based emotion label (int)
             and its actor id (int). Entries are ordered by sorted path.
    :raises ValueError: If a .wav file name does not follow the naming scheme above.
    """
    wav_paths, emotions, names = [], [], []

    # glob() yields files in a filesystem-dependent order. Sorting makes the index of
    # every audio reproducible, which matters because the rest of the notebook pairs
    # audios, labels and generated images purely by position.
    wav_files = sorted(Path(path_audios).glob("*/*.wav"))

    for path in tqdm(wav_files):
        # path.stem is the file name without directory and extension on every OS
        # (splitting str(path) on '/' does not separate Windows paths).
        fields = path.stem.split("-")
        try:
            actor = int(fields[-1])
            label = int(fields[2]) - 1  # Start emotions in 0
        except (IndexError, ValueError) as err:
            raise ValueError(
                "Unexpected RAVDESS file name (cannot parse emotion/actor): " + str(path)
            ) from err

        wav_paths.append(path)
        emotions.append(label)
        names.append(actor)

    return wav_paths, emotions, names

In [13]:
import numpy as np
import csv

# Index the dataset: one entry per audio with its emotion label and actor id.
wav_paths, emotions, names = prepare_RAVDESS_DS('dataset')
if len(wav_paths) == 0:
    # Fail with an explicit message instead of an IndexError on wav_paths[0] below.
    raise FileNotFoundError("No .wav files found under 'dataset/<sub-folder>/'")
wav_paths = np.asarray(wav_paths)
print(wav_paths[0])
print(emotions[0])

# Persist the index as CSV rows of (audio path, emotion label, actor id).
# The `with` block guarantees the file is flushed and closed, so no row is lost.
with open('melspectrogram_class.csv', 'w', newline='') as f:
    write = csv.writer(f)
    write.writerows(zip(wav_paths, emotions, names))

1440it [00:00, 32384.80it/s]

dataset\Actor_01\03-01-01-01-01-01-01.wav
0





In [14]:
## train test 5-fold: build the (image path, emotion, actor) table

import os  # needed by this cell; otherwise it only works if a later cell was run first

import pandas as pd
import natsort

mel_images = 'melspectrogram_images'
# Keep only the rendered spectrograms and sort them naturally (9.png before 10.png),
# because image <i>.png is paired with the i-th audio purely by position.
images = natsort.natsorted(
    entry for entry in os.listdir(mel_images) if entry.endswith('.png')
)
paths = [os.path.join(mel_images, mel) for mel in images]

# A count mismatch would silently attach labels to the wrong images, so stop early.
if not (len(paths) == len(wav_paths) == len(emotions) == len(names)):
    raise ValueError(
        "Found {} images in '{}' but {} audios / {} emotions / {} actors; "
        "regenerate the mel-spectrogram images before building the folds".format(
            len(paths), mel_images, len(wav_paths), len(emotions), len(names)
        )
    )

data = [
    {"path": image_path, "emotion": emotion, "actor": actor}
    for image_path, emotion, actor in zip(paths, emotions, names)
]

# Explicit columns keep the frame well-formed even when there are no rows.
df = pd.DataFrame(data, columns=["path", "emotion", "actor"])

print(df)

                                path  emotion  actor
0        melspectrogram_images\0.png        0      1
1        melspectrogram_images\1.png        0      1
2        melspectrogram_images\2.png        0      1
3        melspectrogram_images\3.png        0      1
4        melspectrogram_images\4.png        1      1
...                              ...      ...    ...
1435  melspectrogram_images\1435.png        7     24
1436  melspectrogram_images\1436.png        7     24
1437  melspectrogram_images\1437.png        7     24
1438  melspectrogram_images\1438.png        7     24
1439  melspectrogram_images\1439.png        7     24

[1440 rows x 3 columns]


In [15]:
import os
# Actor ids held out as the test set of each fold; every other actor trains that fold.
actors_per_fold = {
    0: [2, 5, 14, 15, 16],
    1: [3, 6, 7, 13, 18],
    2: [10, 11, 12, 19, 20],
    3: [8, 17, 21, 23, 24],
    4: [1, 4, 9, 22],
}

save_root = 'melspectrogram_fold'
os.makedirs(save_root, exist_ok=True)

# Shared CSV settings: plain comma-separated rows without index or header line.
csv_options = dict(sep=',', encoding='utf-8', index=False, header=False)

for i in actors_per_fold:
    # Boolean mask of the rows whose actor belongs to this fold's held-out group.
    is_test_actor = df['actor'].isin(actors_per_fold[i])

    test_df = df.loc[is_test_actor].reset_index(drop=True)
    train_df = df.loc[~is_test_actor].reset_index(drop=True)

    # One sub-folder per fold: <save_root>/<fold index>/{train,test}.csv
    save_path = os.path.join(save_root, str(i))
    os.makedirs(save_path, exist_ok=True)

    train_df.to_csv(f'{save_path}/train.csv', **csv_options)
    test_df.to_csv(f'{save_path}/test.csv', **csv_options)

In [11]:
import csv
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt

def save_melspectrogram(save_path, wav_paths, emotions, actors=None, csv_path='melspectrogram.csv'):
    """
    Render one mel-spectrogram image per audio file and log the results in a CSV.

    For the i-th audio the image is written to ``<save_path>/<i>.png`` and a row
    ``(image path, emotions[i], actors[i])`` is appended to ``csv_path``.

    Relies on ``np`` and ``tqdm`` being imported by earlier cells of the notebook.

    :param save_path: Folder where the PNG images are stored (created if missing)
    :param wav_paths: Sequence of audio file paths
    :param emotions: Sequence of emotion labels, aligned with ``wav_paths``
    :param actors: Sequence of actor ids, aligned with ``wav_paths``. When omitted,
                   the notebook-level ``names`` list is used (the original behaviour).
    :param csv_path: Destination of the CSV log
    """
    if actors is None:
        # Backward compatibility: the original implementation read the notebook
        # global `names`; pass `actors` explicitly to avoid that hidden dependency.
        actors = names

    os.makedirs(save_path, exist_ok=True)

    # newline='' is what the csv module requires to avoid blank rows on Windows;
    # the `with` block guarantees the log is flushed and closed.
    with open(csv_path, 'w', newline='') as f:
        write = csv.writer(f)

        for i, wav_path in enumerate(tqdm(wav_paths, desc='saved melspectrogram images')):
            # `sr` must be passed by keyword: recent librosa releases reject it positionally.
            y, sr = librosa.load(wav_path, sr=16000)

            S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, win_length=512, window='hamming', hop_length=256, n_mels=256, fmax=sr/2)
            melspectrogram = librosa.power_to_db(S, ref=np.max)
            # Fixed crop: the first 226 of the 256 mel bands and frames 39..219
            # (about 0.62 s to 3.52 s at hop 256 / 16 kHz). Presumably trims leading
            # and trailing silence; confirm before changing.
            melspectrogram = melspectrogram[:226, 39:220]

            melspectrogram_path = os.path.join(save_path, str(i) + '.png')

            # A fresh figure per file, closed afterwards: drawing every spectrogram
            # onto the same never-closed figure piles up artists and memory.
            fig, ax = plt.subplots()
            try:
                librosa.display.specshow(melspectrogram, sr=sr, ax=ax)
                ax.axis('off')
                fig.savefig(melspectrogram_path, bbox_inches='tight', pad_inches=0)
            finally:
                plt.close(fig)

            write.writerow([melspectrogram_path, emotions[i], actors[i]])

In [None]:
# Generate the spectrogram images. Needs `wav_paths` / `emotions` from the dataset
# indexing cell, and must have run before the cell that lists 'melspectrogram_images'.
save_melspectrogram(
    save_path='melspectrogram_images',
    wav_paths=wav_paths,
    emotions=emotions,
)