In [1]:
from pathlib import Path
from tqdm import tqdm
import csv
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import warnings
from natsort import natsorted
import pandas as pd

# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices from the imported libraries — confirm that is intended.
warnings.filterwarnings(action='ignore')

In [2]:
def prepare_EmoDB_DS(path_audios):
    """
    Collect the path, emotion label and speaker id of every EmoDB recording in a folder.

    EmoDB file names encode their metadata positionally. For ``03a01Fa.wav``:
    characters 0-1 are the speaker id (``03``) and character 5 is the emotion
    code (``F``, i.e. happiness), which is mapped to an integer label.

    Only directory entries whose name ends in ``.wav`` (case-insensitive) are
    used, so stray files such as ``.DS_Store`` do not abort the scan.

    :param path_audios: Path to the folder that contains all the audios in .wav format, 16kHz and single-channel(mono)
    :return: Three parallel lists ``(paths, emotions, actors)`` in natural file-name order.
             ``paths`` holds ``path_audios`` joined with each file name, ``emotions`` the
             integer labels (0-6) and ``actors`` the two-character speaker ids.
    :raises KeyError: If a .wav file name has an emotion code missing from the mapping.
    :raises IndexError: If a .wav file name is too short to contain an emotion code.
    """
    # Emotion code (6th character of the file name) -> integer class label.
    # English glosses are the reviewer's reading of the German EmoDB codes — confirm against the corpus docs.
    dict_emotions = {
        'W': 0,  # anger
        'L': 1,  # boredom
        'E': 2,  # disgust
        'A': 3,  # fear
        'F': 4,  # happiness
        'T': 5,  # sadness
        'N': 6   # neutral
    }

    # Ignore anything that is not a .wav recording before parsing the names.
    wav_names = natsorted(
        [name for name in os.listdir(path_audios) if name.lower().endswith('.wav')]
    )

    paths, emotions, actors = [], [], []
    for name in wav_names:
        paths.append(os.path.join(path_audios, name))
        emotions.append(dict_emotions[name[5]])
        actors.append(name[:2])

    return paths, emotions, actors

In [3]:
# Scan the relative 'wav' folder and build the parallel path / emotion-label / speaker-id lists.
paths, emotions, actors = prepare_EmoDB_DS('wav')

In [None]:
def save_melspectrogram(save_path, paths, emotions, actors):
    """
    Render one mel-spectrogram PNG per audio file and index the results.

    Each file is loaded at 16 kHz, converted to a 256-band mel spectrogram in dB
    (relative to its own maximum) and saved without axes or padding as
    ``<save_path>/<i>.png``, where ``i`` is the file's position in ``paths``.

    :param save_path: Folder for the PNG images; created if it does not exist.
    :param paths: List of audio file paths.
    :param emotions: Emotion labels, parallel to ``paths``.
    :param actors: Speaker ids, parallel to ``paths``.
    :return: DataFrame with one row per file and the columns ``img``, ``emotion`` and ``actor``.
    """
    os.makedirs(save_path, exist_ok=True)

    data = []
    # Wrap the list itself (not the enumerate object) so tqdm knows the total
    # and can show a real progress bar.
    for i, path in enumerate(tqdm(paths, desc='melspectrogram image generate.....')):
        y, sr = librosa.load(path, sr=16000)

        S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, win_length=512, window='hamming', hop_length=256, n_mels=256, fmax=sr/2)
        melspectrogram = librosa.power_to_db(S, ref=np.max)

        melspectrogram_path = os.path.join(save_path, str(i)+'.png')

        # Use a fresh figure per file and always close it. Drawing on the
        # implicit current figure would pile up one mesh per iteration,
        # leaking memory and making each save slower than the last.
        fig, ax = plt.subplots()
        try:
            librosa.display.specshow(melspectrogram, sr=sr, ax=ax)
            ax.axis('off')
            fig.savefig(melspectrogram_path, bbox_inches='tight', pad_inches=0)
        finally:
            plt.close(fig)

        data.append({
            'img': melspectrogram_path,
            'emotion': emotions[i],
            'actor': actors[i]
        })

    return pd.DataFrame(data)

In [None]:
# Render every recording to a PNG under "melspectrogram_images" and keep the image index DataFrame.
# NOTE(review): the same call is repeated in a later cell — confirm both are needed.
df = save_melspectrogram("melspectrogram_images", paths, emotions, actors)

In [4]:
def generate_train_test(fold, df, save_path=""):
    """
    Divide the data in train and test in a subject-wise 5-CV way. The division is generated before running the training
    of each fold.

    Rows whose ``actor`` belongs to the fold's two held-out speakers form the test set;
    all remaining rows form the train set. Both frames get a fresh 0-based index.

    :param fold:[int] Fold to create the train and test sets [ranging from 0 - 4]
    :param df:[DataFrame] Dataframe with the complete list of files generated (must have an ``actor`` column)
    :param save_path:[str] Path to save the train.csv and test.csv per fold; created if missing.
                     Nothing is written when it is the empty string.
    :return: Tuple ``(train_df, test_df)``.
    :raises KeyError: If ``fold`` is not one of 0-4.
    """
    # Speakers held out for testing in each fold. Together the folds cover
    # all ten speaker ids: 03, 08, 09, 10, 11, 12, 13, 14, 15, 16.
    actors_per_fold = {
        0: ['03', '08'],
        1: ['09', '10'],
        2: ['11', '12'],
        3: ['13', '14'],
        4: ['15', '16']
    }

    # Compute the membership mask once and reuse it for both partitions.
    is_test = df['actor'].isin(actors_per_fold[fold])
    test_df = df.loc[is_test].reset_index(drop=True)
    train_df = df.loc[~is_test].reset_index(drop=True)

    if save_path != "":
        # Do not rely on the caller having created the output folder.
        os.makedirs(save_path, exist_ok=True)
        # The CSVs are written without a header row or index column.
        train_df.to_csv(os.path.join(save_path, 'train.csv'), sep=',', encoding='utf-8', index=False, header=False)
        test_df.to_csv(os.path.join(save_path, 'test.csv'), sep=',', encoding='utf-8', index=False, header=False)
    return train_df, test_df

In [7]:
import time

# Regenerate the spectrogram PNGs and their index DataFrame (columns: img, emotion, actor).
df = save_melspectrogram("melspectrogram_images", paths, emotions, actors)

# Export the train/test split of each of the five subject-wise folds.
for fold in range(5):
    
    # One output folder per fold: 5-CV/fold0 ... 5-CV/fold4
    save_path = os.path.join('5-CV', "fold"+str(fold))
    os.makedirs(save_path, exist_ok=True)
    
    # Writes train.csv and test.csv for this fold into save_path; the returned frames are discarded.
    generate_train_test(fold, df, save_path)
    # NOTE(review): the purpose of this 10-second pause per fold is not evident from this cell — confirm it is needed.
    time.sleep(10)

melspectrogram image generate.....: 535it [00:00, 177457.70it/s]
