In [4]:
import librosa.display
import os
from scipy.io import wavfile
import librosa
import os
import pandas as pd
import numpy as np
import re

## Silence elimination

In [5]:
def eliminar_silencio(ruta_base: str, carpetas: list) -> None:
    """
    Removes the silence from the .wav files in the specified folders and saves the clean files in a new folder called 'clean_audios'.

    :param ruta_base: The base path where the folders with the audio files are located.
    :type ruta_base: str
    :param carpetas: A list with the names of the folders to be processed.
    :type carpetas: list
    :return: None
    """

    if not os.path.exists("../data/clean_audios"):
        os.mkdir("../data/clean_audios")

    for directorio, subdirectorios, archivos in os.walk(ruta_base):

        if os.path.basename(directorio) in carpetas:
            for archivo in archivos:
                if archivo.endswith(".wav"):
                    audio, tasa_muestreo = librosa.load(os.path.join(directorio, archivo), sr=None)
                    audio_sin_silencio, indices_no_silencio = librosa.effects.trim(audio)
                    ruta_limpia = os.path.join("../data/clean_audios", os.path.basename(directorio))
                    if not os.path.exists(ruta_limpia):
                        os.mkdir(ruta_limpia)
                    wavfile.write(os.path.join(ruta_limpia, archivo), tasa_muestreo, audio_sin_silencio)


In [6]:
eliminar_silencio("../data/audios/",["emoreact", "Iemocap", "ravdess", "TESS"])

KeyboardInterrupt: 

## Dataframe Creation

### Ravdess

In [7]:
ravdess = '../data/clean_audios/ravdess'

In [8]:
def create_rav_emotion_df(ruta_clean_audios: str) -> pd.DataFrame:
    """
    Creates a dataframe with two columns: emotion and path, using the emotion label encoded in the file names
    of the .wav files in the specified directory.

    :param ruta_clean_audios: The path where the clean audio files are located.
    :type ruta_clean_audios: str
    :return: A pandas dataframe with the emotion label and the path of each file.
    :rtype: pd.DataFrame
    """

    # List to store the emotion and path for each file
    emotion_paths = []

    # Loop through all the .wav files in the specified directory
    for root, _, files in os.walk(ruta_clean_audios):
        for file in files:
            if file.endswith(".wav"):
                # Get the emotion label from the file name
                emotion_label = int(file[7])

                # Assign emotion based on label
                if emotion_label == 1 or emotion_label == 2:
                    emotion = "neutral"
                elif emotion_label == 3:
                    emotion = "joy"
                elif emotion_label == 4:
                    emotion = "sadness"
                elif emotion_label == 5 or emotion_label == 7:
                    emotion = "anger"
                elif emotion_label == 6:
                    emotion = "fear"
                elif emotion_label == 8:
                    emotion = "surprise"

                # Create tuple with emotion and path
                path = os.path.join(root, file)
                emotion_path = (emotion, path)

                # Append to list
                emotion_paths.append(emotion_path)

    # Create pandas dataframe with emotion and path columns
    df = pd.DataFrame(emotion_paths, columns=["emotion", "path"])

    return df


In [9]:
ravdess_df=create_rav_emotion_df(ravdess)
ravdess_df.head(5)

Unnamed: 0,emotion,path
0,sadness,../data/clean_audios/ravdess/03-01-04-01-02-02...
1,fear,../data/clean_audios/ravdess/03-01-06-02-02-02...
2,joy,../data/clean_audios/ravdess/03-01-03-02-01-02...
3,neutral,../data/clean_audios/ravdess/03-01-01-01-01-02...
4,neutral,../data/clean_audios/ravdess/03-01-02-02-01-02...


### Iemocap

In [10]:
iemocapCsv = '../data/aud_em/iemo.csv'

In [11]:
def create_path_dataframe(ruta_csv):
    """
    Reads a csv file with a 'path' column that contains file paths in a specific format,
    modifies the paths to include the correct folder and replaces the emotion values with
    the desired values, then filters out any rows with emotion values that are not in the list
    of emotions to consider.

    :param ruta_csv: The path of the csv file to read.
    :type ruta_csv: str
    :return: A pandas dataframe with the modified paths.
    :rtype: pandas.DataFrame
    """

    # List of emotions to consider
    emotions_to_consider = ['neutral', 'joy', 'sadness', 'anger', 'fear', 'surprise']

    # Read the csv file
    df = pd.read_csv(ruta_csv)

    # Modify the 'path' column
    df['path'] = df['path'].apply(lambda x: "../data/clean_audios/iemocap/" + x.split("/")[-1])

    # Replace the 'emotion' values
    df['emotion'] = df['emotion'].replace({'neu': 'neutral', 'fru': 'anger', 'sad': 'sadness', 'sur': 'surprise',
                                           'ang': 'anger', 'hap': 'joy', 'exc': 'joy', 'fea': 'fear', 'dis': 'anger'})

    # Filter out any rows with emotion values that are not in the list of emotions to consider
    df = df[df['emotion'].isin(emotions_to_consider)]

    # Select only the 'path' column and return the resulting dataframe
    return df[['emotion', 'path']]


In [12]:
iemocap_df = create_path_dataframe(iemocapCsv)
iemocap_df.head(5)

Unnamed: 0,emotion,path
0,neutral,../data/clean_audios/iemocap/Ses01F_script02_1...
1,anger,../data/clean_audios/iemocap/Ses01F_script02_1...
3,surprise,../data/clean_audios/iemocap/Ses01F_script02_1...
4,neutral,../data/clean_audios/iemocap/Ses01F_script02_1...
6,anger,../data/clean_audios/iemocap/Ses01F_script02_1...


### TESS

In [13]:
tess = '../data/clean_audios/TESS'

In [14]:
def create_emotion_path_dataframe(ruta: str) -> pd.DataFrame:
    """
    Creates a dataframe with two columns: 'emotion' and 'path'. The function reads the names of the .wav files
    contained in the directory specified by the path parameter and, based on the presence of certain keywords
    within the file name, assigns an emotion value to the 'emotion' column. The 'path' column contains the full path
    to the .wav file. Only emotions that are explicitly defined are included in the dataframe.

    :param ruta: The path where the .wav files are located.
    :type ruta: str
    :return: A pandas dataframe with two columns: 'emotion' and 'path'.
    :rtype: pandas.DataFrame
    """
    emotions = {'fear': 'fear', 'ps': 'surprise', 'sad': 'sadness', 'angry': 'anger', 'disgust': 'anger', 'happy': 'joy', 'neutral': 'neutral'}
    file_paths = [os.path.join(ruta, f) for f in os.listdir(ruta) if f.endswith('.wav')]
    data = {'emotion': [], 'path': []}
    for path in file_paths:
        emotion = None
        for word, value in emotions.items():
            if word in path.lower():
                emotion = value
                break
        if emotion is not None:
            data['emotion'].append(emotion)
            data['path'].append(path)
    df = pd.DataFrame(data)
    return df

In [15]:
tess_df=create_emotion_path_dataframe(tess)
tess_df.head(5)

Unnamed: 0,emotion,path
0,joy,../data/clean_audios/TESS/OAF_shall_happy.wav
1,anger,../data/clean_audios/TESS/OAF_bite_disgust.wav
2,neutral,../data/clean_audios/TESS/YAF_voice_neutral.wav
3,sadness,../data/clean_audios/TESS/YAF_raise_sad.wav
4,joy,../data/clean_audios/TESS/OAF_calm_happy.wav


## Dataframe Fusion

In [19]:
merged_df = pd.concat([tess_df, iemocap_df, ravdess_df], ignore_index=True)
merged_df.head(5)

Unnamed: 0,emotion,path
0,joy,../data/clean_audios/TESS/OAF_shall_happy.wav
1,anger,../data/clean_audios/TESS/OAF_bite_disgust.wav
2,neutral,../data/clean_audios/TESS/YAF_voice_neutral.wav
3,sadness,../data/clean_audios/TESS/YAF_raise_sad.wav
4,joy,../data/clean_audios/TESS/OAF_calm_happy.wav


## Audio Length

In [25]:
def obtener_duraciones_df(df, column_path: str) -> tuple:
    """
    Receives a pandas dataframe with a 'path' column and returns a tuple with a list of
    the durations of every audio file in the path column and the number of audio files found.

    :param df: The pandas dataframe with the 'path' column.
    :type df: pandas.DataFrame
    :param column_path: The name of the column that contains the audio file paths.
    :type column_path: str
    :return: A tuple with a list of durations and the number of audio files found.
    :rtype: tuple
    """

    duraciones = []
    num_audios = 0

    for path in df[column_path]:
        if ".wav" in path:
            duracion = librosa.get_duration(filename=path)
            duraciones.append(duracion)
            num_audios += 1

    return duraciones, num_audios


In [26]:
durations, num_audios = obtener_duraciones_df(merged_df, 'path')
print("Number of audio files found:", num_audios)


	This alias will be removed in version 1.0.
  duracion = librosa.get_duration(filename=path)
	Audioread support is deprecated in librosa 0.10.0 and will be removed in version 1.0.
  duracion = librosa.get_duration(filename=path)


FileNotFoundError: [Errno 2] No such file or directory: '../data/clean_audios/iemocap/Ses01F_script02_1_F000.wav'