In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
import tensorflow as tf
import tensorflow_io as tfio
import keras
import sklearn
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import sys
import h5py
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Input,Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization, Conv1D, MaxPooling1D
from keras.layers import Bidirectional, LSTM, Reshape
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
import pickle

In [2]:
# Dataset locations, resolved against the directory the notebook is started from.
# Every root keeps its trailing "/" because later cells also build paths by
# plain string concatenation.
cwd = os.getcwd()
CREMA = f"{cwd}/CREMA/"
RAVDESS = f"{cwd}/RAVDESS/audio_speech_actors_01-24/"
SAVEE = f"{cwd}/SAVEE/"
TESS = f"{cwd}/TESS/TESS_Toronto_emotional_speech_set_data/"

# Column selections exported per dataset: plain emotion label (set1) versus
# the combined "<sex>_<emotion>" label (set2).
set1 = ["emotion", "path", "Sex"]
set2 = ["sex_emotion", "path", "Sex"]

In [3]:
def read_signal_file_to_df(df: pd.DataFrame, path: str) -> pd.DataFrame:
    """Load the audio file of every row and attach the waveform to the frame.

    Each value of ``df['filename']`` is joined onto ``path`` and read with
    ``librosa.load``. Three columns are added to ``df``:

    * ``signal``     - the loaded samples after ``librosa.util.normalize``
    * ``signal_sr``  - the sampling rate returned by ``librosa.load``
    * ``len_signal`` - number of samples in ``signal``

    Args:
        df: Frame with a ``filename`` column. It is modified in place, so pass
            a copy if the original must stay untouched.
        path: Directory that the ``filename`` values are relative to.

    Returns:
        The same ``df`` object with the three columns added.
    """
    loaded = [librosa.load(os.path.join(path, name)) for name in df['filename']]

    # Build the intermediate frame on df's own index: column assignment aligns
    # on index labels, so a default 0..n-1 index would leave NaN in the rows of
    # any filtered / non-reset frame.
    df[['signal', 'signal_sr']] = pd.DataFrame(
        data=loaded,
        columns=['signal', 'signal_sr'],
        index=df.index,
    )

    df['signal'] = df['signal'].apply(lambda x: librosa.util.normalize(x))
    df['len_signal'] = df['signal'].apply(lambda x: len(x))
    return df

In [4]:
def file_list_to_df_crema(file_list: list):
    """Parse CREMA-D file names into a metadata frame.

    Names are split on ``_`` and read as
    ``<ActorID>_<sentence>_<emotion code>_<quantifier>.<ext>``,
    e.g. ``1001_DFA_ANG_XX.wav``.

    Args:
        file_list: File names (not full paths) of the recordings.

    Returns:
        DataFrame indexed 0..n-1 with the string columns ``ActorID``,
        ``sentence``, ``emotion_id``, ``emotion``, ``quantifier`` and ``path``
        (the name joined under ``CREMA``/AudioWAV/).

    Raises:
        IndexError: a name has fewer than four ``_``-separated fields.
        KeyError: the emotion code is not one of the six known codes.
    """

    columns = ['ActorID', 'sentence', 'emotion_id', 'emotion', 'quantifier', 'path']
    emotion_dict = {
        "SAD": "sadness",
        "ANG": "anger",
        "DIS": "disgust",
        "FEA": "fear",
        "HAP": "happiness",
        "NEU": "neutral"
    }

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame for every row.
    records = []
    for file in file_list:
        file_as_list = file.split('_')
        records.append({
            'ActorID': file_as_list[0],
            'sentence': file_as_list[1],
            'emotion_id': file_as_list[2],
            'emotion': emotion_dict[file_as_list[2]],
            # Drop the file extension from the last field.
            'quantifier': file_as_list[3].split('.')[0],
            'path': os.path.join(CREMA, "AudioWAV/", file)
        })
    return pd.DataFrame(records, columns=columns)


def get_data_crema(path):
    """Build the CREMA-D metadata table (file info merged with speaker info).

    Args:
        path: CREMA root directory. It must contain an ``AudioWAV`` folder and
            ``VideoDemographics.csv``; a trailing path separator is optional.

    Returns:
        DataFrame with the lower-cased demographics columns joined to the
        per-file columns on ``ActorID``, plus ``sex_emotion``
        (``"<Sex>_<emotion>"``). The CSV is expected to provide ``ActorID``
        and ``Sex``.
    """
    data = os.path.join(path, "AudioWAV/")
    file_list = os.listdir(data)
    # Join instead of concatenating so `path` works with or without a trailing
    # separator (plain concatenation produced ".../CREMAVideoDemographics.csv").
    demographics_csv = os.path.join(path, 'VideoDemographics.csv')
    # Everything is read as text and lower-cased, so Sex becomes 'male'/'female'.
    speaker_df = pd.read_csv(demographics_csv, dtype=str).apply(lambda x: x.astype(str).str.lower())
    file_df = file_list_to_df_crema(file_list)
    data_df = speaker_df.merge(file_df, on='ActorID')
    data_df['sex_emotion'] = data_df['Sex'] + '_' + data_df['emotion']
    # Signal loading is intentionally disabled; only metadata is returned.
#     data_df = read_signal_file_to_df(data_df.copy(deep=True), path=data)
    return data_df

# CREMA

In [5]:
# Build the CREMA-D metadata table and keep only the set1 columns
# (emotion, path, Sex) with a fresh 0..n-1 index.
data_df = get_data_crema(CREMA)
original_CREMA_df = data_df[set1].copy().reset_index(drop=True)
# Disabled variants: set2 (sex_emotion label) and per-sex subsets.
# CREMA_df2 = data_df[set2].copy().reset_index(drop=True)
# CREMA_M_df = data_df[data_df['Sex']=='male'].copy()[set1].reset_index(drop=True)
# CREMA_F_df = data_df[data_df['Sex']=='female'].copy()[set1].reset_index(drop=True)

In [6]:
# The full merged frame is not needed once the set1 subset has been copied out.
del data_df

In [7]:
# Preview the first rows of the CREMA-D subset.
original_CREMA_df.head()

Unnamed: 0,emotion,path,Sex
0,anger,E:\Dissertation/CREMA/AudioWAV/1001_DFA_ANG_XX...,male
1,disgust,E:\Dissertation/CREMA/AudioWAV/1001_DFA_DIS_XX...,male
2,fear,E:\Dissertation/CREMA/AudioWAV/1001_DFA_FEA_XX...,male
3,happiness,E:\Dissertation/CREMA/AudioWAV/1001_DFA_HAP_XX...,male
4,neutral,E:\Dissertation/CREMA/AudioWAV/1001_DFA_NEU_XX...,male


In [8]:
# CREMA_df2.head()

In [9]:
# CREMA_M_df.head()

In [10]:
# CREMA_F_df.head()

In [11]:
# Persist the CREMA-D subset; the relative file name resolves against the
# current working directory.
original_CREMA_df.to_pickle('original_CREMA_df.pkl')
# CREMA_df2.to_pickle('CREMA_df2.pkl')
# CREMA_M_df.to_pickle('CREMA_M_df.pkl')
# CREMA_F_df.to_pickle('CREMA_F_df.pkl')

In [12]:
# The subset has been pickled above, so the in-memory copy can be released.
del original_CREMA_df
# del CREMA_df2
# del CREMA_M_df
# del CREMA_F_df

# RAVDESS

In [13]:
def file_list_to_df_ravdess(file_list: list):
    """Parse RAVDESS file names into a metadata frame.

    Every entry is expected as ``"<folder>/<file>"``. The file part is split
    on ``-`` into seven fields: modality, vocal channel, emotion code, emotion
    intensity, statement, repetition and a trailing number (with extension).
    An even trailing number is labelled ``'female'``, an odd one ``'male'``.

    Args:
        file_list: Entries of the form ``"<folder>/<file>"``, relative to
            ``RAVDESS``.

    Returns:
        DataFrame indexed 0..n-1 with the columns ``modality``,
        ``vocal_channel``, ``emotion``, ``emotion_intensity``, ``statement``,
        ``repetition``, ``Sex`` and ``path`` (the entry joined under
        ``RAVDESS``).

    Raises:
        IndexError: an entry has no ``/`` or fewer than seven ``-`` fields.
        KeyError: the emotion code is not in ``"01"``..``"08"``.
        ValueError: the trailing number is not an integer.
    """

    columns = ['modality', 'vocal_channel', 'emotion', 'emotion_intensity', 'statement', 'repetition', 'Sex', 'path']
    emotion_dict = {
        "01": "neutral",
        "02": "calmness",
        "03": "happiness",
        "04": "sadness",
        "05": "anger",
        "06": "fear",
        "07": "disgust",
        "08": "surprise",
    }

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame for every row.
    records = []
    for file in file_list:
        file_as_list = file.split('/')[1].split('-')
        records.append({
            'modality': file_as_list[0],
            'vocal_channel': file_as_list[1],
            'emotion': emotion_dict[file_as_list[2]],
            'emotion_intensity': file_as_list[3],
            'statement': file_as_list[4],
            'repetition': file_as_list[5],
            # Last field is "<number>.<ext>"; parity of the number decides Sex.
            'Sex': 'female' if int(file_as_list[6].split('.')[0])%2 == 0 else 'male',
            'path': os.path.join(RAVDESS, file)
        })
    file_df = pd.DataFrame(records, columns=columns)
#     file_df = file_df[file_df['emotion']!='calmness'].reset_index(drop=True)
    return file_df

def get_data_ravdess():
    """Build the RAVDESS metadata table from the folders under ``RAVDESS``.

    Every sub-directory of ``RAVDESS`` is listed and its files are passed to
    ``file_list_to_df_ravdess`` as ``"<folder>/<file>"`` entries.

    Returns:
        The frame from ``file_list_to_df_ravdess`` with an extra
        ``sex_emotion`` column (``"<Sex>_<emotion>"``).
    """
    file_list = []
    for folder in os.listdir(RAVDESS):
        folder_path = os.path.join(RAVDESS, folder)
        # Skip stray files in the dataset root (e.g. OS metadata files);
        # listing them as directories would raise.
        if not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            file_list.append(f'{folder}/{file}')

    data_df = file_list_to_df_ravdess(file_list)
    data_df['sex_emotion'] = data_df['Sex'] + '_' + data_df['emotion']
    # Signal loading is intentionally disabled; only metadata is returned.
#     data_df = read_signal_file_to_df(data_df.copy(deep=True), path=RAVDESS)
    return data_df

In [14]:
# Build the RAVDESS metadata table and keep only the set1 columns
# (emotion, path, Sex) with a fresh 0..n-1 index.
data_df = get_data_ravdess()
original_RAVDESS_df = data_df[set1].copy().reset_index(drop=True)
# Disabled variants: set2 (sex_emotion label) and per-sex subsets.
# RAVDESS_df2 = data_df[set2].copy().reset_index(drop=True)
# RAVDESS_M_df = data_df[data_df['Sex']=='male'].copy()[set1].reset_index(drop=True)
# RAVDESS_F_df = data_df[data_df['Sex']=='female'].copy()[set1].reset_index(drop=True)

In [15]:
# The full RAVDESS frame is not needed once the set1 subset has been copied out.
del data_df

In [16]:
# RAVDESS_df.path[0]

In [17]:
# RAVDESS_df

In [18]:
# Preview the first rows of the RAVDESS subset.
original_RAVDESS_df.head()

Unnamed: 0,emotion,path,Sex
0,neutral,E:\Dissertation/RAVDESS/audio_speech_actors_01...,male
1,neutral,E:\Dissertation/RAVDESS/audio_speech_actors_01...,male
2,neutral,E:\Dissertation/RAVDESS/audio_speech_actors_01...,male
3,neutral,E:\Dissertation/RAVDESS/audio_speech_actors_01...,male
4,calmness,E:\Dissertation/RAVDESS/audio_speech_actors_01...,male


In [19]:
# RAVDESS_M_df.head()

In [20]:
# RAVDESS_F_df.head()

In [21]:
# Persist the RAVDESS subset; the relative file name resolves against the
# current working directory.
original_RAVDESS_df.to_pickle('original_RAVDESS_df.pkl')
# RAVDESS_df2.to_pickle('RAVDESS_df2.pkl')
# RAVDESS_M_df.to_pickle('RAVDESS_M_df.pkl')
# RAVDESS_F_df.to_pickle('RAVDESS_F_df.pkl')

In [22]:
# del RAVDESS_df
# del RAVDESS_df2
# del RAVDESS_M_df
# del RAVDESS_F_df

# SAVEE


In [23]:
def file_list_to_df_savee(file_list: list):
    """Label SAVEE recordings from the emotion code embedded in the file name.

    The two characters at ``name[-8:-6]`` select the emotion (``'_a'`` anger,
    ``'_d'`` disgust, ``'_f'`` fear, ``'_h'`` happiness, ``'_n'`` neutral,
    ``'sa'`` sadness, ``'su'`` surprise); anything else becomes ``'unknown'``.

    Args:
        file_list: File names located directly under ``SAVEE``.

    Returns:
        DataFrame with the columns ``emotion``, ``Sex`` (always ``'male'``)
        and ``path`` (the name joined under ``SAVEE``), one row per entry.
    """

    code_to_emotion = {
        '_a': 'anger',
        '_d': 'disgust',
        '_f': 'fear',
        '_h': 'happiness',
        '_n': 'neutral',
        'sa': 'sadness',
        'su': 'surprise',
    }

    labels = []
    locations = []
    for name in file_list:
        labels.append(code_to_emotion.get(name[-8:-6], 'unknown'))
        locations.append(os.path.join(SAVEE, name))

    # Assemble emotion / Sex / path side by side, in that column order.
    file_df = pd.DataFrame(labels, columns=['emotion'])
    file_df['Sex'] = 'male'
    path_df = pd.DataFrame(locations, columns=['path'])
    file_df = pd.concat([file_df, path_df], axis=1)
#     file_df = file_df[file_df['emotion']!='unknown'].reset_index(drop=True)
    return file_df

def get_data_savee():
    """Build the SAVEE metadata table from the files directly under ``SAVEE``.

    Returns:
        The frame from ``file_list_to_df_savee`` with an extra ``sex_emotion``
        column (``"<Sex>_<emotion>"``).
    """
    savee_df = file_list_to_df_savee(os.listdir(SAVEE))
    sex, emotion = savee_df['Sex'], savee_df['emotion']
    savee_df['sex_emotion'] = sex + '_' + emotion
    # Signal loading stays disabled; only metadata is returned.
#     data_df = read_signal_file_to_df(data_df.copy(deep=True), path=SAVEE)
    return savee_df


In [24]:
# Build the SAVEE metadata table and keep only the set1 columns
# (emotion, path, Sex) with a fresh 0..n-1 index.
data_df = get_data_savee()
original_SAVEE_df = data_df[set1].copy().reset_index(drop=True)
# Disabled variants: set2 (sex_emotion label) and per-sex subsets.
# SAVEE_df2 = data_df[set2].copy().reset_index(drop=True)
# SAVEE_M_df = data_df[data_df['Sex']=='male'].copy()[set1].reset_index(drop=True)
# SAVEE_F_df = data_df[data_df['Sex']=='female'].copy()[set1].reset_index(drop=True)

In [25]:
# The full SAVEE frame is not needed once the set1 subset has been copied out.
del data_df

In [26]:
# Preview the first rows of the SAVEE subset.
original_SAVEE_df.head()

Unnamed: 0,emotion,path,Sex
0,anger,E:\Dissertation/SAVEE/DC_a01.wav,male
1,anger,E:\Dissertation/SAVEE/DC_a02.wav,male
2,anger,E:\Dissertation/SAVEE/DC_a03.wav,male
3,anger,E:\Dissertation/SAVEE/DC_a04.wav,male
4,anger,E:\Dissertation/SAVEE/DC_a05.wav,male


In [27]:
# SAVEE_df2.head()

In [28]:
# Persist the SAVEE subset; the relative file name resolves against the
# current working directory.
original_SAVEE_df.to_pickle('original_SAVEE_df.pkl')
# SAVEE_df2.to_pickle('SAVEE_df2.pkl')

In [29]:
# The subset has been pickled above, so the in-memory copy can be released.
del original_SAVEE_df
# del SAVEE_df2

# TESS

In [30]:
def file_list_to_df_tess(file_list: list):
    """Label every TESS recording from the name of the folder it sits in.

    Folder names are read as ``<speaker>_<emotion>`` with speaker ``OAF`` or
    ``YAF``. Matching is case-insensitive, so both ``OAF_Fear`` and
    ``YAF_fear`` map to ``'fear'``. Folders that do not fit get the label
    ``'unknown'``.

    Args:
        file_list: Names of the emotion folders directly under ``TESS``
            (for example ``os.listdir(TESS)``). ``None`` falls back to
            listing ``TESS``.

    Returns:
        DataFrame with the columns ``emotion``, ``Sex`` (always ``'female'``)
        and ``path`` (``"<folder>/<file>"`` joined under ``TESS``), one row
        per file found in the given folders.
    """
    folder_emotion = {
        'angry': 'anger',
        'disgust': 'disgust',
        'fear': 'fear',
        'happy': 'happiness',
        'neutral': 'neutral',
        # The two speakers use different spellings for this folder.
        'pleasant_surprise': 'surprise',
        'pleasant_surprised': 'surprise',
        'sad': 'sadness',
    }

    # Honour the caller's list; it used to be ignored in favour of re-listing TESS.
    dir_list = os.listdir(TESS) if file_list is None else file_list

    path = []
    emotion = []
    for i in dir_list:
        folder_path = os.path.join(TESS, i)
        # Skip stray files in the dataset root; listing them would raise.
        if not os.path.isdir(folder_path):
            continue

        # The label depends on the folder only, so resolve it once per folder.
        speaker, _, label = i.lower().partition('_')
        if speaker in ('oaf', 'yaf'):
            folder_label = folder_emotion.get(label, 'unknown')
        else:
            folder_label = 'unknown'

        for f in os.listdir(folder_path):
            emotion.append(folder_label)
            path.append(os.path.join(TESS, f'{i}/{f}'))

    file_df = pd.DataFrame(emotion, columns = ['emotion'])
    file_df['Sex'] = 'female'
    file_df = pd.concat([file_df, pd.DataFrame(path, columns=['path'])], axis=1)
#     file_df = file_df[file_df['emotion']!='unknown'].reset_index(drop=True)
    return file_df

def get_data_tess():
    """Build the TESS metadata table from the folders directly under ``TESS``.

    Returns:
        The frame from ``file_list_to_df_tess`` with an extra ``sex_emotion``
        column (``"<Sex>_<emotion>"``).
    """
    tess_df = file_list_to_df_tess(os.listdir(TESS))
    sex, emotion = tess_df['Sex'], tess_df['emotion']
    tess_df['sex_emotion'] = sex + '_' + emotion
    # Signal loading stays disabled; only metadata is returned.
#     data_df = read_signal_file_to_df(data_df.copy(deep=True), path=TESS)
    return tess_df

In [31]:
# Build the TESS metadata table and keep only the set1 columns
# (emotion, path, Sex) with a fresh 0..n-1 index.
data_df = get_data_tess()
original_TESS_df = data_df[set1].copy().reset_index(drop=True)
# Disabled variants: set2 (sex_emotion label) and per-sex subsets.
# TESS_df2 = data_df[set2].copy().reset_index(drop=True)
# TESS_M_df = data_df[data_df['Sex']=='male'].copy()[set1].reset_index(drop=True)
# TESS_F_df = data_df[data_df['Sex']=='female'].copy()[set1].reset_index(drop=True)

In [32]:
# The full TESS frame is not needed once the set1 subset has been copied out.
del data_df

In [33]:
# Preview the first rows of the TESS subset.
original_TESS_df.head()

Unnamed: 0,emotion,path,Sex
0,anger,E:\Dissertation/TESS/TESS_Toronto_emotional_sp...,female
1,anger,E:\Dissertation/TESS/TESS_Toronto_emotional_sp...,female
2,anger,E:\Dissertation/TESS/TESS_Toronto_emotional_sp...,female
3,anger,E:\Dissertation/TESS/TESS_Toronto_emotional_sp...,female
4,anger,E:\Dissertation/TESS/TESS_Toronto_emotional_sp...,female


In [34]:
# TESS_df2.head()

In [35]:
# Persist the TESS subset; the relative file name resolves against the
# current working directory.
original_TESS_df.to_pickle('original_TESS_df.pkl')
# TESS_df2.to_pickle('TESS_df2.pkl')

In [36]:
# del TESS_df
# del TESS_df2

In [37]:
def get_sample_from_df(df, row_nr = None):
    """Return one row of ``df`` by position, or a random row when none is given.

    Args:
        df: Frame to pick from.
        row_nr: Positional row index passed to ``df.iloc``. ``None`` (the
            default) draws a position uniformly from ``0 .. len(df) - 1``
            with ``np.random.randint``.

    Returns:
        The selected row (``df.iloc[row_nr]``).

    Raises:
        ValueError: ``row_nr`` is ``None`` and ``df`` has no rows.
        IndexError: ``row_nr`` is out of bounds for ``df``.
    """
    # Compare against None explicitly: a truthiness test would treat the valid
    # position 0 as "not given" and silently return a random row instead.
    if row_nr is None:
        n_rows = df.shape[0]
        row_nr = np.random.randint(0, n_rows)
    return df.iloc[row_nr]


def do(sample):
    """Plot the waveform stored in ``sample['signal']`` on a 15x5 figure.

    The time axis is drawn for a fixed sampling rate of 22050 Hz.

    Args:
        sample: Mapping-like row with a ``signal`` entry holding the samples.
    """
    sampling_rate = 22050
    _, ax = plt.subplots(figsize=(15, 5))
    librosa.display.waveshow(sample['signal'], sr=sampling_rate, ax=ax)
    ax.set_title('signal')
    plt.show()

In [38]:
# do(get_sample_from_df(CREMA_df))

In [39]:
# do(get_sample_from_df(RAVDESS_df))

In [40]:
# do(get_sample_from_df(SAVEE_df))

In [41]:
# do(get_sample_from_df(TESS_df))

# Concatenate Datasets

In [42]:
# all_df = pd.concat([TESS_df, SAVEE_df, CREMA_df, RAVDESS_df]).reset_index(drop=True)
# all_df2 = pd.concat([TESS_df2, SAVEE_df2, CREMA_df2, RAVDESS_df2]).reset_index(drop=True)
# all_M_df = pd.concat([SAVEE_df, CREMA_M_df, RAVDESS_M_df]).reset_index(drop=True)
# all_F_df = pd.concat([TESS_df, CREMA_F_df, RAVDESS_F_df]).reset_index(drop=True)

# Save to file

In [43]:
# all_df.to_pickle('all_samples.pkl')
# all_df2.to_pickle('all_samples2.pkl')
# all_M_df.to_pickle('male_samples.pkl')
# all_F_df.to_pickle('female_samples.pkl')

In [44]:
# # test
# df = pd.read_pickle('male_samples.pkl')
# df.head()