## Imports

In [1]:
import os

import librosa
import numpy as np
import pandas as pd

## Paths

In [2]:
# Root folders, relative to this notebook: where the raw corpora are read from
# and where the generated files are written.
paths = dict(
    datasets="../../../assets/audio_sentiment_data_v2/dataset",
    save_path="../../../assets/audio_sentiment_data_v2/data_features",
)

# One directory per corpus. Each value keeps its trailing "/" so that file
# names can be appended to it directly.
RAV, SAVEE, TESS = (
    paths["datasets"] + "/" + corpus_folder
    for corpus_folder in (
        "ravdess-emotional-speech-audio/",
        "surrey-audiovisual-expressed-emotion-savee/ALL/",
        "toronto-emotional-speech-set-tess/",
    )
)

## Filtering the datasets based on the required emotions

### SAVEE Dataset

In [3]:
# Two-character code found at positions [-8:-6] of a SAVEE file name, mapped to
# the emotion label it stands for. Files whose code is not listed are skipped.
# NOTE(review): the slice assumes names shaped like "<speaker>_<code><nn>.wav"
# (two-digit take number plus a 4-character extension) -- confirm against the data.
savee_codes = {
    '_a': 'angry',
    '_f': 'fear',
    '_h': 'happy',
    'sa': 'sad',
    'su': 'surprise',
}

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting frame (and of the CSV written later) stable across machines.
dir_list = sorted(os.listdir(SAVEE))

emotion = []
path = []
for i in dir_list:
    label = savee_codes.get(i[-8:-6])
    if label is None:
        continue
    emotion.append(label)
    path.append(SAVEE + i)

# Columns: labels, source, path -- one row per selected audio file.
SAVEE_df = pd.DataFrame({
    'labels': emotion,
    'source': ['SAVEE'] * len(emotion),
    'path': path,
})

# Label count distribution
SAVEE_df.labels.value_counts()

angry       60
happy       60
surprise    60
sad         60
fear        60
Name: labels, dtype: int64

### RAVDESS Dataset

In [6]:
# RAVDESS stores the emotion as the third dash-separated field of the file name.
# Only the codes listed here are kept; '01', '02' and '07' are left out on purpose.
rav_codes = {
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fear',
    '08': 'surprise',
}

# Sorted listings keep the row order reproducible (os.listdir order is arbitrary).
dir_list = sorted(os.listdir(RAV))

emotion = []
path = []

for i in dir_list:
    sub_dir = RAV + i
    if not os.path.isdir(sub_dir):
        # A stray file next to the sub-folders would make os.listdir() raise.
        continue
    for f in sorted(os.listdir(sub_dir)):
        part = f.split('.')[0].split('-')
        if len(part) < 3:
            # Not a "xx-xx-<emotion>-..." name (e.g. a hidden OS file); skip it
            # instead of failing with IndexError.
            continue
        label = rav_codes.get(part[2])
        if label is not None:
            emotion.append(label)
            path.append(sub_dir + '/' + f)

# Columns: labels, source, path -- one row per selected audio file.
RAV_df = pd.DataFrame({
    'labels': emotion,
    'source': ['RAVDESS'] * len(emotion),
    'path': path,
})

# Label count distribution
RAV_df.labels.value_counts()

happy       192
angry       192
surprise    192
sad         192
fear        192
Name: labels, dtype: int64

### TESS Dataset

In [8]:
# TESS keeps one folder per (speaker, emotion) pair. Folder names are matched
# exactly, including their inconsistent capitalisation and spelling; folders
# not listed here are ignored.
tess_folders = {
    'OAF_angry': 'angry',
    'YAF_angry': 'angry',
    'OAF_Fear': 'fear',
    'YAF_fear': 'fear',
    'OAF_happy': 'happy',
    'YAF_happy': 'happy',
    'OAF_Pleasant_surprise': 'surprise',
    'YAF_pleasant_surprised': 'surprise',
    'OAF_Sad': 'sad',
    'YAF_sad': 'sad',
}

# Sorted listings keep the row order reproducible (os.listdir order is arbitrary).
dir_list = sorted(os.listdir(TESS))

path = []
emotion = []

for i in dir_list:
    # The label depends only on the folder, so decide once per folder and do
    # not even list the contents of entries that are not selected.
    label = tess_folders.get(i)
    if label is None:
        continue
    for f in sorted(os.listdir(TESS + i)):
        emotion.append(label)
        path.append(TESS + i + "/" + f)

# Columns: labels, source, path -- one row per selected audio file.
TESS_df = pd.DataFrame({
    'labels': emotion,
    'source': ['TESS'] * len(emotion),
    'path': path,
})

# Label count distribution
TESS_df.labels.value_counts()

sad         400
fear        400
happy       400
angry       400
surprise    400
Name: labels, dtype: int64

## Saving the file paths

In [10]:
# Stack the three per-corpus frames. ignore_index=True gives the combined frame
# a fresh 0..n-1 index; without it every source frame contributes its own
# 0-based labels and the index contains duplicates.
df = pd.concat([SAVEE_df, RAV_df, TESS_df], axis=0, ignore_index=True)
print(df.labels.value_counts())

# Create the output folder if needed so that to_csv() does not fail on a clean
# checkout, then write one row per audio file (labels, source, path).
os.makedirs(paths['save_path'], exist_ok=True)
df.to_csv(f"{paths['save_path']}/data_paths.csv",index=False)

sad         652
fear        652
happy       652
surprise    652
angry       652
Name: labels, dtype: int64
