In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
dataset="/content/drive/MyDrive/audio_speech_actors"

In [3]:
import warnings
warnings.filterwarnings('ignore')

import IPython.display as ipd
import librosa
import librosa.display
import pandas as pd
import numpy as np
import os

In [4]:
actor_folders = os.listdir(dataset)
actor_folders.sort()
actor_folders

['Actor_01',
 'Actor_02',
 'Actor_03',
 'Actor_04',
 'Actor_05',
 'Actor_06',
 'Actor_07',
 'Actor_08',
 'Actor_09',
 'Actor_10',
 'Actor_11',
 'Actor_12',
 'Actor_13',
 'Actor_14',
 'Actor_15',
 'Actor_16',
 'Actor_17',
 'Actor_18',
 'Actor_19',
 'Actor_20',
 'Actor_21',
 'Actor_22',
 'Actor_23',
 'Actor_24']

In [5]:
emotions = []
gender = []
actor_ID = []
file_path = []
for i in actor_folders:
    filename = os.listdir(dataset +'/'+ i) #iterate over Actor folders
    for f in filename: # go through files in the corresponding Actor's folder
        part = f.split('.')[0].split('-')#We split the file name initially as['03-01-01-01-01-01-01','wav'];Now,the 0th element is converted as ['03','01','01','01','01','01','01']
        emotions.append(int(part[2])) #Since,the second element represents the emotion of the actor.
        actor_ID.append(int(part[6])) #Since,the sixth element represents the ID of the actor.
        bg = int(part[6])
        if bg%2 == 0:
            bg = "female" #Since,even IDs correspond to females.
        else:
            bg = "male"  #Since,odd IDs correspond to females.
        gender.append(bg)
        file_path.append(dataset +'/'+ i + '/' + f)

#Creating a dataframe:
audio_df = pd.DataFrame(emotions)
audio_df = audio_df.replace({1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'})
audio_df = pd.concat([pd.DataFrame(gender),audio_df,pd.DataFrame(actor_ID)],axis=1)
audio_df.columns = ['Gender','Emotion','Actor_ID']
audio_df = pd.concat([audio_df,pd.DataFrame(file_path, columns = ['Path'])],axis=1)
audio_df


Unnamed: 0,Gender,Emotion,Actor_ID,Path
0,male,neutral,1,/content/drive/MyDrive/audio_speech_actors/Act...
1,male,happy,1,/content/drive/MyDrive/audio_speech_actors/Act...
2,male,sad,1,/content/drive/MyDrive/audio_speech_actors/Act...
3,male,happy,1,/content/drive/MyDrive/audio_speech_actors/Act...
4,male,happy,1,/content/drive/MyDrive/audio_speech_actors/Act...
...,...,...,...,...
1411,female,angry,24,/content/drive/MyDrive/audio_speech_actors/Act...
1412,female,disgust,24,/content/drive/MyDrive/audio_speech_actors/Act...
1413,female,disgust,24,/content/drive/MyDrive/audio_speech_actors/Act...
1414,female,surprise,24,/content/drive/MyDrive/audio_speech_actors/Act...


In [6]:
def extract_features(data):
    # ZCR
    result = np.array([])
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result=np.hstack((result, zcr)) # stacking horizontally

    # Chroma_stft
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft)) # stacking horizontally

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mfcc)) # stacking horizontally

    # MelSpectogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel)) # stacking horizontally

    return result

def get_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    # without augmentation
    res1 = extract_features(data)
    result = np.array(res1)

    return result


In [7]:
path = np.array(audio_df.Path)[1]
data, sample_rate = librosa.load(path)

X, Y = [], []
for path, emotion in zip(audio_df.Path, audio_df.Emotion):
    feature = get_features(path)
    # for ele in feature:
    #     X.append(ele)
    #     Y.append(emotion)
    X.append(feature)
    Y.append(emotion)


In [11]:
audio_df.Path

0       /content/drive/MyDrive/audio_speech_actors/Act...
1       /content/drive/MyDrive/audio_speech_actors/Act...
2       /content/drive/MyDrive/audio_speech_actors/Act...
3       /content/drive/MyDrive/audio_speech_actors/Act...
4       /content/drive/MyDrive/audio_speech_actors/Act...
                              ...                        
1411    /content/drive/MyDrive/audio_speech_actors/Act...
1412    /content/drive/MyDrive/audio_speech_actors/Act...
1413    /content/drive/MyDrive/audio_speech_actors/Act...
1414    /content/drive/MyDrive/audio_speech_actors/Act...
1415    /content/drive/MyDrive/audio_speech_actors/Act...
Name: Path, Length: 1416, dtype: object

In [8]:
Features = pd.DataFrame(X)
Features['labels'] = Y
# Features.to_csv('features.csv', index=False)
Features.to_csv('/content/drive/MyDrive/features_dataset.csv', index=False)
Features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,152,153,154,155,156,157,158,159,160,labels
0,0.321275,0.729664,0.750033,0.730624,0.735275,0.713529,0.660531,0.684966,0.733049,0.753972,...,4e-06,3e-06,2e-06,2e-06,5e-06,8e-06,7e-06,5e-06,4.245835e-07,neutral
1,0.25411,0.600759,0.597629,0.670451,0.641004,0.629776,0.649909,0.645301,0.699671,0.633312,...,8.6e-05,6.6e-05,5.7e-05,4e-05,3.3e-05,5.2e-05,4.2e-05,3e-05,2.448416e-06,happy
2,0.265847,0.668577,0.687229,0.691316,0.724266,0.64524,0.584157,0.595757,0.688347,0.744032,...,1e-05,8e-06,7e-06,1e-05,9e-06,1.3e-05,1.4e-05,8e-06,7.894424e-07,sad
3,0.267546,0.688829,0.730021,0.708485,0.706601,0.734002,0.700951,0.615528,0.63745,0.679339,...,1.4e-05,9e-06,1.5e-05,1e-05,1.3e-05,1.6e-05,1.2e-05,1.1e-05,1.123394e-06,happy
4,0.166879,0.572128,0.591161,0.624055,0.62581,0.631003,0.64099,0.592139,0.621158,0.636863,...,0.001118,0.001387,0.001287,0.001252,0.001699,0.001632,0.000791,0.000358,3.131467e-05,happy
