IMPORTING LIBRARIES

In [46]:
import os
import pandas as pd
import numpy as np
import librosa
import tensorflow
from tensorflow.keras.preprocessing.sequence import pad_sequences

LOADING THE DATASET

In [None]:
dataset_path="E:/final year project/dataset"
dataset_map="E:/final year project/dataset/speech_emotion_recognisition.csv"
if not os.path.exists(dataset_map):
    print("Data csv file couldnt be loaded")
elif not os.path.exists(dataset_path):
    print("Data couldn't be loaded")
else:
    print("Data loaded succesfully")

Data loaded succesfully


In [48]:
speech_df=pd.read_csv(dataset_map)
speech_df=speech_df[speech_df['Emotion']!='Surprised']
speech_df=speech_df[speech_df['Emotion']!='Calm']
speech_df = speech_df.drop(speech_df.columns[0], axis=1)
print(speech_df)

      Emotion                  Filename
0       Angry       1001_DFA_ANG_XX.wav
1     Disgust       1001_DFA_DIS_XX.wav
2        Fear       1001_DFA_FEA_XX.wav
3       Happy       1001_DFA_HAP_XX.wav
4     Neutral       1001_DFA_NEU_XX.wav
...       ...                       ...
8869  Disgust  03-01-07-01-02-02-24.wav
8870  Disgust  03-01-07-02-01-01-24.wav
8871  Disgust  03-01-07-02-01-02-24.wav
8872  Disgust  03-01-07-02-02-01-24.wav
8873  Disgust  03-01-07-02-02-02-24.wav

[8498 rows x 2 columns]


FEATURE EXTRACTION

DEFINING FUNCTION FO ZCR

In [None]:
def get_zcr(y,frame_length, hop_length,maxlen):
    zcr=librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length)
    return pad_sequences([zcr.T], maxlen=maxlen,truncating="post",padding="post")
# y,sr =librosa.load("E:/final year project/dataset/Calm/03-01-02-01-01-01-01.wav")
# print(y)
# zcr=get_zcr(y=y,frame_length=2048,hop_length=512,maxlen=130)
# print(zcr.shape)

[ 1.2775290e-05  5.2579480e-07 -5.9776858e-07 ...  6.3326383e-10
 -5.2984428e-10  4.3437093e-10]


DEFINING FUNCTION FOR RMS ENERGY

In [50]:
def get_rms(y,frame_length, hop_length,maxlen):
    rms=librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)
    return pad_sequences([rms.T], maxlen=maxlen,truncating="post",padding="post")

# rms=get_rms(y=y,frame_length=2048,hop_length=512,maxlen=130)
# print(rms.shape)

DEFINING FUNCTION FOR MFCC

In [51]:
def get_mfcc(y,sr,n_mfcc,hop_length,maxlen):
    mfcc=librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc,hop_length=hop_length)
    return pad_sequences([mfcc.T], maxlen=maxlen,truncating="post",padding="post")
# mfcc=get_mfcc(y=y,sr=16000,n_mfcc=13,hop_length=512,maxlen=130)
# print(mfcc.shape)


DEFINING MELSPECTOGRAM FUNCTION

In [None]:
def get_melspec(y,sr,n_fft,n_mels,hop_length,fmin,maxlen):
    mel_spec=librosa.feature.melspectrogram(y=y,sr=sr,n_fft=n_fft,n_mels=n_mels,hop_length=hop_length,fmin=fmin)
    mel_spec_db=librosa.power_to_db(mel_spec,ref=np.max)
    return pad_sequences([mel_spec_db.T],maxlen=maxlen,truncating="post",padding="post")
# mel_spec_db=get_melspec(y,sr,1024,64,512,50,130)
# print(mel_spec_db.shape)

(1, 130, 64)


COLLECTING THE FEATURES

In [61]:
# Defining the params value
FRAME_LENGTH=2048
HOP_LENGTH=512
N_MFCC=13
N_FFT=1024
MAX_LENGTH=130
N_MELS=128
FMIN=50
zcr_list=[]
rmse_list=[]
mfcc_list=[]
mel_spec_list=[]
labels=[]
# Iterating over each row
for index, row in speech_df.iterrows():
    path=os.path.join(dataset_path,row['Emotion'],row['Filename'])
    y,sr=librosa.load(path)
    zcr=get_zcr(y,FRAME_LENGTH,HOP_LENGTH,MAX_LENGTH)
    zcr_list.append(zcr)
    rmse=get_rms(y,FRAME_LENGTH,HOP_LENGTH,MAX_LENGTH)
    rmse_list.append(rmse)
    mfcc=get_mfcc(y,sr,N_MFCC,HOP_LENGTH,MAX_LENGTH)
    mfcc_list.append(mfcc)
    mel_spec=get_melspec(y,sr,N_FFT,N_MELS,HOP_LENGTH,FMIN,MAX_LENGTH)
    mel_spec_list.append(mel_spec)
    labels.append(row['Emotion'])
    print("Values calculated for ",index)

    


Values calculated for  0
Values calculated for  1
Values calculated for  2
Values calculated for  3
Values calculated for  4
Values calculated for  5
Values calculated for  6
Values calculated for  7
Values calculated for  8
Values calculated for  9
Values calculated for  10
Values calculated for  11
Values calculated for  12
Values calculated for  13
Values calculated for  14
Values calculated for  15
Values calculated for  16
Values calculated for  17
Values calculated for  18
Values calculated for  19
Values calculated for  20
Values calculated for  21
Values calculated for  22
Values calculated for  23
Values calculated for  24
Values calculated for  25
Values calculated for  26
Values calculated for  27
Values calculated for  28
Values calculated for  29
Values calculated for  30
Values calculated for  31
Values calculated for  32
Values calculated for  33
Values calculated for  34
Values calculated for  35
Values calculated for  36
Values calculated for  37
Values calculated for 

CONVERTING LIST INTO NUMPY ARRAY

In [69]:
zcr_array=np.array(zcr_list)
rmse_array=np.array(rmse_list)
mfcc_array=np.array(mfcc_list)
mel_spec_array=np.array(mel_spec_list)

In [78]:
features=np.concatenate([zcr_array,rmse_array,mfcc_array,mel_spec_array],axis=-1)
features=features.astype('float32')
Emotions=np.asarray(labels)
features.shape

(8498, 1, 130, 143)

In [80]:
features=features.reshape(features.shape[0],-1)
features_df=pd.DataFrame(features)
features_df['Labels']=Emotions

In [81]:
features_csv=features_df.to_csv(os.path.join(dataset_path,"Features.csv"))

In [83]:
features_df.shape

(8498, 18591)