In [14]:
#IMPORT THE LIBRARIES
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import IPython.display as ipd
from IPython.display import Audio

In [2]:
#preparing data set

ravdess = "Z:/dev/machine/audio_speech_actors_01-24/"
Crema = "Z:/dev/machine/cremad/AudioWAV/"
Tess = "Z:/dev/machine/tess/"
Savee = "Z:/dev/machine/savee/ALL/"

In [3]:
file_emotion = []
file_path = []
ravdess_directory_list = os.listdir(ravdess)

for i in ravdess_directory_list:
    # as their are 24 different actors in our previous directory we need to extract files for each actor.
    actor = os.listdir(ravdess + i)
    for f in actor:
        part = f.split('.')[0].split('-')
    # third part in each file represents the emotion associated to that file.
        file_emotion.append(int(part[2]))
        file_path.append(ravdess + i + '/' + f)

In [4]:
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
ravdess_df = pd.concat([emotion_df, path_df], axis=1)
# changing integers to actual emotions.
ravdess_df.Emotions.replace({1:'neutral', 2:'neutral', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust',
                             8:'surprise'},
                            inplace=True)

selected_emotions = ['neutral', 'sad', 'happy', 'angry']
selected_ravdess_df = ravdess_df[ravdess_df['Emotions'].isin(selected_emotions)]


print(selected_ravdess_df.head())
print("______________________________________________")
print(selected_ravdess_df.tail())
print("_______________________________________________")
print(selected_ravdess_df.Emotions.value_counts())

  Emotions                                               Path
0  neutral  Z:/dev/machine/audio_speech_actors_01-24/Actor...
1  neutral  Z:/dev/machine/audio_speech_actors_01-24/Actor...
2  neutral  Z:/dev/machine/audio_speech_actors_01-24/Actor...
3  neutral  Z:/dev/machine/audio_speech_actors_01-24/Actor...
4  neutral  Z:/dev/machine/audio_speech_actors_01-24/Actor...
______________________________________________
     Emotions                                               Path
1411    angry  Z:/dev/machine/audio_speech_actors_01-24/Actor...
1412    angry  Z:/dev/machine/audio_speech_actors_01-24/Actor...
1413    angry  Z:/dev/machine/audio_speech_actors_01-24/Actor...
1414    angry  Z:/dev/machine/audio_speech_actors_01-24/Actor...
1415    angry  Z:/dev/machine/audio_speech_actors_01-24/Actor...
_______________________________________________
Emotions
neutral    288
happy      192
sad        192
angry      192
Name: count, dtype: int64


In [5]:
crema_directory_list = os.listdir(Crema)

file_emotion = []
file_path = []

for file in crema_directory_list:
    # storing file paths
    file_path.append(Crema + file)
    # storing file emotions
    part=file.split('_')
    if part[2] == 'SAD':
        file_emotion.append('sad')
    elif part[2] == 'ANG':
        file_emotion.append('angry')
    elif part[2] == 'DIS':
        file_emotion.append('disgust')
    elif part[2] == 'FEA':
        file_emotion.append('fear')
    elif part[2] == 'HAP':
        file_emotion.append('happy')
    elif part[2] == 'NEU':
        file_emotion.append('neutral')
    else:
        file_emotion.append('Unknown')
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
selected_emotions = ['neutral', 'sad', 'happy', 'angry']
selected_crema_df = Crema_df[Crema_df['Emotions'].isin(selected_emotions)]
selected_crema_df.head()
print(selected_crema_df.Emotions.value_counts())


Emotions
angry      1271
happy      1271
sad        1271
neutral    1087
Name: count, dtype: int64


In [6]:
tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

for dir in tess_directory_list:
    directories = os.listdir(Tess + dir)
    for file in directories:
        part = file.split('.')[0]
        part = part.split('_')[2]
        if part=='ps':
            file_emotion.append('surprise')
        else:
            file_emotion.append(part)
        file_path.append(Tess + dir + '/' + file)
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])

Tess_df = pd.concat([emotion_df, path_df], axis=1)

selected_emotions = ['neutral', 'sad', 'happy', 'angry']
selected_tess_df = Tess_df[Tess_df['Emotions'].isin(selected_emotions)]

selected_tess_df.head()
print(selected_tess_df.Emotions.value_counts())

Emotions
angry      400
happy      400
neutral    400
sad        400
Name: count, dtype: int64


In [7]:
savee_directory_list = os.listdir(Savee)

file_emotion = []
file_path = []

for file in savee_directory_list:
    file_path.append(Savee + file)
    part = file.split('_')[1]
    ele = part[:-6]
    if ele=='a':
        file_emotion.append('angry')
    elif ele=='d':
        file_emotion.append('disgust')
    elif ele=='f':
        file_emotion.append('fear')
    elif ele=='h':
        file_emotion.append('happy')
    elif ele=='n':
        file_emotion.append('neutral')
    elif ele=='sa':
        file_emotion.append('sad')
    else:
        file_emotion.append('surprise')
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Savee_df = pd.concat([emotion_df, path_df], axis=1)
selected_emotions = ['neutral', 'sad', 'happy', 'angry']
selected_save_df = Tess_df[Tess_df['Emotions'].isin(selected_emotions)]

selected_save_df.head()
print(selected_save_df.Emotions.value_counts())


Emotions
angry      400
happy      400
neutral    400
sad        400
Name: count, dtype: int64


In [8]:
data_path = pd.concat([selected_ravdess_df, selected_tess_df, selected_save_df, selected_crema_df], axis = 0)
data_path.to_csv("data_path.csv",index=False)
data_path.head()

Unnamed: 0,Emotions,Path
0,neutral,Z:/dev/machine/audio_speech_actors_01-24/Actor...
1,neutral,Z:/dev/machine/audio_speech_actors_01-24/Actor...
2,neutral,Z:/dev/machine/audio_speech_actors_01-24/Actor...
3,neutral,Z:/dev/machine/audio_speech_actors_01-24/Actor...
4,neutral,Z:/dev/machine/audio_speech_actors_01-24/Actor...


In [25]:
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate=rate)

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)


In [26]:
def extract_features(data):
    # ZCR
    result = np.array([])
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result=np.hstack((result, zcr)) # stacking horizontally

    # Chroma_stft
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft)) # stacking horizontally

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mfcc)) # stacking horizontally

    # Root Mean Square Value
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    result = np.hstack((result, rms)) # stacking horizontally

    # MelSpectogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel)) # stacking horizontally
    
    return result

def get_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
    
    # without augmentation
    res1 = extract_features(data)
    result = np.array(res1)
    
    # data with noise
    noise_data = noise(data)
    res2 = extract_features(noise_data)
    result = np.vstack((result, res2)) # stacking vertically
    
    # data with stretching and pitching
    new_data = stretch(data)
    data_stretch_pitch = pitch(new_data, sample_rate)
    res3 = extract_features(data_stretch_pitch)
    result = np.vstack((result, res3)) # stacking vertically
    
    return result

In [27]:
import timeit
from tqdm import tqdm
start = timeit.default_timer()
X,Y=[],[]
for path,emotion,index in tqdm (zip(data_path.Path,data_path.Emotions,range(data_path.Path.shape[0]))):
    features=get_features(path)
    if index%500==0:
        print(f'{index} audio has been processed')
    for i in features:
        X.append(i)
        Y.append(emotion)
print('Done')
stop = timeit.default_timer()

print('Time: ', stop - start)   

2it [00:00,  7.91it/s]

0 audio has been processed


502it [01:08,  7.25it/s]

500 audio has been processed


1002it [02:08, 13.37it/s]

1000 audio has been processed


1502it [02:51,  9.40it/s]

1500 audio has been processed


2002it [03:39, 12.12it/s]

2000 audio has been processed


2504it [04:25, 15.27it/s]

2500 audio has been processed


3002it [05:03, 11.83it/s]

3000 audio has been processed


3502it [05:49, 12.96it/s]

3500 audio has been processed


4002it [06:32, 10.46it/s]

4000 audio has been processed


4502it [07:31,  8.94it/s]

4500 audio has been processed


5002it [08:25, 10.56it/s]

5000 audio has been processed


5502it [09:18,  8.61it/s]

5500 audio has been processed


6002it [10:12,  9.42it/s]

6000 audio has been processed


6502it [11:06, 10.16it/s]

6500 audio has been processed


7002it [11:58,  9.47it/s]

7000 audio has been processed


7502it [12:52,  7.99it/s]

7500 audio has been processed


8002it [13:45, 10.39it/s]

8000 audio has been processed


  return pitch_tuning(
8501it [14:40,  8.55it/s]

8500 audio has been processed


8964it [15:31,  9.62it/s]

Done
Time:  931.640261





In [28]:
len(X), len(Y), data_path.Path.shape

(26892, 26892, (8964,))

In [29]:
Emotions = pd.DataFrame(X)
Emotions['Emotions'] = Y
Emotions.to_csv('emotion.csv', index=False)
Emotions.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,153,154,155,156,157,158,159,160,161,Emotions
0,0.321275,0.729664,0.750033,0.730624,0.735275,0.713529,0.660531,0.684966,0.733049,0.753972,...,4.310903e-06,3.291511e-06,2.148075e-06,2.279739e-06,5.116492e-06,8.190282e-06,7e-06,5e-06,4.245834e-07,neutral
1,0.316031,0.810181,0.809943,0.80369,0.820234,0.79746,0.68432,0.69694,0.741394,0.774245,...,4.915551e-05,4.840747e-05,5.131785e-05,5.286839e-05,5.278305e-05,5.42745e-05,5.6e-05,5.6e-05,4.987747e-05,neutral
2,0.188227,0.622132,0.699217,0.75334,0.721217,0.701731,0.682358,0.662839,0.686496,0.73397,...,8.579046e-07,9.576654e-07,7.733597e-07,5.2331e-07,3.593209e-07,9.263777e-07,2e-06,1e-06,7.753991e-08,neutral
3,0.293566,0.673896,0.722096,0.723508,0.682302,0.680533,0.675352,0.628977,0.679179,0.707283,...,6.984504e-06,7.034949e-06,6.654922e-06,6.979548e-06,1.214236e-05,9.640185e-06,1.1e-05,6e-06,4.254087e-07,neutral
4,0.299176,0.766928,0.798759,0.779664,0.770594,0.778229,0.684694,0.640049,0.698336,0.746909,...,2.902367e-05,2.99837e-05,3.224235e-05,3.128442e-05,3.616239e-05,3.367691e-05,3.6e-05,3e-05,2.526569e-05,neutral
