In [1]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music; it is used below to load the audio files and extract features from them.
import librosa
import librosa.display
from scipy.fft import fft, ifft
from spafe.features.gfcc import gfcc
from pyAudioProcessing import extract_features
import seaborn as sns
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import  resize

from sklearn.preprocessing import StandardScaler, OneHotEncoder, normalize
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, LSTM
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

import warnings
# Silence every warning for this notebook run, unless the interpreter was
# started with explicit -W options (sys.warnoptions is non-empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# Always ignore DeprecationWarning, even when -W options were supplied.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# Paths for data.
# Root folders of the datasets. These are absolute paths on the author's
# machine and have to be edited before the notebook can run anywhere else.
# Each path must end with '/' because the loading cells append directory and
# file names to it by plain string concatenation.
Ravdess = "C:/Users/Andrzej/Desktop/Magisterka/datasets/ravdess/audio_speech_actors_01-24/"
Crema = "C:/Users/Andrzej/Desktop/Magisterka/datasets/cremad/AudioWAV/"
# NOTE(review): Savee appears to be referenced only by the commented-out SAVEE cell - confirm.
Savee = "C:/Users/Andrzej/Desktop/Magisterka/datasets/savee/"

In [3]:
# Build a table with one row per RAVDESS recording: its emotion label and path.
# The dataset root holds one sub-folder per actor; in every file name the third
# dash-separated field is the numeric emotion code.

# RAVDESS emotion code -> label. Code 2 (calm) is deliberately merged into
# 'neutral'. Code 8 (surprised) is mapped explicitly so that the column never
# ends up with a stray integer mixed in with the string labels.
ravdess_emotion_labels = {
    1: 'neutral', 2: 'neutral', 3: 'happy', 4: 'sad',
    5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise',
}

ravdess_directory_list = os.listdir(Ravdess)

file_emotion = []
file_path = []
for actor_dir in ravdess_directory_list:
    actor_path = Ravdess + actor_dir
    # Ignore stray files that sit next to the actor folders.
    if not os.path.isdir(actor_path):
        continue
    for file in os.listdir(actor_path):
        # Ignore anything that is not a recording (e.g. OS metadata files).
        if not file.lower().endswith('.wav'):
            continue
        part = file.split('.')[0].split('-')
        # third part in each file name represents the emotion of that file.
        file_emotion.append(int(part[2]))
        file_path.append(actor_path + '/' + file)

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# changing integers to actual emotions. The result is assigned back to the
# column instead of calling replace(inplace=True) on the attribute, which is a
# chained in-place update and does not modify the frame under copy-on-write.
Ravdess_df['Emotions'] = Ravdess_df['Emotions'].replace(ravdess_emotion_labels)
Ravdess_df.head()

Unnamed: 0,Emotions,Path
0,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
1,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
2,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
3,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
4,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...


In [4]:
# Build a table with one row per CREMA-D recording: its emotion label and path.
# The third underscore-separated field of every file name is a three-letter
# emotion code.
crema_directory_list = os.listdir(Crema)

# emotion code -> label; any other code is labelled 'Unknown'
crema_emotion_codes = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

# full path of every file, in directory-listing order
file_path = [Crema + name for name in crema_directory_list]
# matching emotion label for every file
file_emotion = [
    crema_emotion_codes.get(name.split('_')[2], 'Unknown')
    for name in crema_directory_list
]

# one single-column frame per list, then glue them side by side
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,Emotions,Path
0,angry,C:/Users/Andrzej/Desktop/Magisterka/datasets/c...
1,disgust,C:/Users/Andrzej/Desktop/Magisterka/datasets/c...
2,fear,C:/Users/Andrzej/Desktop/Magisterka/datasets/c...
3,happy,C:/Users/Andrzej/Desktop/Magisterka/datasets/c...
4,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/c...


In [5]:
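# NOTE: SAVEE loading is disabled - this whole cell is commented out, so no
# Savee_df is created and SAVEE recordings are not part of data_path.
# savee_directory_list = os.listdir(Savee)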

# file_emotion = []
# file_path = []

# for file in savee_directory_list:
#     file_path.append(Savee + file)
#     part = file.split('_')[1]
#     ele = part[:-6]
#     if ele == 'a':
#         file_emotion.append('angry')
#     elif ele == 'd':
#         file_emotion.append('disgust')
#     elif ele == 'f':
#         file_emotion.append('fear')
#     elif ele == 'h':
#         file_emotion.append('happy')
#     elif ele == 'n':
#         file_emotion.append('neutral')
#     elif ele == 'sa':
#         file_emotion.append('sad')
#     else:
#         pass
        
# # dataframe for emotion of files
# emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# # dataframe for path of files.
# path_df = pd.DataFrame(file_path, columns=['Path'])
# Savee_df = pd.concat([emotion_df, path_df], axis=1)
# Savee_df.head()

In [6]:
# Stack the two per-dataset tables (RAVDESS and CREMA-D) row-wise into a single
# frame. Row labels of the source frames are kept as they are.
source_frames = [Ravdess_df, Crema_df]
data_path = pd.concat(source_frames, axis=0)
# Keep a copy on disk in the working directory, without the index column.
data_path.to_csv("data_path.csv", index=False)
data_path.head()

Unnamed: 0,Emotions,Path
0,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
1,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
2,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
3,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...
4,neutral,C:/Users/Andrzej/Desktop/Magisterka/datasets/r...


In [7]:
def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale ``X`` so its smallest value maps to ``min`` and its largest to ``max``.

    Parameters
    ----------
    X : array-like exposing ``.min()`` / ``.max()`` and arithmetic (e.g. a NumPy array)
        Values to rescale.
    min, max : float
        Bounds of the target range.

    Returns
    -------
    Same shape as ``X``, floating point, with values in ``[min, max]``.
    When every element of ``X`` is identical there is no spread to stretch,
    so every element of the result is ``min`` (instead of NaN from 0/0).
    """
    lowest = X.min()
    spread = X.max() - lowest
    if spread == 0:
        # Constant input: (X - lowest) is all zeros; multiplying by 0.0 keeps
        # the shape and container type while forcing a float result.
        return (X - lowest) * 0.0 + min
    X_std = (X - lowest) / spread
    return X_std * (max - min) + min

def spectrogram_image(y, sr, out, hop_length, n_mels, emotion, size=(128, 108)):
    """Render a log-mel spectrogram of ``y`` and save it as an 8-bit PNG.

    The image is written to ``spectrograms/<emotion>/<out>``; the folder is
    created when it does not exist yet.

    Parameters
    ----------
    y : audio samples, passed to ``librosa.feature.melspectrogram``
    sr : sampling rate of ``y``
    out : file name of the PNG (without directory)
    hop_length : hop between spectrogram frames; the FFT size is twice this value
    n_mels : number of mel bands
    emotion : label used as the sub-folder name (converted with ``str``)
    size : (rows, columns) of the saved image; defaults to the previously
        hard-coded ``(128, 108)``
    """
    # use log-melspectrogram
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels,
                                          n_fft=hop_length * 2, hop_length=hop_length)
    mels = np.log(mels + 1e-9)  # add small number to avoid log(0)

    # min-max scale to fit inside 8-bit range
    img = scale_minmax(mels, 0, 255).astype(np.uint8)
    img = np.flip(img, axis=0)  # put low frequencies at the bottom in image
    img = 255 - img  # invert. make black==more energy

    # resize() returns a float image. preserve_range=True keeps the 0..255
    # scale, so the result can be rounded back to uint8 explicitly instead of
    # leaving a float image for the PNG writer to convert (or reject).
    img = resize(img, size, anti_aliasing=False, preserve_range=True)
    img = np.clip(np.rint(img), 0, 255).astype(np.uint8)

    # save as PNG, creating the per-emotion folder on first use
    out_dir = os.path.join('spectrograms', str(emotion))
    os.makedirs(out_dir, exist_ok=True)
    skimage.io.imsave(os.path.join(out_dir, out), img)

In [8]:
# Spectrogram settings
hop_length = 512   # samples per time-step in the spectrogram
n_mels = 128       # number of mel bins (height of the image)
time_steps = 256   # number of time-steps (width of the image)

# Fixed-length window cut from each clip, expressed in samples
start_sample = 0   # begin at the very first sample
length_samples = hop_length * time_steps

In [9]:
%%capture
# Render one spectrogram PNG per recording. The running index keeps file names
# unique even when several clips share the same emotion.
for i, (path, emotion) in enumerate(zip(data_path.Path, data_path.Emotions)):
    # Load 2.5 s of audio, skipping the first 0.5 s of the file.
    data, sample_rate = librosa.load(path, duration = 2.5, offset = 0.5)
    # Use the label as text for both the file name and the target folder.
    emotion = str(emotion)
    out = emotion + '_' + str(i) + '.png'
    # Cut the configured fixed-length window and render that (previously the
    # window was computed but the untrimmed signal was passed on). Clips
    # shorter than the window are kept whole, so with a window longer than the
    # loaded 2.5 s excerpt the output is unchanged.
    window = data[start_sample:start_sample+length_samples]
    spectrogram_image(window, sr=sample_rate, out=out, hop_length=hop_length, n_mels=n_mels, emotion=emotion)