# Load files

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import IPython
from IPython.display import Audio
from IPython.display import Image
import matplotlib.pyplot as plt
import seaborn as sns
import sys

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import warnings
# Silence every warning unless the interpreter was started with explicit
# -W options (sys.warnoptions is empty when none were given).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# Independently of the check above, always hide DeprecationWarning.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Build one (Emotions, Path) table per corpus, then merge them into `data_path`
# with the emotion encoded as an integer class id.

# path for speech emotion data
Ravdess = "/content/drive/MyDrive/SER/Ravdess/audio_speech_actors_01-24/"
# Crema = "/content/drive/MyDrive/SER/Crema/"
Tess = "/content/drive/MyDrive/SER/Tess/"
Savee = "/content/drive/MyDrive/SER/Savee/"

# RAVDESS: the third dash-separated field of the file name is a numeric emotion code.
RAVDESS_EMOTIONS = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fear',
                    7: 'disgust', 8: 'surprise'}
# SAVEE: first letter of the code after the speaker prefix ('su' is checked first,
# anything not listed here falls back to 'disgust').
SAVEE_EMOTIONS = {'a': 'angry', 'h': 'happy', 's': 'sad', 'n': 'neutral', 'f': 'fear'}
# Emotion name -> integer class id used by the rest of the notebook.
EMOTION_IDS = {'neutral': 1, 'calm': 2, 'happy': 3, 'sad': 4, 'angry': 5, 'fear': 6,
               'disgust': 7, 'surprise': 0}
# 7-class alternative (no 'calm'):
# EMOTION_IDS = {'neutral': 1, 'happy': 2, 'sad': 3, 'angry': 4, 'fear': 5,
#                'disgust': 6, 'surprise': 0}


def _labelled_paths(emotions, paths):
    """Return a DataFrame with columns 'Emotions' and 'Path' built from two parallel lists."""
    return pd.DataFrame({'Emotions': emotions, 'Path': paths})


##################################################################################################
# RAVDESS: <root>/<actor dir>/<file>, e.g. 03-01-06-01-01-01-01.wav

file_emotion = []
file_path = []
# Listings are sorted because os.listdir() returns entries in arbitrary order;
# sorting keeps the row order (and therefore every later index) reproducible.
for actor_dir in sorted(os.listdir(Ravdess)):
    for file_name in sorted(os.listdir(os.path.join(Ravdess, actor_dir))):
        code = int(file_name.split('.')[0].split('-')[2])
        # Unknown codes are kept as-is rather than dropped.
        file_emotion.append(RAVDESS_EMOTIONS.get(code, code))
        file_path.append(os.path.join(Ravdess, actor_dir, file_name))
Ravdess_df = _labelled_paths(file_emotion, file_path)

#################################################################################################################
# CREMA-D is currently disabled; uncomment together with the `Crema` path to include it.
# CREMA_EMOTIONS = {'SAD': 'sad', 'ANG': 'angry', 'DIS': 'disgust', 'FEA': 'fear',
#                   'HAP': 'happy', 'NEU': 'neutral'}
# crema_files = sorted(os.listdir(Crema))
# Crema_df = _labelled_paths(
#     [CREMA_EMOTIONS.get(name.split('_')[2], 'Unknown') for name in crema_files],
#     [Crema + name for name in crema_files])

###################################################################################################
# TESS: <root>/<dir>/<file>; the third underscore-separated field is the emotion name.

file_emotion = []
file_path = []
for speaker_dir in sorted(os.listdir(Tess)):
    for file_name in sorted(os.listdir(os.path.join(Tess, speaker_dir))):
        label = file_name.split('.')[0].split('_')[2]
        # 'ps' is the only label that needs renaming; it is stored as 'surprise'.
        file_emotion.append('surprise' if label == 'ps' else label)
        file_path.append(os.path.join(Tess, speaker_dir, file_name))
Tess_df = _labelled_paths(file_emotion, file_path)

###########################################################################################
# SAVEE: flat directory; the two characters after the first underscore carry the emotion.

file_emotion = []
file_path = []
for file_name in sorted(os.listdir(Savee)):
    code = file_name.split('_')[1][:2]
    if 'su' in code:
        file_emotion.append('surprise')
    else:
        file_emotion.append(SAVEE_EMOTIONS.get(code[0], 'disgust'))
    file_path.append(Savee + file_name)
Savee_df = _labelled_paths(file_emotion, file_path)

##################################################################################################

data_path = pd.concat([Ravdess_df, Tess_df, Savee_df], axis=0, ignore_index=True)
# Assign the encoded column back instead of calling replace(inplace=True) on the
# attribute accessor: chained in-place mutation is deprecated in pandas and is a
# no-op under Copy-on-Write. Labels missing from EMOTION_IDS are left unchanged.
data_path['Emotions'] = data_path['Emotions'].map(lambda label: EMOTION_IDS.get(label, label))

#data_path = data_path[data_path.Emotions.isin([7, 3, 4, 6, 5, 1])]
data_path

In [None]:
# Class id -> emotion name; the ids match the integer encoding of data_path['Emotions'].
EMOTIONS = {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 0:'surprise'}

class_ids = sorted(EMOTIONS)
# value_counts() returns counts ordered by frequency, not by class id. Reindex by
# class id so that the bar drawn at position i really is the count of EMOTIONS[i]
# (classes with no examples get a zero-height bar).
emotion_counts = data_path['Emotions'].value_counts().reindex(class_ids, fill_value=0)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=class_ids, height=emotion_counts)
ax.set_xticks(ticks=class_ids)
ax.set_xticklabels([EMOTIONS[i] for i in class_ids],fontsize=10)
ax.set_xlabel('Emotion')
ax.set_ylabel('Number of examples')

In [None]:
# Seaborn view of the class distribution: one bar per distinct value of
# data_path["Emotions"], drawn on the current axes.
count_ax = sns.countplot(x=data_path["Emotions"])
count_ax.set_title('Count of Emotions', size=16)
count_ax.set_ylabel('Count', size=12)
count_ax.set_xlabel('Emotions', size=12)
# Keep only the left and bottom spines.
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

# Load the signals

In [None]:
mel_spectrograms = []

SAMPLE_RATE = 48000
CLIP_SECONDS = 2                                   # length of every clip fed to the model
CLIP_SAMPLES = int(SAMPLE_RATE * CLIP_SECONDS)     # samples per fixed-length clip


def load_fixed_length_signals(paths, offset, corpus_name):
    """Load every file in `paths` as a zero-padded clip of CLIP_SAMPLES samples.

    Parameters
    ----------
    paths : iterable of str
        Audio file paths, loaded in order.
    offset : float
        Seconds to skip at the start of each file before reading.
    corpus_name : str
        Name shown in the progress line.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(paths), CLIP_SAMPLES); row i holds the audio of
        paths[i] resampled to SAMPLE_RATE, right-padded with zeros.
    """
    paths = list(paths)
    signals = np.zeros((len(paths), CLIP_SAMPLES))
    for i, path in enumerate(paths):
        audio, _ = librosa.load(path, duration=CLIP_SECONDS, offset=offset, sr=SAMPLE_RATE)
        # Truncate before copying: if the loader returns more than CLIP_SAMPLES
        # samples (e.g. resampling round-up) the assignment below would fail.
        audio = audio[:CLIP_SAMPLES]
        signals[i, :len(audio)] = audio
        print("\r Processed {}/{} {} files".format(i + 1, len(paths), corpus_name), end='')
    return signals


# Each corpus uses its own start offset (seconds skipped at the beginning of the file).
savee_signals = load_fixed_length_signals(Savee_df.Path, offset=0.3, corpus_name="Savee")
tess_signals = load_fixed_length_signals(Tess_df.Path, offset=0, corpus_name="Tess")
ravdess_signals = load_fixed_length_signals(Ravdess_df.Path, offset=0.75, corpus_name="Ravdess")

# Row order must match data_path, which is the concatenation Ravdess, Tess, Savee.
total_signals = np.vstack((ravdess_signals, tess_signals, savee_signals))
total_signals.shape

# Split the data

In [None]:
# Stratified 80/10/10 train/val/test split: every emotion class is shuffled and
# split separately so that all three sets keep the same class proportions.
X = total_signals

SPLIT_SEED = 42
# Dedicated seeded generator so the split is identical on every run.
split_rng = np.random.default_rng(SPLIT_SEED)

# Only the first X.shape[0] rows of data_path describe rows of X; the augmentation
# cell appends extra rows at the end, which must be ignored if this cell is re-run.
split_labels = data_path['Emotions'].iloc[:X.shape[0]]

train_ind,test_ind,val_ind = [],[],[]
X_train,X_val,X_test = [],[],[]
Y_train,Y_val,Y_test = [],[],[]
for emotion in range(len(EMOTIONS)):
    # Integer index array (stays integer-typed even when the class is empty).
    emotion_ind = split_labels.index[split_labels == emotion].to_numpy()
    emotion_ind = split_rng.permutation(emotion_ind)
    m = len(emotion_ind)
    ind_train, ind_val, ind_test = np.split(emotion_ind, [int(0.8*m), int(0.9*m)])

    for ind, x_parts, y_parts, ind_parts in (
        (ind_train, X_train, Y_train, train_ind),
        (ind_val, X_val, Y_val, val_ind),
        (ind_test, X_test, Y_test, test_ind),
    ):
        x_parts.append(X[ind,:])
        y_parts.append(np.full(len(ind), emotion, dtype=np.int32))
        ind_parts.append(ind)

X_train = np.concatenate(X_train,0)
X_val = np.concatenate(X_val,0)
X_test = np.concatenate(X_test,0)
Y_train = np.concatenate(Y_train,0)
Y_val = np.concatenate(Y_val,0)
Y_test = np.concatenate(Y_test,0)
# train_ind[i] is the data_path row that X_train[i] / Y_train[i] came from (same for val/test).
train_ind = np.concatenate(train_ind,0)
val_ind = np.concatenate(val_ind,0)
test_ind = np.concatenate(test_ind,0)
print(f'X_train:{X_train.shape}, Y_train:{Y_train.shape}')
print(f'X_val:{X_val.shape}, Y_val:{Y_val.shape}')
print(f'X_test:{X_test.shape}, Y_test:{Y_test.shape}')
# check if all are unique
unique, count = np.unique(np.concatenate([train_ind,test_ind,val_ind],0), return_counts=True)
assert (count == 1).all(), "a sample was assigned to more than one split"
print("Number of unique indexes is {}, out of {}".format(sum(count==1), X.shape[0]))

del X

# Data Augmentation

In [None]:
def addAWGN(signal, num_bits=16, augmented_num=2, snr_low=15, snr_high=30):
    """Return `augmented_num` noisy copies of `signal` with additive white Gaussian noise.

    One integer SNR (in dB) is drawn from [snr_low, snr_high) -- the upper bound
    is exclusive -- and shared by all copies. Each copy gets its own noise
    realisation, scaled so that signal power / added-noise power equals that SNR.

    Parameters
    ----------
    signal : numpy.ndarray
        1-D signal.
    num_bits : int
        Bit depth used to normalise signal and noise before measuring power.
    augmented_num : int
        Number of noisy copies to generate.
    snr_low, snr_high : int
        Bounds of the SNR draw, in dB.

    Returns
    -------
    numpy.ndarray
        Array of shape (augmented_num, len(signal)).
    """
    n_samples = len(signal)
    # One independent standard-normal noise row per augmented copy.
    white_noise = np.random.normal(size=(augmented_num, n_samples))

    # Mean power of the normalised signal (scalar) and of each noise row (vector).
    full_scale = 2.0 ** (num_bits - 1)
    signal_power = np.sum((signal / full_scale) ** 2) / n_samples
    noise_power = np.sum((white_noise / full_scale) ** 2, axis=1) / n_samples

    snr_db = np.random.randint(snr_low, snr_high)

    # Per-copy amplitude gain that brings each noise row to the target SNR.
    gain = np.sqrt((signal_power / noise_power) * 10 ** (-snr_db / 10))

    # Broadcast the gain over the sample axis and add the scaled noise to the signal.
    return signal + gain[:, np.newaxis] * white_noise

In [None]:
# Augment the training split only: every training signal gets noisy copies from
# addAWGN, which are appended to X_train / Y_train.
aug_signals = []
aug_labels = []
aug_source_rows = []   # data_path row each augmented signal was derived from
for i in range(X_train.shape[0]):
    signal = X_train[i,:]
    augmented_signals = addAWGN(signal)
    for j in range(augmented_signals.shape[0]):
        # X_train was reordered by the stratified split, so row i of data_path is
        # NOT the source of X_train[i]. The matching label is Y_train[i] and the
        # matching metadata row is train_ind[i].
        aug_labels.append(Y_train[i])
        aug_signals.append(augmented_signals[j,:])
        aug_source_rows.append(train_ind[i])
    print("\r Processed {}/{} files".format(i + 1,X_train.shape[0]),end='')
aug_signals = np.stack(aug_signals,axis=0)
X_train = np.concatenate([X_train,aug_signals],axis=0)
aug_labels = np.stack(aug_labels,axis=0)
Y_train = np.concatenate([Y_train,aug_labels])
# Append the metadata rows of the augmented signals in one concat
# (DataFrame.append was removed in pandas 2.0 and was quadratic inside a loop).
data_path = pd.concat([data_path, data_path.loc[aug_source_rows]], ignore_index=True)
print('')
print(f'X_train:{X_train.shape}, Y_train:{Y_train.shape}')

# Calculate mel spectrograms

In [None]:
def getMELspectrogram(audio, sample_rate):
    """Compute the mel spectrogram of `audio` and return it on a dB scale.

    The spectrogram uses 148 mel bands up to sample_rate/2, a 1024-point FFT
    and a 512-sample Hamming window. Power is converted to dB relative to the
    maximum value of the spectrogram itself (ref=np.max).

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples, passed to librosa as `y`.
    sample_rate : int
        Sampling rate of `audio` in Hz.

    Returns
    -------
    numpy.ndarray
        The dB-scaled mel spectrogram returned by librosa.power_to_db.
    """
    mel_settings = dict(
        sr=sample_rate,
        n_fft=1024,
        win_length=512,
        window='hamming',
        n_mels=148,
        fmax=sample_rate/2,
    )
    power_spec = librosa.feature.melspectrogram(y=audio, **mel_settings)
    return librosa.power_to_db(power_spec, ref=np.max)

# test function: load one example of a single emotion, pad it to 2 s and plot
# its mel spectrogram.
emotion = 1
path = data_path.loc[data_path.Emotions==emotion, 'Path'].iloc[50]
print(path)
audio, sample_rate = librosa.load(path, duration=2, offset=0.75, sr=SAMPLE_RATE)
# Start from a fresh zero buffer instead of reusing a `signal` left over from an
# earlier cell: otherwise samples past the end of a short clip would be stale data.
signal = np.zeros(int(SAMPLE_RATE*2))
audio = audio[:len(signal)]
signal[:len(audio)] = audio
mel_spectrogram = getMELspectrogram(signal, SAMPLE_RATE)
# Pass the real sampling rate; specshow otherwise assumes 22050 Hz and labels
# both the time and the mel-frequency axis incorrectly.
librosa.display.specshow(mel_spectrogram, sr=SAMPLE_RATE, y_axis='mel', x_axis='time')
print('MEL spectrogram shape: ',mel_spectrogram.shape)

In [None]:
def _mel_batch(signals, split_name):
    """Return the stacked dB mel spectrograms of every row of `signals`.

    Parameters
    ----------
    signals : numpy.ndarray
        2-D array with one signal per row.
    split_name : str
        Name of the split, used only in the progress output.

    Returns
    -------
    numpy.ndarray
        Array whose first axis indexes the input rows and whose remaining axes
        are the spectrogram returned by getMELspectrogram.
    """
    print("Calculatin mel spectrograms for {} set".format(split_name))
    n_signals = signals.shape[0]
    spectrograms = []
    for i in range(n_signals):
        spectrograms.append(getMELspectrogram(signals[i,:], sample_rate=SAMPLE_RATE))
        print("\r Processed {}/{} files".format(i + 1,n_signals),end='')
    print('')
    return np.stack(spectrograms,axis=0)


# Replace the raw waveforms of each split with their mel spectrograms; rebinding
# the name releases the waveform array.
X_train = _mel_batch(X_train, "train")
X_val = _mel_batch(X_val, "val")
X_test = _mel_batch(X_test, "test")

print(f'X_train:{X_train.shape}, Y_train:{Y_train.shape}')
print(f'X_val:{X_val.shape}, Y_val:{Y_val.shape}')
print(f'X_test:{X_test.shape}, Y_test:{Y_test.shape}')

# Stack data: add a channel axis, standardize, and save

In [None]:
# Insert a singleton channel axis at position 1: (batch, h, w) -> (batch, 1, h, w).
X_train = np.expand_dims(X_train,1)
X_val = np.expand_dims(X_val,1)
X_test = np.expand_dims(X_test,1)

scaler = StandardScaler()


def _scale_split(x, fit=False):
    """Standardize a 4-D array feature-wise with the shared `scaler`.

    The array is flattened to (batch, features) for the scaler and reshaped back
    afterwards. With fit=True the scaler statistics are (re)estimated from `x`;
    otherwise the already fitted statistics are applied.
    """
    b, c, h, w = x.shape
    # ndarray.reshape is used instead of np.reshape(..., newshape=...): the
    # `newshape` keyword is deprecated in recent NumPy releases.
    flat = x.reshape(b, -1)
    flat = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return flat.reshape(b, c, h, w)


# Fit on the training split only, then apply the same statistics to test and val.
X_train = _scale_split(X_train, fit=True)
print('Shape of X_train: ',X_train.shape)

X_test = _scale_split(X_test)
print('Shape of X_test: ',X_test.shape)

X_val = _scale_split(X_val)
print('Shape of X_val: ',X_val.shape)

OUT_DIR = "/content/drive/MyDrive/serdl/notebooks/3_dataset_mel"
# np.save does not create missing directories.
os.makedirs(OUT_DIR, exist_ok=True)
for file_name, array in (
    ("xtrain.npy", X_train),
    ("xval.npy", X_val),
    ("xtest.npy", X_test),
    ("ytrain.npy", Y_train),
    ("yval.npy", Y_val),
    ("ytest.npy", Y_test),
):
    np.save(file=os.path.join(OUT_DIR, file_name), arr=array)