<a href="https://colab.research.google.com/github/bagustris/ravdess_song/blob/main/ravdess_song_sd_fc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAVDESS Song Emotion Recognition

In [None]:
import glob
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [None]:
# fetch the repository; the wav files are later read from its archive/ folder
!git clone https://github.com/bagustris/ravdess_song.git

Cloning into 'ravdess_song'...
remote: Enumerating objects: 1107, done.[K
remote: Counting objects: 100% (50/50), done.[K
remote: Compressing objects: 100% (40/40), done.[K
remote: Total 1107 (delta 23), reused 25 (delta 10), pack-reused 1057[K
Receiving objects: 100% (1107/1107), 230.81 MiB | 25.02 MiB/s, done.
Resolving deltas: 100% (27/27), done.
Checking out files: 100% (1029/1029), done.


In [None]:
cd ravdess_song/

/content/ravdess_song


In [None]:
# collect every wav file found one directory level below the dataset
# folder, in sorted order
data_path = 'archive'
wav_pattern = os.path.join(data_path + '/*/', '*.wav')
files = sorted(glob.glob(wav_pattern))

In [None]:
# check by showing the first file (the list was sorted, so this is the
# smallest path in string order)
files[0]

'archive/Actor_01/03-02-01-01-01-01-01.wav'

In [None]:
# helper: summarise a feature matrix along its second axis
def _mean_std(feature):
    """Return ``(mean, std)`` of ``feature`` taken along its second axis.

    The matrix is transposed and reduced over axis 0, so each returned
    vector holds one value per row of ``feature``.
    """
    frames = feature.T
    return np.mean(frames, axis=0), np.std(frames, axis=0)


# function to extract feature
def extract_feature(file_name):
    """Extract mean and standard-deviation statistics of five features.

    The audio is loaded at its native sampling rate (``sr=None``). MFCC
    (40 coefficients), chroma, mel spectrogram, spectral contrast and
    tonnetz are each computed once and then summarised by their mean and
    standard deviation via ``_mean_std``.

    Parameters
    ----------
    file_name : str
        Path of the audio file, passed to ``librosa.load``.

    Returns
    -------
    tuple of np.ndarray
        ``(mfcc, chroma, mel, contrast, tonnetz, mfcc_std, chroma_std,
        mel_std, contrast_std, tonnetz_std)``: the five mean vectors
        followed by the five standard-deviation vectors.
    """
    X, sample_rate = librosa.load(file_name, sr=None)

    # magnitude spectrogram shared by chroma and spectral contrast
    stft = np.abs(librosa.stft(X))
    # harmonic component, only needed by tonnetz; computed once
    harmonic = librosa.effects.harmonic(X)

    mfcc, mfcc_std = _mean_std(
        librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40))
    chroma, chroma_std = _mean_std(
        librosa.feature.chroma_stft(S=stft, sr=sample_rate))
    # y must be passed by keyword: newer librosa releases reject a
    # positional audio argument here
    mel, mel_std = _mean_std(
        librosa.feature.melspectrogram(y=X, sr=sample_rate))
    contrast, contrast_std = _mean_std(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate))
    tonnetz, tonnetz_std = _mean_std(
        librosa.feature.tonnetz(y=harmonic, sr=sample_rate))

    return (mfcc, chroma, mel, contrast, tonnetz,
            mfcc_std, chroma_std, mel_std, contrast_std, tonnetz_std)

In [None]:
# start with two empty accumulators: features and labels
feat, lab = [], []

In [None]:
# iterate over all files
# start from empty lists so that re-running this cell does not append
# every file a second time
feat = []
lab = []
for file in files:
    print("processing ...", file)
    # concatenate the ten statistic vectors into one flat feature vector
    feat_i = np.hstack(extract_feature(file))
    # the third dash-separated field of the file name is used as the label
    lab_i = os.path.basename(file).split('-')[2]
    feat.append(feat_i)
    lab.append(int(lab_i) - 1)  # make labels start from 0

processing ... archive/Actor_01/03-02-01-01-01-01-01.wav
processing ... archive/Actor_01/03-02-01-01-01-02-01.wav
processing ... archive/Actor_01/03-02-01-01-02-01-01.wav
processing ... archive/Actor_01/03-02-01-01-02-02-01.wav
processing ... archive/Actor_01/03-02-02-01-01-01-01.wav
processing ... archive/Actor_01/03-02-02-01-01-02-01.wav
processing ... archive/Actor_01/03-02-02-01-02-01-01.wav
processing ... archive/Actor_01/03-02-02-01-02-02-01.wav
processing ... archive/Actor_01/03-02-02-02-01-01-01.wav
processing ... archive/Actor_01/03-02-02-02-01-02-01.wav
processing ... archive/Actor_01/03-02-02-02-02-01-01.wav
processing ... archive/Actor_01/03-02-02-02-02-02-01.wav
processing ... archive/Actor_01/03-02-03-01-01-01-01.wav
processing ... archive/Actor_01/03-02-03-01-01-02-01.wav
processing ... archive/Actor_01/03-02-03-01-02-01-01.wav
processing ... archive/Actor_01/03-02-03-01-02-02-01.wav
processing ... archive/Actor_01/03-02-03-02-01-01-01.wav
processing ... archive/Actor_01

In [None]:
# only the last expression of a cell is displayed, so show both counts as
# one tuple and make sure every feature vector has a label
assert len(feat) == len(lab), "features and labels are out of sync"
len(feat), len(lab)

In [None]:
# turn the collected feature vectors into array X and the labels into array y
X, y = np.array(feat), np.array(lab)

In [None]:
# insert a length-1 middle axis: (n_samples, n_features) -> (n_samples, 1, n_features)
# -1 lets NumPy infer the last axis, so re-running this cell leaves the
# already reshaped array unchanged instead of raising an error
X = X.reshape((X.shape[0], 1, -1))

In [None]:
# split into train and test
# a fixed seed gives the same split on every run; stratify keeps the class
# proportions of y in both parts
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y)

In [None]:
# function to build the MLP/FC model
def model_dense(input_shape=None, n_classes=6):
    """Build and compile the fully connected classifier.

    The network is a batch-normalisation layer, three 256-unit ReLU dense
    layers, a dropout layer (rate 0.4) and a softmax output layer. It is
    compiled with sparse categorical cross-entropy, the Adam optimizer and
    the accuracy metric.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input sample. When omitted, it is taken from axes 1
        and 2 of the notebook-level ``x_train`` array.
    n_classes : int, optional
        Number of units in the softmax output layer (default 6).

    Returns
    -------
    tf.keras.models.Sequential
        The compiled model.
    """
    if input_shape is None:
        # fall back to the shape of one training sample
        input_shape = (x_train.shape[1], x_train.shape[2])

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.BatchNormalization(axis=-1,
              input_shape=input_shape))
    # three identical hidden layers
    for _ in range(3):
        model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))

    # compile model: set loss, optimizer, metric
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    return model


In [None]:
# create the model
model = model_dense()
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output
model.summary()

In [None]:
# fit the Dense model on the training split and keep the returned history
hist = model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=100,
    shuffle=True,
)

In [None]:
# score the trained model on the held-out test split
evaluate = model.evaluate(x_test, y_test, batch_size=16)
test_loss, test_acc = evaluate[0], evaluate[1]
print("Loss: ", test_loss, "--> Accuracy: ", test_acc)