In [1]:
!pip install audformat
!pip install opensmile
!pip install Keras-Preprocessing



In [2]:
!wget https://zenodo.org/record/7447302/files/emodb.zip

!unzip emodb.zip

--2023-11-21 13:45:56--  https://zenodo.org/record/7447302/files/emodb.zip
Resolving zenodo.org (zenodo.org)... 188.184.98.238, 188.185.79.172, 188.184.103.159, ...
Connecting to zenodo.org (zenodo.org)|188.184.98.238|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/7447302/files/emodb.zip [following]
--2023-11-21 13:45:57--  https://zenodo.org/records/7447302/files/emodb.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 39981818 (38M) [application/octet-stream]
Saving to: ‘emodb.zip.2’


2023-11-21 13:46:00 (15.3 MB/s) - ‘emodb.zip.2’ saved [39981818/39981818]

Archive:  emodb.zip
replace emodb/db.emotion.categories.train.gold_standard.pkl? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: emodb/db.emotion.categories.train.gold_standard.pkl  
replace emodb/wav/14b01Fc.wav? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: emodb/wav/14b01Fc.wav   
replace emodb/wav/14a02Ea.w

**Loading libraries**

In [3]:
import audformat

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

from keras_preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import keras
from keras import layers
from keras.callbacks import ModelCheckpoint, EarlyStopping

import opensmile

import pandas as pd
import matplotlib.pyplot as plt
import torch

**Loading dataset**

using the gold standard train and test data

In [4]:

db = audformat.Database.load('emodb')

df_test = db['emotion.categories.test.gold_standard'].get()

df_train = db['emotion.categories.train.gold_standard'].get()

print(f'samples in train: {df_train.shape[0]}, and test: {df_test.shape[0]}')



samples in train: 304, and test: 231


In [5]:
# Encode the emotion words as numbers and use this as target
target = 'label'
encoder = LabelEncoder()
encoder.fit(df_train['emotion'])
df_train[target] = encoder.transform(df_train['emotion'])
df_test[target] = encoder.transform(df_test['emotion'])

In [6]:
df_test[target]

file
wav/12a01Fb.wav    4
wav/12a01Lb.wav    1
wav/12a01Nb.wav    5
wav/12a01Wc.wav    0
wav/12a02Ac.wav    3
                  ..
wav/16b10Lb.wav    1
wav/16b10Tb.wav    6
wav/16b10Td.wav    6
wav/16b10Wa.wav    0
wav/16b10Wb.wav    0
Name: label, Length: 231, dtype: int64

Using opensmile to extract raw features

In [7]:


smile = opensmile.Smile(
    opensmile.FeatureSet.ComParE_2016,
    opensmile.FeatureLevel.Functionals,
    sampling_rate=16000,
    resample=True,
    num_workers=5,
    verbose=True,
)

X_train = smile.process_index(
    df_train.index,
    root=db.root,

)
X_test = smile.process_index(
    df_test.index,
    root=db.root,

)




In [8]:
X_train.head

<bound method NDFrame.head of                                                   audspec_lengthL1norm_sma_range  \
file            start  end                                                         
wav/03a01Fa.wav 0 days 0 days 00:00:01.898250                           2.935072   
wav/03a01Nc.wav 0 days 0 days 00:00:01.611250                           3.087914   
wav/03a01Wa.wav 0 days 0 days 00:00:01.877812500                        3.673083   
wav/03a02Fc.wav 0 days 0 days 00:00:02.006250                           3.149658   
wav/03a02Nc.wav 0 days 0 days 00:00:01.439812500                        2.437313   
...                                                                          ...   
wav/13b10Fa.wav 0 days 0 days 00:00:02.158562500                        3.092541   
wav/13b10La.wav 0 days 0 days 00:00:02.348437500                        2.381379   
wav/13b10Nc.wav 0 days 0 days 00:00:02.367812500                        2.680195   
wav/13b10Wa.wav 0 days 0 days 00:00:02.2005625

In [9]:
labels_encoded = {'anger':0, 'boredom':1, 'disgust':2, 'fear':3, 'happiness':4, 'sadness':5, 'neutral':6}
df_train['label']=df_train['emotion'].apply(lambda x:labels_encoded[x])
df_test['label']=df_test['emotion'].apply(lambda x:labels_encoded[x])

y_train=np.array(df_train.label)
y_test=np.array(df_test.label)

In [10]:
print(y_train.shape)
print(y_test.shape)

(304,)
(231,)


In [11]:
# Perform a standard scaling / z-transformation on the features (mean=0, std=1)
scaler = StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train))
X_test = pd.DataFrame(scaler.transform(X_test))

**Balancing classes**

In [12]:
# # balance train classes
sm = SMOTE()
X_train, y_train = sm.fit_resample(X_train, y_train)

In [13]:
encoder = OneHotEncoder(sparse=False)
y_train= y_train.reshape(-1, 1)
print(y_train.shape)
y_train = encoder.fit_transform(y_train)
print(y_train.shape)
y_test= y_test.reshape(-1, 1)
print(y_test.shape)
y_test = encoder.fit_transform(y_test)
print(y_test.shape)

(504, 1)
(504, 7)
(231, 1)
(231, 7)




In [14]:
print(y_test[0],df_test.label[0])

[0. 0. 0. 0. 1. 0. 0.] 4


In [15]:
model = models.Sequential([
    layers.Dense(16, activation='relu'),
    # layers.Dense(128, activation='relu'),
    layers.Dense(len(labels_encoded), activation='softmax')  # Output layer with number of emotions
])

In [16]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(str(model.summary()))

In [17]:
best_weights_file = "nn_weights.h5"
es = EarlyStopping(monitor='val_loss', mode='min', verbose=2, patience=10)
mc = ModelCheckpoint(best_weights_file, monitor='val_loss', mode='min', verbose=2,
                         save_best_only=True)
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.2,callbacks=[es, mc])

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.19087, saving model to nn_weights.h5
Epoch 2/30

  saving_api.save_model(



Epoch 2: val_loss improved from 0.19087 to 0.06215, saving model to nn_weights.h5
Epoch 3/30
Epoch 3: val_loss improved from 0.06215 to 0.03364, saving model to nn_weights.h5
Epoch 4/30
Epoch 4: val_loss improved from 0.03364 to 0.00821, saving model to nn_weights.h5
Epoch 5/30
Epoch 5: val_loss improved from 0.00821 to 0.00561, saving model to nn_weights.h5
Epoch 6/30
Epoch 6: val_loss improved from 0.00561 to 0.00500, saving model to nn_weights.h5
Epoch 7/30
Epoch 7: val_loss improved from 0.00500 to 0.00445, saving model to nn_weights.h5
Epoch 8/30
Epoch 8: val_loss improved from 0.00445 to 0.00402, saving model to nn_weights.h5
Epoch 9/30
Epoch 9: val_loss improved from 0.00402 to 0.00366, saving model to nn_weights.h5
Epoch 10/30
Epoch 10: val_loss improved from 0.00366 to 0.00333, saving model to nn_weights.h5
Epoch 11/30
Epoch 11: val_loss improved from 0.00333 to 0.00310, saving model to nn_weights.h5
Epoch 12/30
Epoch 12: val_loss improved from 0.00310 to 0.00284, saving mode

<keras.src.callbacks.History at 0x7e2f6c10a080>

In [19]:
# test acc and loss
model.load_weights(best_weights_file) # load the best saved model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy',keras.metrics.Recall()])
test_metrics = model.evaluate(X_test, y_test, batch_size=8)
print("\n%s: %.2f%%" % ("test " + model.metrics_names[1], test_metrics[1] * 100))
print("%s: %.2f" % ("test " + model.metrics_names[2], test_metrics[2]* 100))


test accuracy: 76.62%
test recall: 75.32
