In [1]:
import pickle

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm.contrib import tzip

from helper_functions import get_tess_df, get_features, get_crema_df, get_my_audio

%load_ext autoreload
%autoreload 2

In [2]:
my_audio_df = get_my_audio()

In [3]:
tess_df = get_tess_df()
crema_df = get_crema_df()

data_path = pd.concat([crema_df, tess_df], axis=0)

In [None]:
from helper_functions import create_images

emotion='angry'
path = "TESS/OAF_angry/OAF_back_angry.wav"
create_images([path, "TESS/OAF_Fear/OAF_back_fear.wav"], [emotion, 'fear'], figsize=(6, 10))

In [None]:
plt.title('Count of Emotions', size=16)
sns.countplot(x='Emotions', data=data_path)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

In [None]:
X, Y = [], []

for path, emotion in tzip(data_path.Path, data_path.Emotions):
    feature = get_features(path)
    for ele in feature:
        X.append(ele)
        # appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
        Y.append(emotion)

In [None]:
print(X[1])

In [None]:
#pickle.dump((X, Y), open('extracted_features.pkl', 'wb'))

In [4]:
X, Y = pickle.load(open('extracted_features.pkl', 'rb'))

In [None]:
len(X), len(Y), data_path.shape

In [5]:
from helper_functions import encode_emotion
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

features = pd.DataFrame(X)
features['labels'] = Y
#features.to_csv('features.csv', index=False)
print(features.head())

X_t = features.iloc[:, :-1].values
Y_tt = features['labels'].values

Y_t = []
for iii in range(len(Y_tt)):
    Y_t.append(encode_emotion(Y_tt[iii]))

x_train, x_test, y_train, y_test = train_test_split(X_t, Y_t, random_state=0, shuffle=True)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

          0           1           2          3          4          5  \
0  0.097949 -262.274261  111.292137  -0.619845  48.630657 -10.620001   
1  0.292554 -101.032630   28.685769   8.788258  12.950129  -0.353596   
2  0.101923 -316.482178  100.732925  -1.121063  44.560493 -13.020816   
3  0.093924 -342.439514  130.888840   8.968784  52.469501 -19.662062   
4  0.250042 -179.444821   34.033375  18.841786  11.593927   1.859677   

          6          7         8          9  ...           173           174  \
0 -0.285672 -17.527323 -1.120136 -13.054270  ...  3.973798e-08  4.090360e-08   
1 -9.653648  -9.215764 -4.868280  -7.088598  ...  5.239636e-02  5.605551e-02   
2 -2.672002 -17.052134 -0.555105 -14.461847  ...  3.779767e-08  3.913161e-08   
3  9.624663 -18.212955  2.013108 -10.330219  ...  1.706682e-08  1.628212e-08   
4 -3.818330  -6.092026 -3.902650  -5.204026  ...  1.326073e-02  1.300624e-02   

            175           176           177           178           179  \
0  4.180675

In [None]:
from sklearn.model_selection import cross_val_score
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_scores = []
best_rf_clf = None
best_accuracy = 0
best_iii = 0
for iii in tqdm(range(10, 51)):
    rf_clf = RandomForestClassifier(n_estimators=iii)
    cross_score = cross_val_score(rf_clf, X_t, Y_t)
    cross_mean = cross_score.mean()

    rf_scores.append(cross_mean)

    print(f"{iii} cross_mean: {cross_mean}")

    if cross_mean > best_accuracy:
        best_accuracy = cross_mean
        best_rf_clf = rf_clf
        best_iii = iii

In [None]:
print(f"Best n_estimators for RF {best_iii}")

In [None]:
plt.plot(range(10, 51), rf_scores)
plt.title("Random Forest")
plt.xlabel("Number of estimators")
plt.ylabel("Mean of cross validation")
plt.show()

In [None]:
print(best_accuracy)
print(best_iii)

In [None]:
best_rf_clf.fit(x_train, y_train)
rf_prediction = best_rf_clf.predict(x_test)
print(rf_prediction[0])
print(accuracy_score(rf_prediction, y_test))

In [None]:
pickle.dump(best_rf_clf, open('best_rf_clf.pkl', 'wb'))

In [6]:
clf = pickle.load(open('best_rf_clf.pkl', 'rb'))

In [9]:
from helper_functions import predict_for_model

predict_for_model(clf, scaler, my_audio_df)

/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_neutral.m4a
neutral
[[0.02083333 0.29166667 0.25       0.04166667 0.0625     0.3125
  0.02083333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_sad.m4a
sad
[[0.16666667 0.08333333 0.20833333 0.0625     0.125      0.27083333
  0.08333333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_angry.m4a
angry
[[0.29166667 0.04166667 0.08333333 0.         0.1875     0.35416667
  0.04166667]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_sad.m4a
sad
[[0.125      0.27083333 0.125      0.         0.10416667 0.22916667
  0.14583333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_happy.m4a
happy
[[0.16666667 0.04166667 0.25       0.04166667 0.14583333 0.29166667
  0.0625    ]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_neutral.m4a
neutral
[[0.14583333 0.0625     0.33333333 0.020

In [None]:
from helper_functions import prepare_for_prediction, decode_emotion

for path, emotion in zip(my_audio_df.Path, my_audio_df.Emotions):
    print(path)
    print(emotion)
    features_my = [prepare_for_prediction(path)]
    #print(features_my)
    features_my = pd.DataFrame(features_my)
    features_my['labels'] = emotion
    #print(features_my.head)

    my_case_x = scaler.transform(features_my.iloc[:, :-1].values)
    prediction = clf.predict_proba(my_case_x)
    print(prediction)
    #print(decode_emotion(prediction[0]))

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

extra_trees_scores = []
best_extra_trees_clf = None
best_accuracy = 0
best_iii = 49
for iii in tqdm(range(45, 70)):
    extra_trees_clf = ExtraTreesClassifier(n_estimators=iii)
    cross_score = cross_val_score(extra_trees_clf, X_t, Y_t)
    cross_mean = cross_score.mean()

    extra_trees_scores.append(cross_mean)

    print(f"{iii} cross_mean: {cross_mean}")

    if cross_mean > best_accuracy:
        best_accuracy = cross_mean
        best_extra_trees_clf = extra_trees_clf
        best_iii = iii

In [None]:
print(f"Best n_estimators for ExtraTreesClassifier {best_iii}")

In [None]:
plt.plot(range(45, 70), extra_trees_scores)
plt.title("ExtraTreesClassifier")
plt.xlabel("Number of estimators")
plt.ylabel("Mean of cross validation")
plt.show()

In [None]:
best_extra_trees_clf.fit(x_train, y_train)
extra_trees_prediction = best_extra_trees_clf.predict(x_test)
print(extra_trees_prediction[0])
print(accuracy_score(extra_trees_prediction, y_test))

In [None]:
pickle.dump(best_gradient_clf, open('best_gradient_clf.pkl', 'wb'))

In [None]:
clf = pickle.load(open('best_gradient_clf.pkl', 'rb'))

In [10]:
predict_for_model(clf, scaler, my_audio_df)

/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_neutral.m4a
neutral
[[0.02083333 0.29166667 0.25       0.04166667 0.0625     0.3125
  0.02083333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_sad.m4a
sad
[[0.16666667 0.08333333 0.20833333 0.0625     0.125      0.27083333
  0.08333333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_angry.m4a
angry
[[0.29166667 0.04166667 0.08333333 0.         0.1875     0.35416667
  0.04166667]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_sad.m4a
sad
[[0.125      0.27083333 0.125      0.         0.10416667 0.22916667
  0.14583333]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_happy.m4a
happy
[[0.16666667 0.04166667 0.25       0.04166667 0.14583333 0.29166667
  0.0625    ]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_neutral.m4a
neutral
[[0.14583333 0.0625     0.33333333 0.020

In [None]:
from sklearn.ensemble import AdaBoostClassifier

ada_scores = []
best_ada_clf = None
best_accuracy = 0
best_iii = 49
for iii in tqdm(range(10, 51)):
    ada_clf = AdaBoostClassifier(n_estimators=iii)
    cross_score = cross_val_score(ada_clf, X_t, Y_t)
    cross_mean = cross_score.mean()

    ada_scores.append(cross_mean)

    print(f"{iii} cross_mean: {cross_mean}")

    if cross_mean > best_accuracy:
        best_accuracy = cross_mean
        best_ada_clf = ada_clf
        best_iii = iii

In [None]:
print(f"Best n_estimators for AdaBoost {best_iii}")

In [None]:
plt.plot(range(10, 51), ada_scores)
plt.title("AdaBoost")
plt.xlabel("Number of estimators")
plt.ylabel("Mean of cross validation")
plt.show()

In [None]:
best_ada_clf.fit(x_train, y_train)
ada_prediction = best_ada_clf.predict(x_test)
print(ada_prediction[0])
print(accuracy_score(ada_prediction, y_test))

In [None]:
pickle.dump(best_ada_clf, open('best_ada_clf.pkl', 'wb'))

In [11]:
clf = pickle.load(open('best_ada_clf.pkl', 'rb'))
predict_for_model(clf, scaler, my_audio_df)

/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_neutral.m4a
neutral
[[0.15733193 0.14704257 0.15875289 0.14474621 0.14591753 0.16426584
  0.08194303]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_sad.m4a
sad
[[0.1501214  0.14619938 0.16139264 0.14987774 0.13753017 0.15384868
  0.10102999]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_angry.m4a
angry
[[0.164369   0.14019527 0.15865333 0.13656711 0.14941879 0.1617321
  0.08906439]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/David_sad.m4a
sad
[[0.15224476 0.14376313 0.15829917 0.1444852  0.1301907  0.15885076
  0.11216629]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_happy.m4a
happy
[[0.16304492 0.14007065 0.16123246 0.13610921 0.13941323 0.1599567
  0.10017283]]
/Users/davidtrafela/PycharmProjects/SU-emotion-recognition/my_audio/Hi_David_neutral.m4a
neutral
[[0.1501214  0.14619938 0.16139264 0.1

In [16]:
from tensorflow.python.keras.utils.np_utils import to_categorical

x_train_t = np.expand_dims(np.asarray(x_train), axis=2)
x_test_t = np.expand_dims(np.asarray(x_test), axis=2)

y_train_t = np.asarray(y_train)
y_test_t = np.asarray(y_test)
y_train_t = to_categorical(y_train_t, 7)
y_test_t = to_categorical(y_test_t, 7)

x_train_t.shape, y_train_t.shape, x_test_t.shape, y_test_t.shape

((23044, 182, 1), (23044, 7), (7682, 182, 1), (7682, 7))

In [18]:
from keras.layers import Conv1D, Dropout, Flatten, Dense
from keras.layers import MaxPooling1D
from keras import Sequential


model = Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train_t.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.2))

model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Flatten())
model.add(Dense(units=32, activation='swish'))
model.add(Dropout(0.3))

model.add(Dense(units=7, activation='sigmoid'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
pickle.dump(model, open('nn_model.pkl', 'wb'))

In [None]:
from keras.callbacks import ReduceLROnPlateau

rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=1, patience=2, min_lr=0.000001)
history = model.fit(x_train_t, y_train_t, batch_size=64, epochs=50, validation_split=0.15, callbacks=[rlrp])

In [None]:
evaluation = model.evaluate(x_test_t, y_test_t)
print(evaluation)
print("Accuracy of our model on test data : ", evaluation[1] * 100, "%")

In [None]:
epochs = [i for i in range(50)]
fig, ax = plt.subplots(2, 1)
train_acc = history.history['accuracy']
train_loss = history.history['loss']
test_acc = history.history['val_accuracy']
test_loss = history.history['val_loss']

fig.set_size_inches(10, 10)
ax[0].plot(epochs, train_loss, label='Training Loss')
ax[0].plot(epochs, test_loss, label='Validattion Loss')
ax[0].set_title('Training & Validation Loss')
ax[0].legend()
ax[0].set_xlabel("Epochs")

ax[1].plot(epochs, train_acc, label='Training Accuracy')
ax[1].plot(epochs, test_acc, label='Validation Accuracy')
ax[1].set_title('Training & Validation Accuracy')
ax[1].legend()
ax[1].set_xlabel("Epochs")
plt.show()

In [14]:
clf = pickle.load(open('nn_model.pkl', 'rb'))
predict_for_model(clf, scaler, my_audio_df, nn=True)

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2022-12-22 12:44:04         4819
metadata.json                                  2022-12-22 12:44:04           64
variables.h5                                   2022-12-22 12:44:04      6760736
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers
......conv1d
.........vars
............0
............1
......conv1d_1
.........vars
............0
............1
......conv1d_2
.........vars
............0
............1
......conv1d_3
.........vars
............0
............1
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dropout
.........vars
......dropout_1
.........vars
......flatten
.........vars
......max_pooling1d
.........vars
......max_pooling1d_1
.........vars
......max_pooling1d_2
.........vars
......max_pooling1d_3
.........vars
...metrics


In [19]:
model2 = Sequential()
model2.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train_t.shape[1], 1)))
model2.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model2.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model2.add(Dropout(0.2))

model2.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model2.add(Flatten())
model2.add(Dense(units=32, activation='swish'))
model2.add(Dropout(0.3))

model2.add(Dense(units=7, activation='sigmoid'))
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
from keras.callbacks import ReduceLROnPlateau

rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=1, patience=2, min_lr=0.000001)
history2 = model2.fit(x_train_t, y_train_t, batch_size=64, epochs=50, validation_split=0.15, callbacks=[rlrp])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0004000000189989805.
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 48: ReduceLROnPlateau reducing learning rate to 0.00016000000759959222.
Epoch 49/50
Epoch 50/50


In [26]:
pickle.dump(model2, open('nn_model2.pkl', 'wb'))

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......conv1d
.........vars
............0
............1
......conv1d_1
.........vars
............0
............1
......conv1d_2
.........vars
............0
............1
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dropout
.........vars
......dropout_1
.........vars
......flatten
.........vars
......max_pooling1d
.........vars
......max_pooling1d_1
.........vars
......max_pooling1d_2
.........vars
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........17
.........18
.........19
.........2
.........20
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name          

In [27]:
clf = pickle.load(open('nn_model2.pkl', 'rb'))
predict_for_model(clf, scaler, my_audio_df, nn=True)

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-01-02 20:03:04         4087
metadata.json                                  2023-01-02 20:03:04           64
variables.h5                                   2023-01-02 20:03:04      3089552
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers
......conv1d
.........vars
............0
............1
......conv1d_1
.........vars
............0
............1
......conv1d_2
.........vars
............0
............1
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dropout
.........vars
......dropout_1
.........vars
......flatten
.........vars
......max_pooling1d
.........vars
......max_pooling1d_1
.........vars
......max_pooling1d_2
.........vars
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars


In [23]:
evaluation2 = model2.evaluate(x_test_t, y_test_t)
print(evaluation2)
print("Accuracy of our model2 on test data : ", evaluation2[1] * 100, "%")

[0.979719340801239, 0.6532152891159058]
Accuracy of our model2 on test data :  65.32152891159058 %


In [None]:
epochs = [i for i in range(50)]
fig, ax = plt.subplots(2, 1)
train_acc = history2.history['accuracy']
train_loss = history2.history['loss']
test_acc = history2.history['val_accuracy']
test_loss = history2.history['val_loss']

fig.set_size_inches(10, 10)
ax[0].plot(epochs, train_loss, label='Training Loss')
ax[0].plot(epochs, test_loss, label='Validattion Loss')
ax[0].set_title('Training & Validation Loss')
ax[0].legend()
ax[0].set_xlabel("Epochs")

ax[1].plot(epochs, train_acc, label='Training Accuracy')
ax[1].plot(epochs, test_acc, label='Validation Accuracy')
ax[1].set_title('Training & Validation Accuracy')
ax[1].legend()
ax[1].set_xlabel("Epochs")
plt.show()