# Convolutional Neural Network Using Mel Spectrogram Classifying Emotion Using One Gender

In [1]:
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
import pickle
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

%matplotlib inline

In [2]:
LOG_DIR = f"{int(time.time())}"

audio_data = np.load("../data/audio_data.npy")
labels = np.load("../data/wav_labels.npy")

# labels: modality-vocal channel-emotion-emotional intensity-statement-repetition-actor
# emotions: 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# odd number actors = male, even = female

# 1440 files: 24 speakers, 60 recordings per speaker
audio_data = audio_data.reshape(1440, 9480)

features = []

for i in range(1440):
    
    if (labels[i][6]%2 == 0):
        label = "Female"
    else:
        label = "Male"

    if (labels[i][2] == 1):
        em = 0
    elif (labels[i][2] == 2):
        em = 1
    elif (labels[i][2] == 3):
        em = 2
    elif (labels[i][2] == 4):
        em = 3
    elif (labels[i][2] == 5):
        em = 4
    elif (labels[i][2] == 6):
        em = 5
    elif (labels[i][2] == 7):
        em = 6
    elif (labels[i][2] == 8):
        em = 7
    
    features.append([audio_data[i], label, em])


    
feature_df = pd.DataFrame(features, columns = ["mfcc", "gender", "emotion"])

feature_df.head()


Unnamed: 0,mfcc,gender,emotion
0,"[-710.0553588867188, -709.9026489257812, -711....",Female,0
1,"[-547.765625, -548.0353393554688, -548.6129760...",Female,2
2,"[-616.4595336914062, -615.7564697265625, -615....",Female,7
3,"[-739.8626098632812, -738.7739868164062, -735....",Female,1
4,"[-698.0630493164062, -697.3838500976562, -696....",Female,4


In [3]:
#split data - males vs females
mal = feature_df.loc[feature_df['gender'] == "Male"]
fem = feature_df.loc[feature_df['gender'] == "Female"]

In [4]:
## ===== Males ===== ##
X_males = np.array(mal.mfcc.tolist())
y_males = np.array(mal.emotion.tolist())

#20-80 train-test split
X_train_males, X_test_males, y_train_males, y_test_males = train_test_split(X_males, y_males, test_size=0.20, random_state=0)

## ===== Females ===== ##
X_females = np.array(fem.mfcc.tolist())
y_females = np.array(fem.emotion.tolist())

#20-80 train-test split
X_train_females, X_test_females, y_train_females, y_test_females = train_test_split(X_females, y_females, test_size=0.20, random_state=0)

In [5]:
# Reshape for CNN input
X_train_females = np.array([x.reshape( (20, 474, 1) ) for x in X_train_females])
X_test_females = np.array([x.reshape( (20, 474, 1) ) for x in X_test_females])

# One-Hot encoding for classes
y_train_females = np.array(to_categorical(y_train_females, 8))
y_test_females = np.array(to_categorical(y_test_females, 8))

In [6]:
def build_model(hp):
    model = Sequential()

    model.add(Conv2D(hp.Int('input_units', min_value=32, max_value=256, step=32), (3,3), input_shape=X_test_females.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    for i in range(hp.Int("n_layers", 1, 4)):
        model.add(Conv2D(hp.Int(f'conv_{i}_units', min_value=32, max_value=256, step=32), (3,3)))
        model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(64))

    model.add(Dense(8))
    model.add(Activation('softmax'))

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=['accuracy'])

    return model

In [7]:
# model.fit(
# 	x=X_train_females,
# 	y=y_train_females,
#     epochs=50,
#     batch_size=32,
#     validation_split=0.15,
# 	verbose=0
# )

In [8]:
tuner = RandomSearch(
    build_model,
    objective = "val_accuracy",
    max_trials = 1,
    executions_per_trial = 1,
    directory = LOG_DIR
)

tuner.search(
    x=X_train_females,
    y=y_train_females,
    epochs=20,
    batch_size=64,
    validation_data=(X_test_females, y_test_females)
)

Trial 1 Complete [00h 00m 13s]
val_accuracy: 0.125

Best val_accuracy So Far: 0.125
Total elapsed time: 00h 00m 13s
INFO:tensorflow:Oracle triggered exit


In [9]:
with open(f"tuner_{int(time.time())}.pkl", "wb") as f:
    pickle.dump(tuner, f)

print(tuner.get_best_hyperparameters()[0].values)

NameError: name 'pickle' is not defined