# Convolutional Neural Network Classifying Emotion from Mel Spectrograms Using a Single Gender

In [1]:
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import to_categorical
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
# Load the precomputed mel spectrograms and the per-file label rows.
audio_data = np.load("../data/audio_data_mel_spec.npy")
labels = np.load("../data/wav_labels.npy")

# labels: modality-vocal channel-emotion-emotional intensity-statement-repetition-actor
# emotions: 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# odd number actors = male, even = female
EMOTION_FIELD = 2  # position of the emotion code within a label row
ACTOR_FIELD = 6    # position of the actor id within a label row

# 1440 files: 24 speakers, 60 recordings per speaker
N_RECORDINGS = 1440
SPEC_SIZE = 16384  # flattened length of one spectrogram (128 * 128)
audio_data = audio_data.reshape(N_RECORDINGS, SPEC_SIZE)

# One record per recording: [flattened spectrogram, gender, zero-based emotion id].
features = []
for i in range(N_RECORDINGS):
    # Even actor ids are female speakers, odd ids are male speakers.
    gender = "Female" if labels[i][ACTOR_FIELD] % 2 == 0 else "Male"
    # Emotion codes in the labels are 1-based (1..8); shift them to 0-based class ids (0..7).
    emotion = int(labels[i][EMOTION_FIELD]) - 1
    features.append([audio_data[i], gender, emotion])

feature_df = pd.DataFrame(features, columns=["mel_spec", "gender", "emotion"])

feature_df.head()

Unnamed: 0,mel_spec,gender,emotion
0,"[1.0769573011160105e-09, 2.8049595979240394e-1...",Male,0
1,"[8.10519740213067e-09, 2.3336452770195137e-09,...",Male,0
2,"[1.8761321030069666e-07, 2.7834460070153e-07, ...",Male,0
3,"[1.213315243830948e-07, 1.1905444097237705e-07...",Male,0
4,"[6.763266924281197e-08, 1.9553900187929685e-07...",Male,1


In [3]:
# Partition the recordings by speaker gender so each gender can be modelled on its own.
gender_col = feature_df["gender"]
mal = feature_df[gender_col == "Male"]
fem = feature_df[gender_col == "Female"]

In [4]:
def _arrays_for(subset):
    """Return (X, y) for one gender's rows.

    X stacks the per-recording ``mel_spec`` vectors into a 2-D array and
    y holds the matching zero-based ``emotion`` class ids.
    """
    specs = np.array(subset["mel_spec"].tolist())
    emotions = np.array(subset["emotion"].tolist())
    return specs, emotions


# Each gender is split 80% train / 20% test with a fixed seed so the split is repeatable.

## ===== Males ===== ##
X_males, y_males = _arrays_for(mal)
X_train_males, X_test_males, y_train_males, y_test_males = train_test_split(
    X_males, y_males, test_size=0.20, random_state=0
)

## ===== Females ===== ##
X_females, y_females = _arrays_for(fem)
X_train_females, X_test_females, y_train_females, y_test_females = train_test_split(
    X_females, y_females, test_size=0.20, random_state=0
)

In [5]:
# Reshape for CNN input: every flattened spectrogram becomes a 128 x 128 single-channel image.
# The leading -1 lets NumPy infer the number of samples; re-running this cell is harmless
# because reshaping an array that already has this shape leaves it unchanged.
IMG_SHAPE = (128, 128, 1)
NUM_EMOTIONS = 8

X_train_females = X_train_females.reshape((-1,) + IMG_SHAPE)
X_test_females = X_test_females.reshape((-1,) + IMG_SHAPE)

# One-Hot encoding for classes.
# Only encode while the labels are still a 1-D vector of class ids: calling to_categorical
# on labels that are already one-hot (e.g. when this cell is run twice) would yield
# (N, 8, 8) targets that no longer match the network's (N, 8) output.
if y_train_females.ndim == 1:
    y_train_females = to_categorical(y_train_females, NUM_EMOTIONS)
if y_test_females.ndim == 1:
    y_test_females = to_categorical(y_test_females, NUM_EMOTIONS)

In [6]:
# Three convolutional stages followed by a small dense classifier over the 8 emotions.
# Shape comments assume the (128, 128, 1) input declared below and "valid" padding.
input_shape = (128, 128, 1)

model = Sequential([
    # Stage 1: 5x5 conv -> 124 x 124 x 24, then 4x2 max-pool -> 31 x 62 x 24
    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 2: 5x5 conv -> 27 x 58 x 48, then 4x2 max-pool -> 6 x 29 x 48
    Conv2D(48, (5, 5), padding="valid"),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 3: 5x5 conv -> 2 x 25 x 48 (no pooling)
    Conv2D(48, (5, 5), padding="valid"),
    Activation('relu'),

    # Classifier head: flatten, then dropout before and after the hidden dense layer
    Flatten(),
    Dropout(rate=0.5),
    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),

    # One output unit per emotion class, normalised to probabilities
    Dense(8),
    Activation('softmax'),
])

In [7]:
# Configure training: Adam optimiser, cross-entropy over the one-hot emotion targets.
model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=['accuracy'],
)

# Train on the female training split. The test split doubles as validation data here,
# so the per-epoch validation metrics are computed on the same samples scored below.
model.fit(
    X_train_females,
    y_train_females,
    batch_size=128,
    epochs=50,
    validation_data=(X_test_females, y_test_females),
)

# Final evaluation on the female test split; score holds [loss, accuracy].
score = model.evaluate(X_test_females, y_test_females)

for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

NameError: name 'X_train' is not defined