In [2]:
import os
import sys

import keras
import keras.utils
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
from keras import Sequential, regularizers
from keras.layers import Dense
from numpy import argmax
from python_speech_features import mfcc
from scipy.io.wavfile import read
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import KFold, train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Helper Functions

In [3]:
def from_categorical(one_hot):
    """Convert one-hot (or score) rows back into class indices.

    Args:
        one_hot: iterable of rows; each row is a sequence of numbers.

    Returns:
        A list holding, for every row, the index of its largest entry.
    """
    class_indices = []
    for row in one_hot:
        class_indices.append(argmax(row))
    return class_indices

In [4]:
def binarize(labels):
    """Collapse labels into a binary list: 1 where the label equals 1, else 0.

    Args:
        labels: iterable of label values.

    Returns:
        A list of 0/1 integers with one entry per input label.
    """
    binary = []
    for label in labels:
        if label == 1:
            binary.append(1)
        else:
            binary.append(0)
    return binary

In [18]:
def to_fann_dat(features, classes, output_path):
    """Write samples to a FANN-style training data (.dat) text file.

    The file starts with a header line
    ``<num_samples> <num_features> <num_classes>`` followed by two lines per
    sample: the feature values, then the class values. Every value is
    followed by a single space (so each data line ends with a trailing space).

    Args:
        features: 2-D array-like with one row of input values per sample.
        classes: 2-D array-like with one row of output values per sample
            (e.g. one-hot encoded labels).
        output_path: Path of the file to create; an existing file is
            overwritten.

    Raises:
        Exception: if ``features`` and ``classes`` hold a different number of
            samples.
        IOError/OSError: if ``output_path`` cannot be opened for writing.
    """
    if len(features) != len(classes):
        raise Exception("Sample lengths not the same")

    # Accept plain nested lists as well as numpy arrays.
    features = np.asarray(features)
    classes = np.asarray(classes)
    num_samples = len(features)
    num_features = features.shape[1]
    num_classes = classes.shape[1]

    # The context manager only closes a file that was actually opened, so a
    # failing open() surfaces its own error instead of being masked by a
    # close() call on an unbound name.
    with open(output_path, 'w') as f:
        # Write first line
        f.write('{} {} {}\n'.format(num_samples, num_features, num_classes))

        # Write remaining lines: inputs first, then expected outputs
        for feature_row, class_row in zip(features, classes):
            f.write(''.join('{} '.format(value) for value in feature_row) + '\n')
            f.write(''.join('{} '.format(value) for value in class_row) + '\n')

# Definitions

In [6]:
# Maps the emotion letter in an EmoDB file name to its English label.
# Each letter is the initial of the German emotion word.
EMOTIONS = {
    'W': 'Anger',      # Wut
    'L': 'Boredom',    # Langeweile
    'E': 'Disgust',    # Ekel
    'A': 'Fear',       # Angst
    'F': 'Happiness',  # Freude
    'T': 'Sadness',    # Trauer
    'N': 'Neutral',
}

File names encode the recording's metadata (1-based character positions). Positions 1-2: speaker number

Positions 3-5: code for text

Position 6: emotion (the letter is the initial of the German emotion word)

Position 7: version; if there are more than two versions of a recording, they are lettered a, b, c, ...

In [7]:
# 0-based character indices of each field inside an EmoDB file name
# (e.g. "03a01Fa.wav"). range() excludes its upper bound, so the two-character
# speaker field is range(0, 2) and the three-character text field is range(2, 5).
FILENAME_INDICES = {
    'speaker': range(0, 2),  # positions 1-2
    'text': range(2, 5),     # positions 3-5
    'emotion': 5,            # position 6
    'version': 6,            # position 7
}

In [8]:
# Directory scanned for the .wav recordings. File names are appended directly
# to this string when reading, so the trailing slash is required.
PATH = './EmoDB/wav/'

In [9]:
# NOTE(review): BITRATE is not referenced by any cell shown in this notebook;
# presumably bits per second of the recordings (16 bit x 16 kHz) - confirm.
BITRATE = 256000
# Sample rate handed to the MFCC extractor and used to convert a signal's
# sample count into its duration. The rate stored in each .wav file is not
# checked against this value.
SAMPLE_RATE = 16000

In [10]:
# Emotions kept for classification, mapped to their integer class index.
# Recordings whose emotion is not listed here are skipped during import.
USED_EMOTIONS = {
    'Happiness': 0,
    'Anger': 1,
    'Sadness': 2,
    'Neutral': 3,
}

In [11]:
# Number of cepstral coefficients requested per recording (numcep); also the
# input dimension of the network.
NUM_MFCC = 25
# Number of hidden layers in the network.
NUM_LAYERS = 3
# Number of units in each hidden layer.
NUM_NEURONS = 30
# Test accuracy the training cell tries to reach before it stops retraining.
DESIRED_ACC = 0.9

# Import Data

In [12]:
# Class index of every kept recording, parallel to `raw`.
outputs = []
# Waveform of every kept recording as a float array.
raw = []

# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order, and therefore the seeded train/test split, reproducible.
for filename in sorted(os.listdir(PATH)):
    # Ignore anything in the directory that is not a recording.
    if not filename.endswith(".wav"):
        continue

    # Output label
    emotion_code = filename[FILENAME_INDICES['emotion']]
    if emotion_code not in EMOTIONS:
        raise ValueError('Invalid emotion label in file name: ' + filename)
    emotion = EMOTIONS[emotion_code]

    # Emotions outside USED_EMOTIONS are valid but deliberately left out.
    if emotion not in USED_EMOTIONS:
        continue
    outputs.append(USED_EMOTIONS[emotion])

    # Input .wav: read() returns (sample_rate, samples); only samples are kept
    wav = read(PATH + filename)
    raw.append(np.array(wav[1], dtype=float))



# Extract features via MFCC

The analysis window is set to the full length of each signal, so every recording is summarised by a single MFCC frame.

In [13]:
# One MFCC matrix per recording. The window length equals the signal's
# duration and the FFT size equals its sample count, so the whole recording
# is analysed as one window.
inputs = [
    mfcc(
        waveform,
        samplerate=SAMPLE_RATE,
        winlen=len(waveform)/SAMPLE_RATE,
        nfft=len(waveform),
        numcep=NUM_MFCC,
    )
    for waveform in raw
]

Normalize inputs

In [14]:
# Keep the first MFCC frame of every recording as its feature vector.
# Stored under its own name so the unscaled features stay available and
# `inputs_normalised` only ever refers to the scaled array.
inputs_first_frame = [sample[0] for sample in inputs]

# Standardise each coefficient across all recordings.
# NOTE(review): scaling happens before the train/test split, so test-set
# statistics influence the training features.
inputs_normalised = sklearn.preprocessing.scale(inputs_first_frame)

# Neural Network Prototype

Split the data into training and test sets

In [15]:
# Hold out 33% of the samples for testing; the fixed seed makes the shuffle
# repeatable for a given sample order.
(x_train_split,
 x_test_split,
 y_train_split,
 y_test_split) = train_test_split(inputs_normalised, outputs,
                                  test_size=0.33, random_state=42)

# One-hot encode both label sets for the softmax output layer.
y_train_split, y_test_split = (
    keras.utils.to_categorical(labels, num_classes=len(USED_EMOTIONS))
    for labels in (y_train_split, y_test_split)
)

Train and evaluate neural network

In [16]:
# Upper bound on how many freshly initialised networks are trained. Without
# it the cell retrains forever whenever DESIRED_ACC is out of reach and has to
# be interrupted by hand.
MAX_TRAINING_ATTEMPTS = 10

acc = 0
best_model, best_loss, best_acc = None, None, None

# NOTE(review): accuracy is measured on the test split, so choosing a model
# by it makes the reported test accuracy optimistic.
for attempt in range(MAX_TRAINING_ATTEMPTS):
    model = Sequential()

    # Add first hidden layer
    model.add(Dense(NUM_NEURONS,
                    activation='sigmoid',
                    input_dim=NUM_MFCC))

    # Add remaining hidden layers
    for _ in range(NUM_LAYERS - 1):
        model.add(Dense(NUM_NEURONS, activation='sigmoid'))

    # Add output layer
    model.add(Dense(len(USED_EMOTIONS), activation='softmax'))

    # Compile and evaluate
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(),
                  metrics=['accuracy'])
    model.fit(x_train_split, y_train_split, epochs=1000, batch_size=5, verbose=0)
    loss, acc = model.evaluate(x_test_split, y_test_split)
    print('Accuracy: ' + str(acc))

    # Remember the best network seen so far
    if best_model is None or acc > best_acc:
        best_model, best_loss, best_acc = model, loss, acc

    if acc >= DESIRED_ACC:
        break
else:
    print('Desired accuracy {} not reached after {} attempts; keeping best model'
          .format(DESIRED_ACC, MAX_TRAINING_ATTEMPTS))

# Later cells see the best network and its metrics under the usual names.
model, loss, acc = best_model, best_loss, best_acc

Accuracy: 0.7767857142857143


KeyboardInterrupt: 

# Write Data for FannTool

In [19]:
# Export both splits in the text format written by to_fann_dat.
for split_inputs, split_targets, dat_path in (
        (x_train_split, y_train_split, 'emotion_train.dat'),
        (x_test_split, y_test_split, 'emotion_test.dat')):
    to_fann_dat(split_inputs, split_targets, dat_path)