In [1]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import os
import glob
import math
from functools import reduce
import operator as op

dataset_folder = os.path.abspath("./individual_npzs/{0}/*.npz")

## Parameters

In [2]:
learning_rate = 0.0003
batch_size = 64
dropout = 0.75
max_pool = 2
strides = 1
input_size = 60000
output_size = 4
epochs = 30
timesteps = 38
seed_num = 72
hidden_layer = 64

validation_session = 4
test_session = 5

label_dictionary = {'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}

tf.set_random_seed(seed_num)

## Helper methods

In [3]:
def batch_generator(data, labels):
    steps = math.ceil(data.shape[0] / batch_size)
    for batch_step in range(0, steps):
        start = batch_size * batch_step
        end = batch_size * (batch_step + 1)
        yield data[start:end], labels[start:end]
        
def build_encoded_array(emotion_label):
    initialized_array = [0. for key in label_dictionary]
    initialized_array[label_dictionary[emotion_label]] = 1.
    return initialized_array
        
def onehot_encode(label_minibatch):
    return [build_encoded_array(emotion_label) for emotion_label in label_minibatch]

## Load dataset: 3 sessions for training, 1 for validation, 1 for test

In [4]:
train_dataset = []
validation_dataset = []
test_dataset = []

train_labels = []
validation_labels = []
test_labels = []

session_string = 'session{0}'

for i in range(1, 6):
    formatted = session_string.format(i)
    for spectrogram in glob.glob(dataset_folder.format(formatted)):
        loaded_spec = np.load(spectrogram)
        for x in loaded_spec['spectrograms']:
            if i != validation_session and i != test_session:
                train_dataset.append(x) 
            elif i == validation_session:
                validation_dataset.append(x)
            elif i == test_session:
                test_dataset.append(x)
        for x in loaded_spec['labels']:
            if i != validation_session and i != test_session:
                train_labels.append(x) 
            elif i == validation_session:
                validation_labels.append(x)
            elif i == test_session:
                test_labels.append(x)
        
train_dataset = np.asarray(train_dataset)
train_labels = np.asarray(train_labels)

validation_dataset = np.asarray(validation_dataset)
validation_labels = np.asarray(validation_labels)

test_dataset = np.asarray(test_dataset)
test_labels = np.asarray(test_labels)

In [5]:
train_data = np.zeros([len(train_dataset), train_dataset[0].shape[0], train_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(train_dataset)):
    train_data[data,:,:] = train_dataset[data]
    
validation_data = np.zeros([len(validation_dataset), validation_dataset[0].shape[0], validation_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(validation_dataset)):
    validation_data[data,:,:] = validation_dataset[data]
    
test_data = np.zeros([len(test_dataset), test_dataset[0].shape[0], test_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(test_dataset)):
    test_data[data,:,:] = test_dataset[data]

In [6]:
validation_data = validation_data.reshape((validation_data.shape[0], input_size))
validation_labels = onehot_encode(validation_labels)

test_data = test_data.reshape((test_data.shape[0], input_size))
test_labels = onehot_encode(test_labels)

## Convolutional

In [7]:
def conv2d(to_process, weights, biases, strides=1):
    conv_out = tf.nn.conv2d(to_process, weights, strides=[1, strides, strides, 1], padding='SAME')
    bias_out = tf.nn.bias_add(conv_out, biases)
    relu_out = tf.nn.relu(bias_out)
    return relu_out

def maxpool2d(to_pool, pool_size=2):
    maxpool_out = tf.nn.max_pool(to_pool, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME')
    return maxpool_out

def nn_pipeline(spectrogram, weights, biases):
    
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])
    
    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)
    
    second_layer_out = conv2d(first_maxpool_out, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)
    
    third_layer_out = conv2d(second_maxpool_out, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)
    
    reshape_for_fc = tf.reshape(third_maxpool_out, [-1, weights['fully_connected_weights'].get_shape().as_list()[0]])
    fully_connected_out = tf.add(tf.matmul(reshape_for_fc, weights['fully_connected_weights']), biases['fully_connected_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    fully_connected_out_2 = tf.add(tf.matmul(fully_connected_dropout, weights['fully_connected_weights_2']), biases['fully_connected_biases_2'])
    fully_connected_activation_2 = tf.nn.relu(fully_connected_out_2)
    fully_connected_dropout_2 = tf.nn.dropout(fully_connected_activation_2, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout_2, weights['output']), biases['output'])
    
    return prediction

## Testing

In [8]:
def nn_pipeline_rnn(spectrogram, weights, biases):
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])

    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)

    second_layer_out = conv2d(first_maxpool_out, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)

    third_layer_out = conv2d(second_maxpool_out, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)

    interim_shape = third_maxpool_out.get_shape().as_list()
    transposed = tf.transpose(third_maxpool_out, perm=[0, 2, 1, 3])
    reshape_for_rnn = tf.reshape(transposed, [-1, interim_shape[2], interim_shape[1]*interim_shape[3]])
    reshape_for_rnn.set_shape([None, interim_shape[2], interim_shape[1]*interim_shape[3]])

    hidden_list = [hidden_layer, hidden_layer]

    gru_fw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]
    gru_bw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]

    gru_output, _, _, = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(gru_fw_cell, gru_bw_cell, reshape_for_rnn, dtype=tf.float32)
    interim_shape_gru = tf.shape(gru_output)
    gru_flatten = tf.reshape(gru_output, [-1, interim_shape_gru[1]*interim_shape_gru[2]])
    
    fully_connected_out = tf.add(tf.matmul(gru_flatten, weights['gru_weights']), biases['gru_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout, weights['output']), biases['output'])
        
    return prediction

In [9]:
weights = {
    'first_layer_weights': tf.Variable(tf.random_normal([10, 15, 1, 16])),
    'second_layer_weights': tf.Variable(tf.random_normal([8, 10, 16, 24])),
    'third_layer_weights': tf.Variable(tf.random_normal([5, 8, 24, 32])),
    'fully_connected_weights': tf.Variable(tf.random_normal([25*38*32, 2048])),
    'fully_connected_weights_2': tf.Variable(tf.random_normal([2048, 2048])),
    'output': tf.Variable(tf.random_normal([2048, output_size]))
}

biases = {
    'first_layer_biases': tf.Variable(tf.random_normal([16])),
    'second_layer_biases': tf.Variable(tf.random_normal([24])),
    'third_layer_biases': tf.Variable(tf.random_normal([32])),
    'fully_connected_biases': tf.Variable(tf.random_normal([2048])),
    'fully_connected_biases_2': tf.Variable(tf.random_normal([2048])),
    'output': tf.Variable(tf.random_normal([output_size]))
}

In [10]:
weights_rnn = {
    'first_layer_weights': tf.Variable(tf.truncated_normal([10, 15, 1, 16], seed=seed_num)),
    'second_layer_weights': tf.Variable(tf.truncated_normal([8, 10, 16, 24], seed=seed_num)),
    'third_layer_weights': tf.Variable(tf.truncated_normal([5, 8, 24, 32], seed=seed_num)),
    'gru_weights': tf.Variable(tf.truncated_normal([2*hidden_layer*timesteps, hidden_layer], seed=seed_num)),
    'output': tf.Variable(tf.truncated_normal([hidden_layer, output_size], seed=seed_num))
}

biases_rnn = {
    'first_layer_biases': tf.Variable(tf.truncated_normal([16], seed=seed_num)),
    'second_layer_biases': tf.Variable(tf.truncated_normal([24], seed=seed_num)),
    'third_layer_biases': tf.Variable(tf.truncated_normal([32], seed=seed_num)),
    'gru_biases': tf.Variable(tf.truncated_normal([hidden_layer], seed=seed_num)),
    'output': tf.Variable(tf.truncated_normal([output_size], seed=seed_num))
}

In [11]:
X = tf.placeholder(tf.float32, [None, input_size])
Y = tf.placeholder(tf.float32, [None, output_size])

keep_prob = tf.placeholder(tf.float32)

logits = nn_pipeline_rnn(X, weights_rnn, biases_rnn)
prediction = tf.nn.softmax(logits)

loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y))
optimizer = tf.train.AdagradOptimizer  (learning_rate=learning_rate)
train_trigger = optimizer.minimize(loss_function)

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

In [12]:
with tf.Session() as sess:
    sess.run(init)
    
    print('Session initialized.')
    
    for epoch_step in range(1, epochs+1):
        batch_gen = batch_generator(train_data, train_labels)
        for data_minibatch, label_minibatch in batch_gen:
            data_reshaped = data_minibatch.reshape((data_minibatch.shape[0], input_size))
            labels_encoded = onehot_encode(label_minibatch)
            sess.run(train_trigger, feed_dict={X: data_reshaped, Y: labels_encoded, keep_prob: dropout})
            train_loss, train_acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
            print("Training accuracy of batch: Loss={:.4f}".format(train_loss) +", Accuracy={:.4f}.".format(train_acc))
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
        print("Validation after epoch #" + str(epoch_step) + ", Validation Loss= "+ "{:.4f}".format(loss) + ", Validation Accuracy= " + "{:.3f}".format(acc))
        
    print("Optimization Finished!")
    
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data,
                                      Y: test_labels,
                                      keep_prob: 1.0}))

Session initialized.
Training accuracy of batch: Loss=170.5067, Accuracy=0.2392.
Training accuracy of batch: Loss=125.8124, Accuracy=0.2681.
Training accuracy of batch: Loss=97.0386, Accuracy=0.2937.
Training accuracy of batch: Loss=102.4819, Accuracy=0.3066.
Training accuracy of batch: Loss=106.6222, Accuracy=0.2889.
Training accuracy of batch: Loss=98.8450, Accuracy=0.3082.
Training accuracy of batch: Loss=97.7395, Accuracy=0.3034.
Training accuracy of batch: Loss=86.1508, Accuracy=0.3098.
Training accuracy of batch: Loss=86.6025, Accuracy=0.3082.
Training accuracy of batch: Loss=86.6390, Accuracy=0.2729.
Training accuracy of batch: Loss=87.8417, Accuracy=0.2713.
Training accuracy of batch: Loss=86.0487, Accuracy=0.3130.
Training accuracy of batch: Loss=86.6318, Accuracy=0.3242.
Training accuracy of batch: Loss=91.1543, Accuracy=0.3403.
Training accuracy of batch: Loss=93.4571, Accuracy=0.3274.
Training accuracy of batch: Loss=87.4516, Accuracy=0.3323.
Training accuracy of batch: Los

Training accuracy of batch: Loss=36.6125, Accuracy=0.2777.
Training accuracy of batch: Loss=35.0114, Accuracy=0.3162.
Training accuracy of batch: Loss=35.0405, Accuracy=0.3226.
Training accuracy of batch: Loss=34.3001, Accuracy=0.3130.
Training accuracy of batch: Loss=33.6318, Accuracy=0.3467.
Training accuracy of batch: Loss=32.8486, Accuracy=0.3018.
Training accuracy of batch: Loss=31.8346, Accuracy=0.3210.
Training accuracy of batch: Loss=32.3124, Accuracy=0.2889.
Training accuracy of batch: Loss=30.3891, Accuracy=0.3098.
Training accuracy of batch: Loss=31.6265, Accuracy=0.3291.
Training accuracy of batch: Loss=32.3320, Accuracy=0.3082.
Training accuracy of batch: Loss=35.0135, Accuracy=0.3130.
Training accuracy of batch: Loss=36.7819, Accuracy=0.3323.
Training accuracy of batch: Loss=34.1555, Accuracy=0.3018.
Training accuracy of batch: Loss=32.5467, Accuracy=0.3515.
Training accuracy of batch: Loss=31.5642, Accuracy=0.3483.
Training accuracy of batch: Loss=32.7978, Accuracy=0.311

Training accuracy of batch: Loss=15.0808, Accuracy=0.3066.
Training accuracy of batch: Loss=14.6837, Accuracy=0.3242.
Training accuracy of batch: Loss=15.4671, Accuracy=0.2793.
Training accuracy of batch: Loss=14.2888, Accuracy=0.3082.
Training accuracy of batch: Loss=14.3033, Accuracy=0.3098.
Training accuracy of batch: Loss=13.7479, Accuracy=0.3050.
Training accuracy of batch: Loss=15.1647, Accuracy=0.2584.
Training accuracy of batch: Loss=14.1993, Accuracy=0.3194.
Training accuracy of batch: Loss=14.6323, Accuracy=0.3419.
Training accuracy of batch: Loss=15.3699, Accuracy=0.3226.
Training accuracy of batch: Loss=17.3633, Accuracy=0.3531.
Training accuracy of batch: Loss=14.8409, Accuracy=0.3371.
Training accuracy of batch: Loss=15.3955, Accuracy=0.3050.
Training accuracy of batch: Loss=13.4020, Accuracy=0.3451.
Training accuracy of batch: Loss=14.1216, Accuracy=0.3146.
Training accuracy of batch: Loss=13.4663, Accuracy=0.2953.
Training accuracy of batch: Loss=14.6455, Accuracy=0.317

Training accuracy of batch: Loss=6.9294, Accuracy=0.3162.
Training accuracy of batch: Loss=6.5469, Accuracy=0.2937.
Training accuracy of batch: Loss=7.0050, Accuracy=0.2568.
Training accuracy of batch: Loss=7.3200, Accuracy=0.2343.
Training accuracy of batch: Loss=7.4035, Accuracy=0.2568.
Training accuracy of batch: Loss=7.6238, Accuracy=0.2937.
Training accuracy of batch: Loss=7.7565, Accuracy=0.3146.
Training accuracy of batch: Loss=8.5735, Accuracy=0.3114.
Training accuracy of batch: Loss=6.6138, Accuracy=0.3242.
Training accuracy of batch: Loss=6.9402, Accuracy=0.3194.
Training accuracy of batch: Loss=7.1479, Accuracy=0.3258.
Training accuracy of batch: Loss=6.5766, Accuracy=0.3114.
Training accuracy of batch: Loss=6.8391, Accuracy=0.2665.
Training accuracy of batch: Loss=6.6514, Accuracy=0.3162.
Training accuracy of batch: Loss=6.9390, Accuracy=0.2376.
Training accuracy of batch: Loss=5.7746, Accuracy=0.3258.
Training accuracy of batch: Loss=6.8877, Accuracy=0.2953.
Training accur

Training accuracy of batch: Loss=3.6675, Accuracy=0.3002.
Training accuracy of batch: Loss=3.8408, Accuracy=0.3387.
Training accuracy of batch: Loss=4.4433, Accuracy=0.3419.
Training accuracy of batch: Loss=4.7522, Accuracy=0.3772.
Training accuracy of batch: Loss=3.6792, Accuracy=0.3146.
Training accuracy of batch: Loss=3.5619, Accuracy=0.3307.
Training accuracy of batch: Loss=3.7603, Accuracy=0.3226.
Training accuracy of batch: Loss=4.1328, Accuracy=0.3050.
Training accuracy of batch: Loss=3.5615, Accuracy=0.2873.
Training accuracy of batch: Loss=3.8764, Accuracy=0.3130.
Training accuracy of batch: Loss=3.9283, Accuracy=0.2729.
Training accuracy of batch: Loss=3.2917, Accuracy=0.2568.
Training accuracy of batch: Loss=4.3288, Accuracy=0.2857.
Training accuracy of batch: Loss=3.2875, Accuracy=0.3114.
Training accuracy of batch: Loss=3.3590, Accuracy=0.2681.
Training accuracy of batch: Loss=3.5233, Accuracy=0.2424.
Training accuracy of batch: Loss=3.1991, Accuracy=0.2729.
Training accur

Training accuracy of batch: Loss=1.5388, Accuracy=0.3435.
Training accuracy of batch: Loss=1.5957, Accuracy=0.3660.
Training accuracy of batch: Loss=1.6747, Accuracy=0.3692.
Training accuracy of batch: Loss=1.5553, Accuracy=0.2937.
Training accuracy of batch: Loss=1.5219, Accuracy=0.2970.
Training accuracy of batch: Loss=1.7191, Accuracy=0.3612.
Training accuracy of batch: Loss=1.6133, Accuracy=0.3291.
Training accuracy of batch: Loss=1.5072, Accuracy=0.3531.
Training accuracy of batch: Loss=1.6798, Accuracy=0.3130.
Training accuracy of batch: Loss=1.5561, Accuracy=0.3050.
Training accuracy of batch: Loss=1.6410, Accuracy=0.3098.
Training accuracy of batch: Loss=1.5225, Accuracy=0.3066.
Training accuracy of batch: Loss=1.4679, Accuracy=0.2986.
Training accuracy of batch: Loss=1.4336, Accuracy=0.3483.
Training accuracy of batch: Loss=1.4517, Accuracy=0.2825.
Training accuracy of batch: Loss=1.4982, Accuracy=0.3050.
Training accuracy of batch: Loss=1.4787, Accuracy=0.3210.
Training accur

Training accuracy of batch: Loss=1.3419, Accuracy=0.3836.
Training accuracy of batch: Loss=1.3442, Accuracy=0.3836.
Training accuracy of batch: Loss=1.3437, Accuracy=0.3724.
Training accuracy of batch: Loss=1.3432, Accuracy=0.3740.
Training accuracy of batch: Loss=1.3395, Accuracy=0.4045.
Training accuracy of batch: Loss=1.3452, Accuracy=0.3644.
Training accuracy of batch: Loss=1.3432, Accuracy=0.3772.
Training accuracy of batch: Loss=1.3479, Accuracy=0.3483.
Training accuracy of batch: Loss=1.3497, Accuracy=0.3451.
Training accuracy of batch: Loss=1.3465, Accuracy=0.3644.
Training accuracy of batch: Loss=1.3429, Accuracy=0.3756.
Training accuracy of batch: Loss=1.3465, Accuracy=0.3660.
Training accuracy of batch: Loss=1.3449, Accuracy=0.3660.
Training accuracy of batch: Loss=1.3556, Accuracy=0.3499.
Validation after epoch #25, Validation Loss= 1.3542, Validation Accuracy= 0.355
Training accuracy of batch: Loss=1.3422, Accuracy=0.3788.
Training accuracy of batch: Loss=1.3444, Accuracy=

Training accuracy of batch: Loss=1.3467, Accuracy=0.3676.
Training accuracy of batch: Loss=1.3458, Accuracy=0.3612.
Training accuracy of batch: Loss=1.3480, Accuracy=0.3531.
Training accuracy of batch: Loss=1.3428, Accuracy=0.3836.
Training accuracy of batch: Loss=1.3457, Accuracy=0.3596.
Training accuracy of batch: Loss=1.3431, Accuracy=0.3724.
Training accuracy of batch: Loss=1.3406, Accuracy=0.3981.
Training accuracy of batch: Loss=1.3435, Accuracy=0.3933.
Training accuracy of batch: Loss=1.3481, Accuracy=0.3515.
Training accuracy of batch: Loss=1.3490, Accuracy=0.3563.
Validation after epoch #29, Validation Loss= 1.3479, Validation Accuracy= 0.366
Training accuracy of batch: Loss=1.3379, Accuracy=0.3900.
Training accuracy of batch: Loss=1.3541, Accuracy=0.3499.
Training accuracy of batch: Loss=1.3529, Accuracy=0.3724.
Training accuracy of batch: Loss=1.3516, Accuracy=0.3692.
Training accuracy of batch: Loss=1.3505, Accuracy=0.3596.
Training accuracy of batch: Loss=1.3392, Accuracy=

In [13]:
sess.close()