In [1]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import os
import glob
import math
from functools import reduce
import operator as op

dataset_folder = os.path.abspath("./individual_npzs/{0}/*.npz")

## Parameters

In [2]:
learning_rate = 0.001
batch_size = 128
dropout = 0.75
max_pool = 2
strides = 1
input_size = 60000
output_size = 4
epochs = 30
timesteps = 38
hidden_layer = 64

validation_session = 4
test_session = 5

label_dictionary = {'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}

## Helper methods

In [3]:
def batch_generator(data, labels):
    steps = math.ceil(data.shape[0] / batch_size)
    for batch_step in range(0, steps):
        start = batch_size * batch_step
        end = batch_size * (batch_step + 1)
        yield data[start:end], labels[start:end]
        
def build_encoded_array(emotion_label):
    initialized_array = [0. for key in label_dictionary]
    initialized_array[label_dictionary[emotion_label]] = 1.
    return initialized_array
        
def onehot_encode(label_minibatch):
    return [build_encoded_array(emotion_label) for emotion_label in label_minibatch]

## Load dataset: 3 sessions for training, 1 for validation, 1 for test

In [4]:
train_dataset = []
validation_dataset = []
test_dataset = []

train_labels = []
validation_labels = []
test_labels = []

session_string = 'session{0}'

for i in range(1, 6):
    formatted = session_string.format(i)
    for spectrogram in glob.glob(dataset_folder.format(formatted)):
        loaded_spec = np.load(spectrogram)
        for x in loaded_spec['spectrograms']:
            if i != validation_session and i != test_session:
                train_dataset.append(x) 
            elif i == validation_session:
                validation_dataset.append(x)
            elif i == test_session:
                test_dataset.append(x)
        for x in loaded_spec['labels']:
            if i != validation_session and i != test_session:
                train_labels.append(x) 
            elif i == validation_session:
                validation_labels.append(x)
            elif i == test_session:
                test_labels.append(x)
        
train_dataset = np.asarray(train_dataset)
train_labels = np.asarray(train_labels)

validation_dataset = np.asarray(validation_dataset)
validation_labels = np.asarray(validation_labels)

test_dataset = np.asarray(test_dataset)
test_labels = np.asarray(test_labels)

In [5]:
train_data = np.zeros([len(train_dataset), train_dataset[0].shape[0], train_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(train_dataset)):
    train_data[data,:,:] = train_dataset[data]
    
validation_data = np.zeros([len(validation_dataset), validation_dataset[0].shape[0], validation_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(validation_dataset)):
    validation_data[data,:,:] = validation_dataset[data]
    
test_data = np.zeros([len(test_dataset), test_dataset[0].shape[0], test_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(test_dataset)):
    test_data[data,:,:] = test_dataset[data]

In [6]:
validation_data = validation_data.reshape((validation_data.shape[0], input_size))
validation_labels = onehot_encode(validation_labels)

test_data = test_data.reshape((test_data.shape[0], input_size))
test_labels = onehot_encode(test_labels)

## Convolutional

In [7]:
def conv2d(to_process, weights, biases, strides=1):
    conv_out = tf.nn.conv2d(to_process, weights, strides=[1, strides, strides, 1], padding='SAME')
    bias_out = tf.nn.bias_add(conv_out, biases)
    relu_out = tf.nn.relu(bias_out)
    return relu_out

def maxpool2d(to_pool, pool_size=2):
    maxpool_out = tf.nn.max_pool(to_pool, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME')
    return maxpool_out

def nn_pipeline(spectrogram, weights, biases):
    
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])
    
    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)
    
    second_layer_out = conv2d(first_maxpool_out, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)
    
    third_layer_out = conv2d(second_maxpool_out, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)
    
    reshape_for_fc = tf.reshape(third_maxpool_out, [-1, weights['fully_connected_weights'].get_shape().as_list()[0]])
    fully_connected_out = tf.add(tf.matmul(reshape_for_fc, weights['fully_connected_weights']), biases['fully_connected_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    fully_connected_out_2 = tf.add(tf.matmul(fully_connected_dropout, weights['fully_connected_weights_2']), biases['fully_connected_biases_2'])
    fully_connected_activation_2 = tf.nn.relu(fully_connected_out_2)
    fully_connected_dropout_2 = tf.nn.dropout(fully_connected_activation_2, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout_2, weights['output']), biases['output'])
    
    return prediction

## Testing

In [8]:
def nn_pipeline_rnn(spectrogram, weights, biases):
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])

    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)

    second_layer_out = conv2d(first_maxpool_out, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)

    third_layer_out = conv2d(second_maxpool_out, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)

    interim_shape = third_maxpool_out.get_shape().as_list()
    transposed = tf.transpose(third_maxpool_out, perm=[0, 2, 1, 3])
    reshape_for_rnn = tf.reshape(transposed, [-1, interim_shape[2], interim_shape[1]*interim_shape[3]])
    reshape_for_rnn.set_shape([None, interim_shape[2], interim_shape[1]*interim_shape[3]])

    hidden_list = [hidden_layer, hidden_layer]

    gru_fw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]
    gru_bw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]

    gru_output, _, _, = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(gru_fw_cell, gru_bw_cell, reshape_for_rnn, dtype=tf.float32)
    interim_shape_gru = tf.shape(gru_output)
    gru_flatten = tf.reshape(gru_output, [-1, interim_shape_gru[1]*interim_shape_gru[2]])
    
    fully_connected_out = tf.add(tf.matmul(gru_flatten, weights['gru_weights']), biases['gru_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout, weights['output']), biases['output'])
        
    return prediction

In [9]:
weights = {
    'first_layer_weights': tf.Variable(tf.random_normal([10, 15, 1, 16])),
    'second_layer_weights': tf.Variable(tf.random_normal([8, 10, 16, 24])),
    'third_layer_weights': tf.Variable(tf.random_normal([5, 8, 24, 32])),
    'fully_connected_weights': tf.Variable(tf.random_normal([25*38*32, 2048])),
    'fully_connected_weights_2': tf.Variable(tf.random_normal([2048, 2048])),
    'output': tf.Variable(tf.random_normal([2048, output_size]))
}

biases = {
    'first_layer_biases': tf.Variable(tf.random_normal([16])),
    'second_layer_biases': tf.Variable(tf.random_normal([24])),
    'third_layer_biases': tf.Variable(tf.random_normal([32])),
    'fully_connected_biases': tf.Variable(tf.random_normal([2048])),
    'fully_connected_biases_2': tf.Variable(tf.random_normal([2048])),
    'output': tf.Variable(tf.random_normal([output_size]))
}

In [10]:
weights_rnn = {
    'first_layer_weights': tf.Variable(tf.random_normal([10, 15, 1, 16])),
    'second_layer_weights': tf.Variable(tf.random_normal([8, 10, 16, 24])),
    'third_layer_weights': tf.Variable(tf.random_normal([5, 8, 24, 32])),
    'gru_weights': tf.Variable(tf.random_normal([2*hidden_layer*timesteps, hidden_layer])),
    'output': tf.Variable(tf.random_normal([hidden_layer, output_size]))
}

biases_rnn = {
    'first_layer_biases': tf.Variable(tf.random_normal([16])),
    'second_layer_biases': tf.Variable(tf.random_normal([24])),
    'third_layer_biases': tf.Variable(tf.random_normal([32])),
    'gru_biases': tf.Variable(tf.random_normal([hidden_layer])),
    'output': tf.Variable(tf.random_normal([output_size]))
}

In [11]:
X = tf.placeholder(tf.float32, [None, input_size])
Y = tf.placeholder(tf.float32, [None, output_size])

keep_prob = tf.placeholder(tf.float32)

logits = nn_pipeline_rnn(X, weights_rnn, biases_rnn)
prediction = tf.nn.softmax(logits)

loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y))
optimizer = tf.train.AdaGradOptimizer  (learning_rate=learning_rate)
train_trigger = optimizer.minimize(loss_function)

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

In [13]:
with tf.Session() as sess:
    sess.run(init)
    
    print('Session initialized.')
    
    for epoch_step in range(1, epochs+1):
        batch_gen = batch_generator(train_data, train_labels)
        for data_minibatch, label_minibatch in batch_gen:
            data_reshaped = data_minibatch.reshape((data_minibatch.shape[0], input_size))
            labels_encoded = onehot_encode(label_minibatch)
            sess.run(train_trigger, feed_dict={X: data_reshaped, Y: labels_encoded, keep_prob: dropout})
            train_loss, train_acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
            print("Training accuracy of batch: Loss={:.4f}".format(train_loss) +", Accuracy={:.4f}.".format(train_acc))
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
        print("Validation after epoch #" + str(epoch_step) + ", Validation Loss= "+ "{:.4f}".format(loss) + ", Validation Accuracy= " + "{:.3f}".format(acc))
        
    print("Optimization Finished!")
    
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data,
                                      Y: test_labels,
                                      keep_prob: 1.0}))

Session initialized.
Training accuracy of batch: Loss=332.3513, Accuracy=0.2215.
Training accuracy of batch: Loss=248.9896, Accuracy=0.3403.
Training accuracy of batch: Loss=220.2322, Accuracy=0.3018.
Training accuracy of batch: Loss=224.0689, Accuracy=0.3451.
Training accuracy of batch: Loss=181.1047, Accuracy=0.2584.
Training accuracy of batch: Loss=192.1497, Accuracy=0.3034.
Training accuracy of batch: Loss=189.5631, Accuracy=0.3467.
Training accuracy of batch: Loss=152.6131, Accuracy=0.3547.
Training accuracy of batch: Loss=166.7588, Accuracy=0.3419.
Training accuracy of batch: Loss=143.9497, Accuracy=0.2729.
Training accuracy of batch: Loss=135.6255, Accuracy=0.2713.
Training accuracy of batch: Loss=140.8757, Accuracy=0.3018.
Training accuracy of batch: Loss=116.5505, Accuracy=0.2889.
Training accuracy of batch: Loss=109.7144, Accuracy=0.2953.
Training accuracy of batch: Loss=98.1888, Accuracy=0.2809.
Training accuracy of batch: Loss=91.5290, Accuracy=0.2905.
Training accuracy of 

Training accuracy of batch: Loss=4.4726, Accuracy=0.1621.
Training accuracy of batch: Loss=3.8780, Accuracy=0.1750.
Training accuracy of batch: Loss=3.5909, Accuracy=0.1589.
Training accuracy of batch: Loss=3.2941, Accuracy=0.1750.
Training accuracy of batch: Loss=3.5691, Accuracy=0.1750.
Training accuracy of batch: Loss=4.4866, Accuracy=0.2536.
Validation after epoch #8, Validation Loss= 4.6958, Validation Accuracy= 0.257
Training accuracy of batch: Loss=5.0069, Accuracy=0.2151.
Training accuracy of batch: Loss=4.1091, Accuracy=0.2376.
Training accuracy of batch: Loss=3.9979, Accuracy=0.1717.
Training accuracy of batch: Loss=3.4052, Accuracy=0.2343.
Training accuracy of batch: Loss=3.2786, Accuracy=0.2071.
Training accuracy of batch: Loss=3.3082, Accuracy=0.1653.
Training accuracy of batch: Loss=3.2689, Accuracy=0.1605.
Training accuracy of batch: Loss=3.3150, Accuracy=0.1814.
Training accuracy of batch: Loss=3.4136, Accuracy=0.1830.
Training accuracy of batch: Loss=2.8816, Accuracy=0

Training accuracy of batch: Loss=2.0740, Accuracy=0.1990.
Training accuracy of batch: Loss=2.0730, Accuracy=0.2472.
Training accuracy of batch: Loss=2.1598, Accuracy=0.1958.
Training accuracy of batch: Loss=2.2467, Accuracy=0.2151.
Training accuracy of batch: Loss=2.2887, Accuracy=0.2279.
Training accuracy of batch: Loss=2.2336, Accuracy=0.1974.
Training accuracy of batch: Loss=2.0845, Accuracy=0.2006.
Training accuracy of batch: Loss=2.4145, Accuracy=0.1461.
Training accuracy of batch: Loss=2.0508, Accuracy=0.2071.
Training accuracy of batch: Loss=2.0728, Accuracy=0.1958.
Training accuracy of batch: Loss=2.2714, Accuracy=0.2472.
Validation after epoch #16, Validation Loss= 2.4877, Validation Accuracy= 0.225
Training accuracy of batch: Loss=2.2212, Accuracy=0.2440.
Training accuracy of batch: Loss=2.5658, Accuracy=0.2392.
Training accuracy of batch: Loss=2.4670, Accuracy=0.2263.
Training accuracy of batch: Loss=2.2218, Accuracy=0.2311.
Training accuracy of batch: Loss=2.1670, Accuracy=

Training accuracy of batch: Loss=1.8725, Accuracy=0.2616.
Training accuracy of batch: Loss=2.1298, Accuracy=0.2231.
Training accuracy of batch: Loss=2.1334, Accuracy=0.2343.
Training accuracy of batch: Loss=1.8636, Accuracy=0.1974.
Training accuracy of batch: Loss=1.9983, Accuracy=0.2263.
Training accuracy of batch: Loss=2.0375, Accuracy=0.2424.
Training accuracy of batch: Loss=2.3641, Accuracy=0.2456.
Training accuracy of batch: Loss=1.8891, Accuracy=0.2167.
Training accuracy of batch: Loss=1.8024, Accuracy=0.2327.
Training accuracy of batch: Loss=1.9197, Accuracy=0.2295.
Training accuracy of batch: Loss=1.9342, Accuracy=0.1926.
Training accuracy of batch: Loss=1.9088, Accuracy=0.1717.
Training accuracy of batch: Loss=1.8869, Accuracy=0.2295.
Training accuracy of batch: Loss=1.9088, Accuracy=0.2119.
Training accuracy of batch: Loss=1.8180, Accuracy=0.2006.
Training accuracy of batch: Loss=2.1248, Accuracy=0.2279.
Validation after epoch #24, Validation Loss= 1.9016, Validation Accuracy

In [None]:
sess.close()