In [1]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import os
import glob
import math
from functools import reduce
import operator as op
from sklearn.utils import class_weight

dataset_folder = os.path.abspath("./individual_npzs/{0}/*.npz")
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

## Parameters

In [2]:
learning_rate = 0.0003
batch_size = 64
dropout = 0.5
max_pool = 2
strides = 1
input_size = 60000
output_size = 4
epochs = 30
timesteps = 38
seed_num = 72
hidden_layer = 64

validation_session = 4
test_session = 5

label_dictionary = {'neu': 0, 'hap': 1, 'sad': 2, 'ang': 3}

tf.set_random_seed(seed_num)

## Helper methods

In [3]:
def batch_generator(data, labels):
    steps = math.ceil(data.shape[0] / batch_size)
    for batch_step in range(0, steps):
        start = batch_size * batch_step
        end = batch_size * (batch_step + 1)
        yield data[start:end], labels[start:end]
        
def build_encoded_array(emotion_label):
    initialized_array = [0. for key in label_dictionary]
    initialized_array[label_dictionary[emotion_label]] = 1.
    return initialized_array
        
def onehot_encode(label_minibatch):
    return [build_encoded_array(emotion_label) for emotion_label in label_minibatch]

def compute_class_weights(labels):
    class_weights = np.ndarray.tolist(class_weight.compute_class_weight('balanced', np.unique(labels), labels))
    #class_weights_dict = {index: value for (index, value) in enumerate(class_weights)}
    return class_weights

## Load dataset: 3 sessions for training, 1 for validation, 1 for test

In [4]:
train_dataset = []
validation_dataset = []
test_dataset = []

train_labels = []
validation_labels = []
test_labels = []

session_string = 'session{0}'

for i in range(1, 6):
    formatted = session_string.format(i)
    for spectrogram in glob.glob(dataset_folder.format(formatted)):
        loaded_spec = np.load(spectrogram)
        for x in loaded_spec['spectrograms']:
            if i != validation_session and i != test_session:
                train_dataset.append(x) 
            elif i == validation_session:
                validation_dataset.append(x)
            elif i == test_session:
                test_dataset.append(x)
        for x in loaded_spec['labels']:
            if i != validation_session and i != test_session:
                train_labels.append(x) 
            elif i == validation_session:
                validation_labels.append(x)
            elif i == test_session:
                test_labels.append(x)
        
train_dataset = np.asarray(train_dataset)
train_labels = np.asarray(train_labels)

validation_dataset = np.asarray(validation_dataset)
validation_labels = np.asarray(validation_labels)

test_dataset = np.asarray(test_dataset)
test_labels = np.asarray(test_labels)

In [5]:
train_data = np.zeros([len(train_dataset), train_dataset[0].shape[0], train_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(train_dataset)):
    train_data[data,:,:] = train_dataset[data]
    
validation_data = np.zeros([len(validation_dataset), validation_dataset[0].shape[0], validation_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(validation_dataset)):
    validation_data[data,:,:] = validation_dataset[data]
    
test_data = np.zeros([len(test_dataset), test_dataset[0].shape[0], test_dataset[0].shape[1]], dtype=np.uint8)
for data in range(len(test_dataset)):
    test_data[data,:,:] = test_dataset[data]

In [6]:
validation_data = validation_data.reshape((validation_data.shape[0], input_size))
validation_labels = onehot_encode(validation_labels)

test_data = test_data.reshape((test_data.shape[0], input_size))
class_weights = compute_class_weights(test_labels)
test_labels = onehot_encode(test_labels)

## Convolutional

In [7]:
def conv2d(to_process, weights, biases, strides=1):
    conv_out = tf.nn.conv2d(to_process, weights, strides=[1, strides, strides, 1], padding='SAME')
    bias_out = tf.nn.bias_add(conv_out, biases)
    relu_out = tf.nn.relu(bias_out)
    return relu_out

def maxpool2d(to_pool, pool_size=2):
    maxpool_out = tf.nn.max_pool(to_pool, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME')
    return maxpool_out

def nn_pipeline(spectrogram, weights, biases):
    
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])
    
    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)
    
    second_layer_out = conv2d(first_maxpool_out, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)
    
    third_layer_out = conv2d(second_maxpool_out, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)
    
    reshape_for_fc = tf.reshape(third_maxpool_out, [-1, weights['fully_connected_weights'].get_shape().as_list()[0]])
    fully_connected_out = tf.add(tf.matmul(reshape_for_fc, weights['fully_connected_weights']), biases['fully_connected_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    fully_connected_out_2 = tf.add(tf.matmul(fully_connected_dropout, weights['fully_connected_weights_2']), biases['fully_connected_biases_2'])
    fully_connected_activation_2 = tf.nn.relu(fully_connected_out_2)
    fully_connected_dropout_2 = tf.nn.dropout(fully_connected_activation_2, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout_2, weights['output']), biases['output'])
    
    return prediction

## Testing

In [8]:
def nn_pipeline_rnn(spectrogram, weights, biases):
    reshaped_input = tf.reshape(spectrogram, shape=[-1, 200, 300, 1])

    first_layer_out = conv2d(reshaped_input, weights['first_layer_weights'], biases['first_layer_biases'])
    first_maxpool_out = maxpool2d(first_layer_out, pool_size=2)
    first_batch_norm = tf.layers.batch_normalization(first_maxpool_out)

    second_layer_out = conv2d(first_batch_norm, weights['second_layer_weights'], biases['second_layer_biases'])
    second_maxpool_out = maxpool2d(second_layer_out, pool_size=2)
    second_batch_norm = tf.layers.batch_normalization(second_maxpool_out)

    third_layer_out = conv2d(second_batch_norm, weights['third_layer_weights'], biases['third_layer_biases'])
    third_maxpool_out = maxpool2d(third_layer_out, pool_size=2)
    third_batch_norm = tf.layers.batch_normalization(third_maxpool_out)

    interim_shape = third_batch_norm.get_shape().as_list()
    transposed = tf.transpose(third_batch_norm, perm=[0, 2, 1, 3])
    reshape_for_rnn = tf.reshape(transposed, [-1, interim_shape[2], interim_shape[1]*interim_shape[3]])
    reshape_for_rnn.set_shape([None, interim_shape[2], interim_shape[1]*interim_shape[3]])

    hidden_list = [hidden_layer, hidden_layer]

    gru_fw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]
    gru_bw_cell = [tf.contrib.rnn.GRUCell(hidden) for hidden in hidden_list]

    gru_output, _, _, = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(gru_fw_cell, gru_bw_cell, reshape_for_rnn, dtype=tf.float32)
    interim_shape_gru = tf.shape(gru_output)
    gru_flatten = tf.reshape(gru_output, [-1, interim_shape_gru[1]*interim_shape_gru[2]])
    
    fully_connected_out = tf.add(tf.matmul(gru_flatten, weights['gru_weights']), biases['gru_biases'])
    fully_connected_activation = tf.nn.relu(fully_connected_out)
    fully_connected_dropout = tf.nn.dropout(fully_connected_activation, dropout)
    
    prediction = tf.add(tf.matmul(fully_connected_dropout, weights['output']), biases['output'])
        
    return prediction

In [9]:
weights = {
    'first_layer_weights': tf.Variable(tf.random_normal([10, 15, 1, 16])),
    'second_layer_weights': tf.Variable(tf.random_normal([8, 10, 16, 24])),
    'third_layer_weights': tf.Variable(tf.random_normal([5, 8, 24, 32])),
    'fully_connected_weights': tf.Variable(tf.random_normal([25*38*32, 2048])),
    'fully_connected_weights_2': tf.Variable(tf.random_normal([2048, 2048])),
    'output': tf.Variable(tf.random_normal([2048, output_size]))
}

biases = {
    'first_layer_biases': tf.Variable(tf.random_normal([16])),
    'second_layer_biases': tf.Variable(tf.random_normal([24])),
    'third_layer_biases': tf.Variable(tf.random_normal([32])),
    'fully_connected_biases': tf.Variable(tf.random_normal([2048])),
    'fully_connected_biases_2': tf.Variable(tf.random_normal([2048])),
    'output': tf.Variable(tf.random_normal([output_size]))
}

In [10]:
weights_rnn = {
    'first_layer_weights': tf.Variable(tf.truncated_normal([10, 15, 1, 16], seed=seed_num)),
    'second_layer_weights': tf.Variable(tf.truncated_normal([8, 10, 16, 24], seed=seed_num)),
    'third_layer_weights': tf.Variable(tf.truncated_normal([5, 8, 24, 32], seed=seed_num)),
    'gru_weights': tf.Variable(tf.truncated_normal([2*hidden_layer*timesteps, hidden_layer], seed=seed_num)),
    'output': tf.Variable(tf.truncated_normal([hidden_layer, output_size], seed=seed_num))
}

biases_rnn = {
    'first_layer_biases': tf.Variable(tf.truncated_normal([16], seed=seed_num)),
    'second_layer_biases': tf.Variable(tf.truncated_normal([24], seed=seed_num)),
    'third_layer_biases': tf.Variable(tf.truncated_normal([32], seed=seed_num)),
    'gru_biases': tf.Variable(tf.truncated_normal([hidden_layer], seed=seed_num)),
    'output': tf.Variable(tf.truncated_normal([output_size], seed=seed_num))
}

In [11]:
X = tf.placeholder(tf.float32, [None, input_size])
Y = tf.placeholder(tf.float32, [None, output_size])

keep_prob = tf.placeholder(tf.float32)

logits = nn_pipeline_rnn(X, weights_rnn, biases_rnn)
prediction = tf.nn.softmax(logits)

weights = tf.reduce_sum(class_weights * Y, axis=1)
weight_regularizer = tf.add_n([tf.nn.l2_loss(weights_rnn[weights]) for weights in weights_rnn]) * 0.01
bias_regularizer = tf.add_n([tf.nn.l2_loss(biases_rnn[biases]) for biases in biases_rnn]) * 0.01

loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y)*weights) + weight_regularizer + bias_regularizer
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
train_trigger = optimizer.minimize(loss_function)

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

In [12]:
with tf.device('/GPU:1'):
    with tf.Session() as sess:
        sess.run(init)

        print('Session initialized.')

        for epoch_step in range(1, epochs+1):
            batch_gen = batch_generator(train_data, train_labels)
            for data_minibatch, label_minibatch in batch_gen:
                data_reshaped = data_minibatch.reshape((data_minibatch.shape[0], input_size))
                labels_encoded = onehot_encode(label_minibatch)
                sess.run(train_trigger, feed_dict={X: data_reshaped, Y: labels_encoded, keep_prob: dropout})
                train_loss, train_acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
                print("Training accuracy of batch: Loss={:.4f}".format(train_loss) +", Accuracy={:.4f}.".format(train_acc))
            loss, acc = sess.run([loss_function, accuracy], feed_dict={X: validation_data, Y: validation_labels, keep_prob: 1.0})
            print("Validation after epoch #" + str(epoch_step) + ", Validation Loss= "+ "{:.4f}".format(loss) + ", Validation Accuracy= " + "{:.3f}".format(acc))

        print("Optimization Finished!")

        print("Testing Accuracy:", \
            sess.run(accuracy, feed_dict={X: test_data,
                                          Y: test_labels,
                                          keep_prob: 1.0}))

Session initialized.
Training accuracy of batch: Loss=412.7390, Accuracy=0.2841.
Training accuracy of batch: Loss=280.0649, Accuracy=0.3034.
Training accuracy of batch: Loss=235.0946, Accuracy=0.3547.
Training accuracy of batch: Loss=231.0252, Accuracy=0.3644.
Training accuracy of batch: Loss=231.4066, Accuracy=0.3499.
Training accuracy of batch: Loss=232.2294, Accuracy=0.3692.
Training accuracy of batch: Loss=225.5135, Accuracy=0.3628.
Training accuracy of batch: Loss=224.9844, Accuracy=0.3339.
Training accuracy of batch: Loss=218.2465, Accuracy=0.3612.
Training accuracy of batch: Loss=213.1193, Accuracy=0.3836.
Training accuracy of batch: Loss=217.9947, Accuracy=0.3467.
Training accuracy of batch: Loss=221.3722, Accuracy=0.3403.
Training accuracy of batch: Loss=217.3204, Accuracy=0.3933.
Training accuracy of batch: Loss=220.8102, Accuracy=0.3708.
Training accuracy of batch: Loss=219.7844, Accuracy=0.3804.
Training accuracy of batch: Loss=220.7055, Accuracy=0.3676.
Training accuracy o

Training accuracy of batch: Loss=170.2940, Accuracy=0.3820.
Training accuracy of batch: Loss=170.7321, Accuracy=0.3804.
Training accuracy of batch: Loss=171.5083, Accuracy=0.3547.
Training accuracy of batch: Loss=169.8801, Accuracy=0.3644.
Training accuracy of batch: Loss=170.1165, Accuracy=0.3756.
Training accuracy of batch: Loss=169.7102, Accuracy=0.3900.
Training accuracy of batch: Loss=170.7073, Accuracy=0.3756.
Training accuracy of batch: Loss=169.3211, Accuracy=0.3884.
Training accuracy of batch: Loss=169.1282, Accuracy=0.3740.
Training accuracy of batch: Loss=168.7306, Accuracy=0.3628.
Training accuracy of batch: Loss=170.1703, Accuracy=0.3451.
Training accuracy of batch: Loss=169.4748, Accuracy=0.3788.
Training accuracy of batch: Loss=169.9965, Accuracy=0.3740.
Training accuracy of batch: Loss=171.3423, Accuracy=0.3756.
Training accuracy of batch: Loss=172.6695, Accuracy=0.3788.
Training accuracy of batch: Loss=171.0466, Accuracy=0.3868.
Training accuracy of batch: Loss=170.154

Training accuracy of batch: Loss=154.8683, Accuracy=0.3579.
Training accuracy of batch: Loss=154.9871, Accuracy=0.3644.
Training accuracy of batch: Loss=154.8611, Accuracy=0.3403.
Training accuracy of batch: Loss=154.6666, Accuracy=0.3756.
Training accuracy of batch: Loss=153.9290, Accuracy=0.3676.
Training accuracy of batch: Loss=154.3587, Accuracy=0.3628.
Training accuracy of batch: Loss=155.1525, Accuracy=0.3884.
Training accuracy of batch: Loss=154.3750, Accuracy=0.3756.
Training accuracy of batch: Loss=154.3249, Accuracy=0.3596.
Training accuracy of batch: Loss=154.0244, Accuracy=0.3323.
Training accuracy of batch: Loss=154.4106, Accuracy=0.3291.
Training accuracy of batch: Loss=154.9559, Accuracy=0.3772.
Training accuracy of batch: Loss=154.9299, Accuracy=0.3868.
Training accuracy of batch: Loss=154.9258, Accuracy=0.3852.
Training accuracy of batch: Loss=155.6367, Accuracy=0.3933.
Training accuracy of batch: Loss=154.9003, Accuracy=0.3788.
Training accuracy of batch: Loss=154.459

Training accuracy of batch: Loss=148.7001, Accuracy=0.3820.
Training accuracy of batch: Loss=148.9270, Accuracy=0.3708.
Training accuracy of batch: Loss=148.6372, Accuracy=0.3868.
Training accuracy of batch: Loss=148.8761, Accuracy=0.3852.
Training accuracy of batch: Loss=148.8384, Accuracy=0.3900.
Training accuracy of batch: Loss=148.7176, Accuracy=0.3804.
Training accuracy of batch: Loss=148.7822, Accuracy=0.3965.
Training accuracy of batch: Loss=148.6248, Accuracy=0.3852.
Training accuracy of batch: Loss=148.5679, Accuracy=0.3740.
Training accuracy of batch: Loss=148.7650, Accuracy=0.3692.
Training accuracy of batch: Loss=148.6633, Accuracy=0.3387.
Training accuracy of batch: Loss=148.5452, Accuracy=0.3852.
Training accuracy of batch: Loss=148.5417, Accuracy=0.3884.
Training accuracy of batch: Loss=148.7396, Accuracy=0.3852.
Training accuracy of batch: Loss=148.6677, Accuracy=0.3852.
Training accuracy of batch: Loss=148.7007, Accuracy=0.3868.
Training accuracy of batch: Loss=148.545

Training accuracy of batch: Loss=146.4550, Accuracy=0.3997.
Training accuracy of batch: Loss=146.4662, Accuracy=0.3997.
Training accuracy of batch: Loss=146.3649, Accuracy=0.3917.
Training accuracy of batch: Loss=146.3974, Accuracy=0.3981.
Training accuracy of batch: Loss=146.3887, Accuracy=0.3965.
Training accuracy of batch: Loss=146.3337, Accuracy=0.3997.
Training accuracy of batch: Loss=146.4179, Accuracy=0.3997.
Training accuracy of batch: Loss=146.3280, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3091, Accuracy=0.3981.
Training accuracy of batch: Loss=146.2997, Accuracy=0.3917.
Training accuracy of batch: Loss=146.3362, Accuracy=0.3917.
Training accuracy of batch: Loss=146.3916, Accuracy=0.3965.
Training accuracy of batch: Loss=146.3956, Accuracy=0.3949.
Training accuracy of batch: Loss=146.4192, Accuracy=0.3997.
Training accuracy of batch: Loss=146.4753, Accuracy=0.3981.
Training accuracy of batch: Loss=146.4203, Accuracy=0.3981.
Training accuracy of batch: Loss=146.469

Training accuracy of batch: Loss=146.3457, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3232, Accuracy=0.3981.
Training accuracy of batch: Loss=146.3187, Accuracy=0.3917.
Training accuracy of batch: Loss=146.3426, Accuracy=0.3965.
Training accuracy of batch: Loss=146.3551, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3035, Accuracy=0.3884.
Training accuracy of batch: Loss=146.3022, Accuracy=0.4013.
Training accuracy of batch: Loss=146.3304, Accuracy=0.3933.
Training accuracy of batch: Loss=146.3155, Accuracy=0.3884.
Training accuracy of batch: Loss=146.2733, Accuracy=0.3965.
Training accuracy of batch: Loss=146.2809, Accuracy=0.3900.
Training accuracy of batch: Loss=146.3395, Accuracy=0.4013.
Training accuracy of batch: Loss=146.3846, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3763, Accuracy=0.3933.
Training accuracy of batch: Loss=146.3844, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3466, Accuracy=0.4045.
Training accuracy of batch: Loss=146.338

Training accuracy of batch: Loss=146.3361, Accuracy=0.3917.
Training accuracy of batch: Loss=146.2655, Accuracy=0.3997.
Training accuracy of batch: Loss=146.3014, Accuracy=0.4013.
Training accuracy of batch: Loss=146.2831, Accuracy=0.4045.
Training accuracy of batch: Loss=146.2813, Accuracy=0.3917.
Training accuracy of batch: Loss=146.2653, Accuracy=0.3981.
Training accuracy of batch: Loss=146.2501, Accuracy=0.3949.
Training accuracy of batch: Loss=146.2718, Accuracy=0.3949.
Training accuracy of batch: Loss=146.2657, Accuracy=0.3965.
Training accuracy of batch: Loss=146.2228, Accuracy=0.3965.
Training accuracy of batch: Loss=146.2669, Accuracy=0.3884.
Training accuracy of batch: Loss=146.2989, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3112, Accuracy=0.3933.
Training accuracy of batch: Loss=146.3476, Accuracy=0.3949.
Training accuracy of batch: Loss=146.4040, Accuracy=0.3981.
Training accuracy of batch: Loss=146.3992, Accuracy=0.3868.
Training accuracy of batch: Loss=146.321

Training accuracy of batch: Loss=146.2427, Accuracy=0.4029.
Training accuracy of batch: Loss=146.2707, Accuracy=0.3917.
Training accuracy of batch: Loss=146.2045, Accuracy=0.3933.
Training accuracy of batch: Loss=146.3179, Accuracy=0.3949.
Training accuracy of batch: Loss=146.3064, Accuracy=0.3900.
Training accuracy of batch: Loss=146.2929, Accuracy=0.4013.
Training accuracy of batch: Loss=146.2658, Accuracy=0.3997.
Training accuracy of batch: Loss=146.3166, Accuracy=0.3900.
Training accuracy of batch: Loss=146.2635, Accuracy=0.3981.
Training accuracy of batch: Loss=146.2218, Accuracy=0.3949.
Training accuracy of batch: Loss=146.2931, Accuracy=0.3997.
Training accuracy of batch: Loss=146.3067, Accuracy=0.3981.
Training accuracy of batch: Loss=146.2773, Accuracy=0.3917.
Training accuracy of batch: Loss=146.3498, Accuracy=0.3900.
Training accuracy of batch: Loss=146.4197, Accuracy=0.3981.
Training accuracy of batch: Loss=146.3167, Accuracy=0.3949.
Training accuracy of batch: Loss=146.350

In [None]:
confusion_matrix = tf.confusion_matrix()

In [13]:
sess.close()