In [None]:
import librosa
import librosa.display
import os
import glob
import pickle

In [None]:
import itertools
import random
import numpy as np
import pandas as pd
import statistics 
import scipy.stats
import math

In [None]:
import tensorflow as tf
from tensorflow.keras import Input, layers, Model, utils, initializers, losses, optimizers, Sequential, callbacks, backend
from keras.utils import conv_utils
import tensorflow.keras as K

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import recall_score as recall
from sklearn.metrics import confusion_matrix as confusion

In [None]:
# Colab-only setup: mount Google Drive so the dataset and output folders
# referenced below are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make the project folder the working directory; `dataset_dir` and
# `ser_output_dir` below are relative to it.
# NOTE(review): hard-coded, user-specific absolute Drive path -- adjust when running elsewhere.
%cd '/content/drive/My Drive/BTP - Dev Priya and Kushagra/Speech Emotion Recognition/'

/content/drive/My Drive/BTP - Dev Priya and Kushagra/Speech Emotion Recognition


In [None]:
# Sanity check: list the per-fold pickle files available in the dataset folder.
%ls ./Dataset/

'=1.2.0'
 dataValid_1FTest_1M.pkl
 dataValid_1MTest_2F.pkl
 dataValid_2FTest_2M.pkl
 dataValid_2MTest_3F.pkl
 dataValid_3FTest_3M.pkl
 dataValid_3MTest_4F.pkl
 dataValid_4FTest_4M.pkl
 dataValid_4MTest_5F.pkl
 dataValid_5FTest_5M.pkl
 dataValid_5MTest_1F.pkl
 steam-train-whistle-daniel_simon-converted-from-mp3.wav


In [None]:
# Folder (relative to the working directory) holding the per-fold 'data*.pkl' files
# read by retrieve_preprocessed_data().
dataset_dir = 'Dataset'
# Root folder under which save_results() creates one 'Run_<run>' sub-folder per run.
ser_output_dir = 'Final Outputs/FishnetIEMOCAP'

In [None]:
# Speaker identifiers; the global `idx` indexes into this list to select the
# validation speaker (speaker_list[idx]) and the test speaker (the next one, wrapping).
speaker_list = ['1F', '1M', '2F', '2M', '3F', '3M', '4F', '4M', '5F', '5M']
# NOTE(review): num_runs, ser_acc and seed_value are not referenced by any code
# visible in this notebook section -- presumably leftovers or used further down; confirm.
num_runs = 5
ser_acc = []
seed_value = None

# Model

## Custom Layer

In [None]:
class CustomAttention(tf.keras.layers.Layer):
    """Attention pooling layer that collapses axis 1 of its input.

    A score is computed for every position along axis 1 by projecting the last
    axis through `W_omega`/`b_omega` (sigmoid) and scaling by `u_omega`; the
    scores are softmax-normalised and used as weights for a weighted sum over
    axis 1.

    Required keyword argument:
        hidden_size: size of the last axis of the inputs (it is the first
            dimension of `W_omega`, which is contracted against that axis).
    """

    def __init__(self, *args, **kwargs):
        # `hidden_size` is mandatory and must be removed from kwargs before
        # they are forwarded to the Keras base class.
        self.hidden_size = kwargs.pop('hidden_size')
        super().__init__(*args, **kwargs)

        def glorot_variable(shape, name):
            # All three weights share the same initialiser configuration.
            return tf.Variable(
                initializers.GlorotNormal(seed=0)(shape=shape),
                dtype=tf.float32,
                trainable=True,
                name=name)

        self.W_omega = glorot_variable([self.hidden_size, 1], "W_omega")
        self.b_omega = glorot_variable([1], "b_omega")
        self.u_omega = glorot_variable([1], "u_omega")

    def call(self, inputs, **kwargs):
        """Return the attention-weighted sum of `inputs` over axis 1."""
        projected = tf.tensordot(inputs, self.W_omega, axes=1) + self.b_omega
        v = tf.sigmoid(projected)
        # Contracting the trailing size-1 axis leaves one score per position.
        vu = tf.tensordot(v, self.u_omega, axes=1)
        alphas = layers.Softmax()(vu)
        weighted = inputs * tf.expand_dims(alphas, -1)
        return tf.reduce_sum(weighted, 1)

    def get_config(self):
        """Return the base layer config extended with `hidden_size`."""
        base_config = super().get_config()
        return {**base_config, 'hidden_size': self.hidden_size}

## Build Model

### Fishnet 14.0
An attempt to implement the architecture as described in the paper itself.

In [None]:
# Hyper-parameters shared by the model-building helpers below.
# NOTE(review): only OUT_CHANNELS, BIAS_INIT, KERNEL_INIT, PADDING_CONV,
# BN_MOMENTUM, LEAKY_ALPHA, PADDING_CONV_UFLB, BILSTM_UNITS_SPEC, UNITS_FCN and
# LR are read by the code visible in this notebook section; the remaining keys
# look like leftovers from earlier experiments -- confirm before removing.
hparams = {
    # --- first convolution ---
    'OUT_CHANNELS': [6],
    'FILTER_CONV1': 128,
    'KERNEL_CONV1': (3, 3),
    'STRIDE_CONV1': (1, 1),
    # --- initialisers / padding / normalisation / activation ---
    'BIAS_INIT': 'ones',
    'KERNEL_INIT': 'glorot_normal',
    'PADDING_CONV': 'SAME',
    'BN_MOMENTUM': 0.99,
    'LEAKY_ALPHA': 0.3,
    # --- pooling after the first convolution ---
    'SIZE_POOL_CONV1': (2, 4),
    'PADDING_POOL_CONV1': 'VALID',
    # --- dilated / UFLB convolutions ---
    'NUM_DIL_LAYERS': 3,
    'FILTER_CONV_UFLB': 128,
    'KERNEL_CONV_UFLB': (3, 3),
    'STRIDE_CONV_UFLB': (1, 1),
    'DIL_RATE_CONV_UFLB': (2, 2),
    'PADDING_CONV_UFLB': 'SAME',
    # --- recurrent and dense head ---
    'BILSTM_UNITS_SPEC': 240,
    'UNITS_FCN': [240, 48],
    # --- optimiser ---
    'LR': 0.00001,
}

bottleneck function source: https://github.com/kevin-ssy/FishNet/blob/master/models/fish_block.py

In [None]:
# @tf.function
def conv_block(input, out_channels, kernel_size=(5, 3), strides=(1, 1)):
    """Apply BatchNormalization -> Conv2D -> LeakyReLU to `input`.

    Args:
        input: tensor fed to the block.
        out_channels: number of filters of the convolution.
        kernel_size: convolution kernel size.
        strides: convolution strides.

    Returns:
        The activated convolution output. Momentum, initialisers, padding
        ('PADDING_CONV_UFLB') and the LeakyReLU slope come from `hparams`.
    """
    normalized = layers.BatchNormalization(
        momentum=hparams['BN_MOMENTUM'])(input)
    convolved = layers.Conv2D(
        filters=out_channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=hparams['PADDING_CONV_UFLB'],
        kernel_initializer=hparams['KERNEL_INIT'],
        bias_initializer=hparams['BIAS_INIT'],
    )(normalized)
    return layers.LeakyReLU(alpha=hparams['LEAKY_ALPHA'])(convolved)

In [None]:
# @tf.function
def bottleneck(input, filters, mode="NORM", kernel_size=(3, 3), strides=(1, 1), dilation_rate=(1, 1)):
    """Three stacked BN -> Conv2D -> LeakyReLU stages plus an optional shortcut.

    Args:
        input: tensor fed to the block; its last dimension is the input
            channel count.
        filters: output channel count of the main path.
        mode: "UP" disables the shortcut branch; any other value enables it
            when the channel count changes.
        kernel_size: accepted for signature compatibility but not used; every
            convolution here uses a fixed (5, 3) kernel.
        strides: strides of the shortcut convolution only (main path uses (1, 1)).
        dilation_rate: accepted for signature compatibility but not used; the
            main path uses dilations (2, 2), (1, 1), (2, 2).

    Returns:
        (x, y): `x` is the main-path output; `y` is the projected shortcut
        (BN -> Conv2D -> LeakyReLU on `input`) or None when `mode == "UP"` or
        the input already has `filters` channels.
    """
    inplanes = input.shape[-1]

    def bn_conv_act(tensor, n_filters, conv_strides, conv_dilation):
        # One BN -> Conv2D(5x3) -> LeakyReLU stage, configured from hparams.
        tensor = layers.BatchNormalization(
            momentum=hparams['BN_MOMENTUM'])(tensor)
        tensor = layers.Conv2D(
            filters=n_filters,
            kernel_size=(5, 3),
            strides=conv_strides,
            dilation_rate=conv_dilation,
            bias_initializer=hparams['BIAS_INIT'],
            kernel_initializer=hparams['KERNEL_INIT'],
            padding=hparams['PADDING_CONV'],
        )(tensor)
        return layers.LeakyReLU(alpha=hparams['LEAKY_ALPHA'])(tensor)

    # Main path: the first stage keeps the input channel count, the next two
    # produce `filters` channels.
    main_stages = (
        (inplanes, (2, 2)),
        (filters, (1, 1)),
        (filters, (2, 2)),
    )
    x = input
    for stage_filters, stage_dilation in main_stages:
        x = bn_conv_act(x, stage_filters, (1, 1), stage_dilation)

    # Shortcut projection is only needed outside "UP" mode and only when the
    # channel count changes.
    needs_projection = mode != "UP" and inplanes != filters
    y = bn_conv_act(input, filters, strides, (1, 1)) if needs_projection else None

    return x, y


channelwise_reduction, ur_block, dr_block, se_block source: paper

In [None]:
# @tf.function
def channelwise_reduction(x, k):
    """Reduce the channel axis by a factor `k` by summing groups of `k` channels.

    The last axis (size cin) is reshaped to (cin // k, k) and summed over the
    trailing group axis, so the output has cin // k channels. Axes 1 and 2 are
    taken from the static shape of `x`.
    """
    grouped_shape = [-1, x.shape[1], x.shape[2], x.shape[-1] // k, k]
    grouped = tf.reshape(x, grouped_shape)
    return tf.reduce_sum(grouped, -1)

In [None]:
# @tf.function
def ur_block(input, k=4, pool_size=(2, 2), kernel_size=(5, 3), strides=(1, 1), dilation_rate=(1, 1)):
    """Up-sampling block: reduce channels by `k`, then up-sample spatially.

    The bottleneck main path (mode "UP", no shortcut) producing
    `channels // k` filters is added to the channel-wise reduction of `input`,
    and the sum is up-sampled by `pool_size`.
    """
    channels = input.shape[-1]
    refined, _ = bottleneck(input, channels // k, "UP",
                            kernel_size, strides, dilation_rate)
    reduced = channelwise_reduction(input, k)
    merged = layers.Add()([refined, reduced])
    return layers.UpSampling2D(size=pool_size)(merged)

In [None]:
# @tf.function
def dr_block(input, k=1, pool_size=(2, 2), kernel_size=(5, 3), strides=(1, 1), dilation_rate=(1, 1)):
    """Down-sampling residual block: bottleneck + shortcut, then max-pooling.

    Args:
        input: tensor fed to the block.
        k: channel multiplier; the block outputs int(channels * k) channels.
        pool_size: max-pooling window applied after the residual sum.
        kernel_size, strides, dilation_rate: forwarded to `bottleneck`.

    Returns:
        The max-pooled sum of the bottleneck main path and the shortcut. The
        shortcut is the projection returned by `bottleneck` when the channel
        count changes, otherwise `input` itself.
    """
    units = input.shape[-1]
    cout = int(units * k)
    mx, y = bottleneck(input, cout, "NORM", kernel_size, strides, dilation_rate)
    # Use an identity test: `y != None` would dispatch to the tensor's
    # overloaded `__ne__` instead of simply checking for a missing shortcut.
    shortcut = y if y is not None else input
    merged = layers.Add()([mx, shortcut])
    return layers.MaxPool2D(pool_size=pool_size)(merged)

In [None]:
# @tf.function
def se_block(input, transform_fn=None, r=8):
    """Squeeze-and-excitation style channel gating.

    Args:
        input: 4-D feature tensor.
        transform_fn: optional callable applied to `input` first.
        r: reduction ratio of the hidden Dense layer (channels // r units).

    Returns:
        The (optionally transformed) input with every channel scaled by a
        sigmoid gate computed from its global average.
    """
    features = input if transform_fn is None else transform_fn(input)

    # Squeeze: one value per channel; excite: Dense -> LeakyReLU -> Dense(sigmoid).
    squeezed = layers.GlobalAveragePooling2D()(features)
    channels = squeezed.shape[-1]
    gate = layers.Dense(units=channels // r, activation="linear")(squeezed)
    gate = layers.LeakyReLU(alpha=hparams['LEAKY_ALPHA'])(gate)
    gate = layers.Dense(units=channels, activation="sigmoid")(gate) # Sigmoid activation

    # Move the batch axis next to the channel axis so the 2-D gate broadcasts
    # against the trailing two axes, then restore the original axis order.
    batch_last = tf.transpose(features, perm=[1,2,0,3])
    scaled = tf.math.multiply(batch_last, gate)
    return tf.transpose(scaled, perm=[2,0,1,3])

In [None]:
# Per-sample input shape (batch axis excluded) of the 'spec_features' model input.
# NOTE(review): presumably (time frames, frequency bins, channels) -- confirm against the preprocessing.
input_shape = (300, 40, 3)

In [44]:
def build_model():
    """Build and compile the "Fishnet" classifier.

    Structure: an initial conv block, a down-sampling TAIL (`dr_block`s), a
    squeeze-excitation block, an up-sampling BODY (`ur_block`s concatenated
    with tail features), a down-sampling HEAD (`dr_block`s concatenated with
    body/tail features), then BatchNorm -> BiLSTM -> attention pooling ->
    dense layers -> 4-way softmax.

    Reads the module-level `input_shape` and `hparams`. Prints the shape of
    each intermediate stage and the Keras model summary.

    Returns:
        The compiled `Model` (categorical cross-entropy, Adam with
        hparams['LR'], metric 'categorical_accuracy').
    """
    model_input_spec = Input(shape=input_shape, name='spec_features')
    x = conv_block(model_input_spec, hparams['OUT_CHANNELS'][0])

    ############################# TAIL ########################################
    # Shapes quoted below are the ones printed for input_shape == (300, 40, 3).
    x = dr_block(x, k=4, pool_size=(2, 1))
    t1 = x  # (None, 150, 40, 24)
    print(('t1', x.shape))

    x = dr_block(x, pool_size=(1, 2))
    t2 = x  # (None, 150, 20, 24)
    print(('t2', x.shape))

    x = dr_block(x, pool_size=(1, 2))
    t3 = x  # (None, 150, 10, 24)
    print(('t3', x.shape))

    x = dr_block(x, k=2, pool_size=(2, 1))
    t4 = x  # (None, 75, 10, 48)
    print(('t4', x.shape))

    x = se_block(x)

    ############################# BODY ########################################
    # Each ur_block halves the channels and up-samples back to the resolution
    # of the tail feature it is concatenated with.
    x = ur_block(x, k=2, pool_size=(2, 1))
    x = layers.concatenate([t3, x])
    b1 = x  # (None, 150, 10, 48)
    print(('b1', x.shape))

    x = ur_block(x, k=2, pool_size=(1, 2))
    x = layers.concatenate([t2, x])
    b2 = x  # (None, 150, 20, 48)
    print(('b2', x.shape))

    x = ur_block(x, k=2, pool_size=(1, 2))
    x = layers.concatenate([t1, x])  # (None, 150, 40, 48)
    print(('b3', x.shape))

    ############################# HEAD ########################################
    # Each dr_block pools down to the resolution of the skip feature it is
    # concatenated with.
    x = dr_block(x, pool_size=(1, 2))
    x = layers.concatenate([b2, x])
    print(('h1', x.shape))

    x = dr_block(x, pool_size=(1, 2))
    x = layers.concatenate([b1, x])
    print(('h2', x.shape))

    x = dr_block(x, pool_size=(2, 1))
    x = layers.concatenate([t4, x])
    print(('h3', x.shape))

    #############################################################################

    # Flatten axes 2 and 3 into one feature axis so axis 1 becomes the
    # sequence axis consumed by the recurrent layer.
    time_step = x.shape[1]
    linear_units = x.shape[2] * x.shape[3]
    x = tf.reshape(x, [-1, time_step, linear_units])

    x = layers.BatchNormalization(momentum=hparams['BN_MOMENTUM'])(x)

    x = layers.Bidirectional(layers.LSTM(units=hparams['BILSTM_UNITS_SPEC'],
                                         bias_initializer=hparams['BIAS_INIT'],
                                         kernel_initializer=hparams['KERNEL_INIT'],
                                         return_sequences=True))(x)
    x = CustomAttention(hidden_size=x.shape[2])(x)

    for i, n in enumerate(hparams['UNITS_FCN']):
        x = layers.Dense(units=n, activation="linear",
                         name='fcn_dense'+str(i+1))(x)
        x = layers.LeakyReLU(alpha=hparams['LEAKY_ALPHA'],
                             name='fcn_leaky'+str(i+1))(x)

    x = layers.Dense(units=4, activation="softmax", name='Softmax')(x)

    model = Model(inputs=model_input_spec, outputs=x, name="Fishnet")

    model.compile(
        loss=losses.CategoricalCrossentropy(from_logits=False),
        optimizer=optimizers.Adam(
            learning_rate=hparams['LR']
        ),
        metrics=['categorical_accuracy'],
    )
    # summary() prints the table itself and returns None; wrapping it in
    # print() would emit a stray "None" line after the table.
    model.summary()

    return model

In [45]:
model = build_model()
# utils.plot_model(model, show_shapes=True)

('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_510 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________________________________________

# Retrieve Data

In [None]:
def retrieve_preprocessed_data():
    valid_actor = speaker_list[idx]
    test_actor = speaker_list[(idx+1)%10]
    file_code = 'Valid_' + valid_actor + 'Test_' + test_actor
    data_file_name = dataset_dir + '/data' + file_code + '.pkl'
    print(data_file_name)

    f = open(data_file_name, 'rb')
    output = pickle.load(f)
    train_features, train_labels = output[0], output[1]  
    test_features, test_labels, test_segments_per_utterance = output[2], output[3], output[5]
    valid_features, valid_labels, valid_segments_per_utterance = output[6], output[7], output[9]  
    f.close()

    train_features = tf.convert_to_tensor(train_features, dtype=tf.float32)
    valid_features = tf.convert_to_tensor(valid_features, dtype=tf.float32)
    test_features = tf.convert_to_tensor(test_features, dtype=tf.float32)

    train_labels = tf.one_hot(train_labels, 4, dtype=tf.float32)
    valid_labels = tf.one_hot(valid_labels, 4, dtype=tf.float32)
    test_labels = tf.one_hot(test_labels, 4, dtype=tf.float32)

    train_labels = tf.reshape(train_labels, [train_labels.shape[0], 4])
    valid_labels = tf.reshape(valid_labels, [valid_labels.shape[0], 4])
    test_labels = tf.reshape(test_labels, [test_labels.shape[0], 4])

    return train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance

In [None]:
def save_results(test_accuracy, test_conf):
    """Pickle `(test_accuracy, test_conf)` for the current run and fold.

    The file is written to '<ser_output_dir>/Run_<run>/<file_code>.pkl', where
    `run` and `idx` are module-level variables. Missing directories (including
    parents) are created. The target path is printed.

    Args:
        test_accuracy: value stored as the first tuple element.
        test_conf: value stored as the second tuple element.
    """
    run_dir = os.path.join(ser_output_dir, 'Run_' + str(run))
    # makedirs also creates missing parents and tolerates existing directories.
    os.makedirs(run_dir, exist_ok=True)

    # NOTE(review): this naming does not match retrieve_preprocessed_data(),
    # where speaker_list[idx] is the *validation* speaker and the next entry is
    # the test speaker. Here speaker_list[idx] is labelled "Test", the "Valid"
    # part is only the previous speaker's trailing character, and joining a
    # string with '_' splits it per character (e.g. 'Valid_M_Test_1_F').
    # The file name is kept unchanged because other code may load these files
    # by name -- confirm and fix both sides together.
    test_actors = speaker_list[idx]
    valid_actors = speaker_list[idx-1][1:]
    file_code = 'Valid_' + ("_").join(valid_actors) + '_Test_' + ("_").join(test_actors)
    data_file_name = run_dir + '/' + file_code + '.pkl'
    print(data_file_name)

    # The context manager closes the file even if pickling fails.
    with open(data_file_name, 'wb') as f:
        pickle.dump((test_accuracy, test_conf), f)

# Train and Evaluate

In [None]:
def train_and_evaluate(model, num_epochs=1200, batch_size=60):
    """Train `model` batch by batch, tracking the best validation score.

    Uses the module-level `train_features`/`train_labels`,
    `valid_features`/`valid_labels`/`valid_segments_per_utterance` and
    `test_features`/`test_labels`/`test_segments_per_utterance`.

    Each "epoch" here is a single `train_on_batch` step on a contiguous slice
    of the training data. Every 5th step the model is evaluated on the
    validation set; whenever the validation score improves, the test set is
    evaluated and those test results are kept. The kept results are printed
    and passed to `save_results` at the end.

    Args:
        model: compiled Keras model.
        num_epochs: number of training steps.
        batch_size: number of training samples per step.
    """
    best_valid_accuracy = 0
    best_epoch = 0
    # Stay None until a validation step improves on `best_valid_accuracy`, so
    # the final report cannot hit an unbound local variable.
    test_accuracy, test_conf = None, None
    num_train = train_features.shape[0]

    for i in range(num_epochs):
        # Slide a window over the training set, wrapping via the modulo; the
        # last window before the end may be shorter than batch_size.
        start = (i * batch_size) % num_train
        end = min(start + batch_size, num_train)
        train_batch = train_features[start:end, :]
        train_batch_label = train_labels[start:end, :]
        loss = model.train_on_batch(train_batch, train_batch_label, return_dict=True)

        if (i + 1) % 5 == 0:
            valid_acc_uw, valid_conf = evaluate(model, valid_features,
                                                valid_segments_per_utterance,
                                                valid_labels)

            if valid_acc_uw > best_valid_accuracy:
                best_epoch = i + 1
                best_valid_accuracy = valid_acc_uw
                test_accuracy, test_conf = evaluate(model, test_features,
                                                    test_segments_per_utterance,
                                                    test_labels)

                print('*'*30)
                print("Epoch: %05d" %(i+1))
                print(loss)
                print("Valid_UA: " + str(valid_acc_uw))
                print("Test UA: " + str(test_accuracy))

        if (i + 1) % 100 == 0:
            print('#'*15 + " Epoch: %05d " %(i+1) + '#'*15 )

    print('*'*30)
    print("Best Epoch: %05d" %(best_epoch))
    print("Best Valid Accuracy: " + str(best_valid_accuracy))

    if test_conf is None:
        # No validation step improved on the initial score (e.g. fewer than 5
        # steps were run), so there are no test results to report or save.
        print("No test results recorded; nothing saved.")
        return

    print("Test_UA: " + str(test_accuracy))
    print('Test Confusion Matrix:["ang","sad","hap","neu"]')
    print(test_conf)

    save_results(test_accuracy, test_conf)
    return

In [None]:
def evaluate(model, spec_features, segments_per_utterance, labels, pooling='max'):
    """Score `model` at utterance level by pooling segment predictions.

    Args:
        model: object with a `predict` method returning one row of 4 class
            scores per segment.
        spec_features: segment-level inputs passed to `model.predict`.
        segments_per_utterance: indexable sequence; element `j` holds, at
            position 0, the number of consecutive segments of utterance `j`.
        labels: one-hot (or score) matrix with one row per utterance.
        pooling: 'max' takes the per-class maximum over an utterance's
            segments; any other value takes the per-class sum.

    Returns:
        (acc_uw, conf): macro-averaged recall and the confusion matrix of the
        arg-max classes.
    """
    num_utterances = len(segments_per_utterance)
    utterance_scores = np.empty((num_utterances, 4), dtype=np.float32)
    segment_scores = model.predict(spec_features)
    pool = np.max if pooling == 'max' else np.sum

    # Segments are stored back to back, so walk through them with a running offset.
    offset = 0
    for row in range(num_utterances):
        count = segments_per_utterance[row][0]
        utterance_scores[row, :] = pool(segment_scores[offset:offset + count, :], 0)
        offset += count

    true_classes = np.argmax(labels, 1)
    predicted_classes = np.argmax(utterance_scores, 1)
    acc_uw = recall(true_classes, predicted_classes, average='macro')
    conf = confusion(true_classes, predicted_classes)
    return acc_uw, conf

# SER AVERAGE: Fishnet 14.0 

## Run 1 66.8%

In [None]:
# Run identifier read by save_results(): results go to '<ser_output_dir>/Run_<run>'.
run = 1

In [None]:
# Fold idx=0: validation speaker '1F', test speaker '1M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 0
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_306 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=1: validation speaker '1M', test speaker '2F'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 1
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1MTest_2F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_340 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=2: validation speaker '2F', test speaker '2M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 2
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2FTest_2M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_374 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=3: validation speaker '2M', test speaker '3F'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 3
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2MTest_3F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_34 (BatchNo (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=4: validation speaker '3F', test speaker '3M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 4
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_3FTest_3M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_68 (BatchNo (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=5: validation speaker '3M', test speaker '4F'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 5
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_3MTest_4F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_102 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=6: validation speaker '4F', test speaker '4M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 6
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_4FTest_4M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_510 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=7: validation speaker '4M', test speaker '5F'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 7
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_4MTest_5F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_136 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=8: validation speaker '5F', test speaker '5M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 8
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_5FTest_5M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_170 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=9: validation speaker '5M', test speaker '1F' (the speaker index wraps around).
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 9
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_5MTest_1F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_204 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Sum of ten hand-copied scores (presumably the per-fold test UAs of the run above -- confirm).
# Note this evaluates to the sum (about 6.69); divide by 10 to obtain the average.
0.6842990752416982 + 0.7157142857142856 + 0.6921873815167325  + 0.5732502146975831 + 0.6763567906442709 + 0.694933199019099 + 0.7029124289795022 + 0.6775548852399529 + 0.5507145550527903 + 0.7178936313683235

## Run 2

In [None]:
# Run identifier read by save_results(): results go to '<ser_output_dir>/Run_<run>'.
run = 2

In [None]:
# Fold idx=0: validation speaker '1F', test speaker '1M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 0
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_238 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=1: validation speaker '1M', test speaker '2F'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 1
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1MTest_2F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_272 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Fold idx=2: validation speaker '2F', test speaker '2M'.
# Loads the fold data into module-level variables, then trains a freshly built model.
idx = 2
train_features, train_labels, valid_features, valid_labels, valid_segments_per_utterance, \
    test_features, test_labels, test_segments_per_utterance = retrieve_preprocessed_data()

model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2FTest_2M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_306 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 3
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2MTest_3F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_340 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

KeyboardInterrupt: ignored

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 4
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 5
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 6
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 7
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 8
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 9
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# Scratch cell: sum of ten hard-coded scores (presumably one run's ten per-split
# test accuracies - TODO confirm); divide by 10 to get the run's mean.
# NOTE(review): the identical literals appear in every run's summary cell, so
# check that they really belong to this run.
0.6842990752416982 + 0.7157142857142856 + 0.6921873815167325  + 0.5732502146975831 + 0.6763567906442709 + 0.694933199019099 + 0.7029124289795022 + 0.6775548852399529 + 0.5507145550527903 + 0.7178936313683235

## Run 3 (heading previously read "Run 1 66.8%"; TODO: replace with this run's accuracy)

In [46]:
# Run number for the training cells below.
# NOTE(review): presumably selects the 'Run_<n>' output folder that the Eval
# section reads back - confirm in train_and_evaluate().
run = 3

In [47]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 0
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_544 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 1
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1MTest_2F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_612 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 2
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 3
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 4
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 5
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 6
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 7
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 8
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [48]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 9
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_5MTest_1F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_578 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# Scratch cell: sum of ten hard-coded scores (presumably one run's ten per-split
# test accuracies - TODO confirm); divide by 10 to get the run's mean.
# NOTE(review): the identical literals appear in every run's summary cell, so
# check that they really belong to this run.
0.6842990752416982 + 0.7157142857142856 + 0.6921873815167325  + 0.5732502146975831 + 0.6763567906442709 + 0.694933199019099 + 0.7029124289795022 + 0.6775548852399529 + 0.5507145550527903 + 0.7178936313683235

## Run 4 (heading previously read "Run 1 66.8%"; TODO: replace with this run's accuracy)

In [None]:
# Run number for the training cells below.
# NOTE(review): presumably selects the 'Run_<n>' output folder that the Eval
# section reads back - confirm in train_and_evaluate().
run = 4

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 0
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_306 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 1
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 2
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 3
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 4
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 5
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 6
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 7
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 8
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 9
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# Scratch cell: sum of ten hard-coded scores (presumably one run's ten per-split
# test accuracies - TODO confirm); divide by 10 to get the run's mean.
# NOTE(review): the identical literals appear in every run's summary cell, so
# check that they really belong to this run.
0.6842990752416982 + 0.7157142857142856 + 0.6921873815167325  + 0.5732502146975831 + 0.6763567906442709 + 0.694933199019099 + 0.7029124289795022 + 0.6775548852399529 + 0.5507145550527903 + 0.7178936313683235

## Run 5 (heading previously read "Run 1 66.8%"; TODO: replace with this run's accuracy)

In [None]:
# Run number for the training cells below.
# NOTE(review): presumably selects the 'Run_<n>' output folder that the Eval
# section reads back - confirm in train_and_evaluate().
run = 5

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 0
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_374 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 1
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1MTest_2F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_408 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [41]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 2
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2FTest_2M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_442 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [42]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 3
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2MTest_3F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_476 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 4
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 5
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 6
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 7
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 8
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 9
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# Scratch cell: sum of ten hard-coded scores (presumably one run's ten per-split
# test accuracies - TODO confirm); divide by 10 to get the run's mean.
# NOTE(review): the identical literals appear in every run's summary cell, so
# check that they really belong to this run.
0.6842990752416982 + 0.7157142857142856 + 0.6921873815167325  + 0.5732502146975831 + 0.6763567906442709 + 0.694933199019099 + 0.7029124289795022 + 0.6775548852399529 + 0.5507145550527903 + 0.7178936313683235

# SER AVERAGE: Fishnet 13.0 

## Run 1 

In [None]:
# Run number for the training cells below.
# NOTE(review): presumably selects the 'Run_<n>' output folder that the Eval
# section reads back - confirm in train_and_evaluate().
run = 1

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 0
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1FTest_1M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_937 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 1
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_1MTest_2F.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_976 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 2
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_2FTest_2M.pkl
('t1', TensorShape([None, 150, 40, 24]))
('t2', TensorShape([None, 150, 20, 24]))
('t3', TensorShape([None, 150, 10, 24]))
('t4', TensorShape([None, 75, 10, 48]))
('b1', TensorShape([None, 150, 10, 48]))
('b2', TensorShape([None, 150, 20, 48]))
('b3', TensorShape([None, 150, 40, 48]))
('h1', TensorShape([None, 150, 20, 96]))
('h2', TensorShape([None, 150, 10, 144]))
('h3', TensorShape([None, 75, 10, 192]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_1015 (Batch (None, 300, 40, 3)   12          spec_features[0][0]              
______________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 3
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 4
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

Dataset/dataValid_3FTest_3M.pkl
('t1', TensorShape([None, 150, 40, 32]))
('t2', TensorShape([None, 150, 20, 32]))
('t3', TensorShape([None, 150, 10, 32]))
('t4', TensorShape([None, 75, 10, 64]))
('b1', TensorShape([None, 150, 10, 64]))
('b2', TensorShape([None, 150, 20, 64]))
('b3', TensorShape([None, 150, 40, 64]))
('h1', TensorShape([None, 150, 20, 128]))
('h2', TensorShape([None, 150, 10, 192]))
('h3', TensorShape([None, 75, 10, 160]))
Model: "Fishnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
spec_features (InputLayer)      [(None, 300, 40, 3)] 0                                            
__________________________________________________________________________________________________
batch_normalization_102 (BatchN (None, 300, 40, 3)   12          spec_features[0][0]              
_____________________________________________

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 5
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 6
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 7
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 8
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# `idx` is a notebook-level global; retrieve_preprocessed_data() takes no
# arguments, so it presumably reads `idx` to pick the data split (confirm there).
idx = 9
(
    train_features, train_labels,
    valid_features, valid_labels, valid_segments_per_utterance,
    test_features, test_labels, test_segments_per_utterance,
) = retrieve_preprocessed_data()

# Build the model and pass it to train_and_evaluate().
model = build_model()
train_and_evaluate(model)

In [None]:
# Scratch cell: running sum of the scores noted so far for this run.
# The original expression ended in a dangling "+", which is a SyntaxError.
# NOTE(review): only three values are present - append the remaining ones
# before dividing by the number of splits.
(0.7083333333333333 + 0.71875 + 0.7604166666666666)

## Eval

In [None]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and a two-sided Student-t confidence interval.

    Args:
        data: Sequence of numeric observations.
        confidence: Coverage of the interval, e.g. 0.95 for a 95% interval.

    Returns:
        A tuple ``(mean, lower, upper)`` where ``lower``/``upper`` are the mean
        minus/plus the interval half-width.
    """
    samples = 1.0 * np.array(data)
    dof = len(samples) - 1  # degrees of freedom of the t distribution
    center = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    # Two-sided interval: use the upper (1 + confidence) / 2 quantile.
    t_crit = scipy.stats.t.ppf((1 + confidence) / 2., dof)
    half_width = std_err * t_crit
    return center, center - half_width, center + half_width

In [None]:
# Collect the mean test accuracy of every run that has results on disk, then
# print the across-run mean, standard deviation and 95% confidence interval.
avg_accs = []
# The loop variable is `run_id`, not `run`, so this cell no longer overwrites
# the notebook-level `run` set by the "run = N" cells.
for run_id in [1, 2, 3, 4, 5]:
    accs = []
    confs = []
    # Named `run_dir` rather than `dir`, which shadowed the builtin dir().
    run_dir = os.path.join(ser_output_dir, 'Run_' + str(run_id))
    # isdir (not exists): a plain file with this name would make listdir raise.
    if not os.path.isdir(run_dir):
        continue
    for filename in sorted(os.listdir(run_dir)):
        filename = os.path.join(run_dir, filename)
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # ser_output_dir at result files produced by this notebook.
        # `with` closes the file even if unpickling raises.
        with open(filename, 'rb') as f:
            test_accuracy, test_conf = pickle.load(f)
        accs.append(test_accuracy)
        confs.append(test_conf)
    if not accs:
        # An empty run folder would make np.mean return nan (with a warning)
        # and turn every summary statistic below into nan.
        continue
    avg_accs.append(np.mean(accs))
print(avg_accs)
m, l, r = mean_confidence_interval(avg_accs)
# np.std defaults to the population formula (ddof=0).
sdev = np.std(avg_accs)
print('SER AVERAGE: ' + str(m))
print('Standard Deviation: ' + str(sdev))
print('95% Confidence Interval: [' + str(l) + ', ' + str(r) + ']' )