In [62]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile
from keras.utils.np_utils import to_categorical
from tqdm import tqdm
from utils import which_set, BASE_TRAIN_FOLDER_SPEC, BASE_TRAIN_FOLDER_WAV, labels_to_ints
from time import time
import json

In [40]:
def make_sets(set_name):
    """Load every spectrogram file of one dataset split into model-ready arrays.

    Each non-hidden entry of ``BASE_TRAIN_FOLDER_SPEC/<set_name>`` is read with
    ``np.load``. The part of the file name before the first ``.`` is taken as
    the label and mapped to a class index through ``labels_to_ints``; that
    index is repeated once per sample stored in the file.

    Args:
        set_name (str): name of the sub-folder to read (this notebook uses
            "train", "validation" and "testing").

    Returns:
        tuple: ``(x, y)`` where ``x`` is all loaded arrays concatenated along
        axis 0 with a trailing channel axis appended, and ``y`` is the
        matching one-hot label matrix with ``len(labels_to_ints)`` columns.

    Raises:
        ValueError: if the folder contains no non-hidden files.
    """
    folder = os.path.join(BASE_TRAIN_FOLDER_SPEC, set_name)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore anything derived from it) reproducible across runs.
    filenames = sorted(name for name in os.listdir(folder)
                       if not name.startswith('.'))
    if not filenames:
        raise ValueError("no data files found in {!r}".format(folder))

    x = []
    y = []
    for filename in filenames:
        label = filename.split('.')[0]
        inputs = np.load(os.path.join(folder, filename))
        # One class index per sample held in this file.
        y.append([labels_to_ints[label]] * len(inputs))
        x.append(inputs)

    x = np.concatenate(x)
    x = np.expand_dims(x, -1) # needed by ResNet
    y = to_categorical(np.concatenate(y), num_classes=len(labels_to_ints))

    return x, y

In [41]:
# Load the training split as (inputs, one-hot labels).
x_train, y_train = make_sets("train")

In [42]:
# Sanity check: inputs and labels must share the same first dimension.
x_train.shape, y_train.shape

((51490, 128, 16, 1), (51490, 31))

In [43]:
# Load the validation split; it is passed to fit() as validation_data.
x_val, y_val = make_sets("validation")

In [44]:
# Sanity check: inputs and labels must share the same first dimension.
x_val.shape, y_val.shape

((6798, 128, 16, 1), (6798, 31))

In [45]:
# Load the test split; it is only used for the final evaluate() call.
x_test, y_test = make_sets("testing")

In [46]:
# Sanity check: inputs and labels must share the same first dimension.
x_test.shape, y_test.shape

((6835, 128, 16, 1), (6835, 31))

## Model

In [47]:
# source : https://github.com/chrisdinant/speech/blob/master/models.py

from keras.layers import *
from keras.layers.wrappers import TimeDistributed
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras import backend as K
from keras.utils import plot_model

class ResNet():
    """Builder for a small residual convolutional classifier.

    The network is a stem convolution + max-pooling, followed by one stage
    per entry of ``filters_list``. Every stage consists of three residual
    blocks; every stage after the first starts with a stride-2 convolution
    that switches to that stage's filter count. Global average pooling and a
    softmax ``Dense`` layer produce the class probabilities.

    Usage: 
        sr = ResNet([4,8,16], input_size=(50,50,1), output_size=12)
        sr.build()
        followed by sr.m.compile(loss='categorical_crossentropy', 
                                 optimizer='adadelta', metrics=["accuracy"])
        save plotted model with: 
            keras.utils.plot_model(sr.m, to_file = '<location>.png', 
                                   show_shapes=True)
    """

    # Number of residual blocks stacked in every stage.
    BLOCKS_PER_STAGE = 3

    def __init__(self,
                 filters_list=None, 
                 input_size=None, 
                 output_size=None,
                 initializer='glorot_uniform'):
        """Store the architecture parameters; no Keras objects are created.

        Args:
            filters_list (iterable of int): filter count of each stage, in
                order. Omitting it gives an empty list, which ``build``
                rejects.
            input_size (tuple): shape of one input sample, passed to
                ``keras.layers.Input``.
            output_size (int): number of units of the final softmax layer.
            initializer (str): kernel initializer for every Conv2D layer.
        """
        # Copy into a fresh list: a literal `[]` default would be one object
        # shared by every instance created without an explicit filters_list.
        self.filters_list = [] if filters_list is None else list(filters_list)
        self.input_size = input_size
        self.output_size = output_size
        self.initializer = initializer
        self.m = None  # set by build()
    
    def _block(self, filters, inp):
        """ one residual block in a ResNet (without the shortcut addition)
        
        Applies BatchNormalization -> ReLU -> 3x3 Conv2D twice.
        
        Args:
            filters (int): number of convolutional filters
            inp (tf.tensor): output from previous layer
            
        Returns:
            tf.tensor: output of residual block
        """
        layer_1 = BatchNormalization()(inp)
        act_1 = Activation('relu')(layer_1)
        conv_1 = Conv2D(filters, (3,3), 
                        padding = 'same', 
                        kernel_initializer = self.initializer)(act_1)
        layer_2 = BatchNormalization()(conv_1)
        act_2 = Activation('relu')(layer_2)
        conv_2 = Conv2D(filters, (3,3), 
                        padding = 'same', 
                        kernel_initializer = self.initializer)(act_2)
        return conv_2

    def _residual(self, filters, inp):
        """Run ``inp`` through one ``_block`` and add ``inp`` back to the result.

        ``inp`` must already have ``filters`` channels, otherwise the element-wise
        addition cannot be performed.
        """
        return Add()([self._block(filters, inp), inp])

    def build(self):
        """Create the Keras model and keep a reference to it in ``self.m``.

        Returns:
            keras.engine.training.Model

        Raises:
            ValueError: if ``filters_list`` is empty.
        """
        # Fail early with a clear message instead of an IndexError raised
        # part-way through graph construction.
        if not self.filters_list:
            raise ValueError("filters_list must contain at least one filter count")

        i = Input(shape = self.input_size, name = 'input')

        # Stem: bring the input to the first stage's channel count, then pool.
        stem_filters = self.filters_list[0]
        x = Conv2D(stem_filters, (3,3), 
                   padding = 'same', 
                   kernel_initializer = self.initializer)(i)
        x = MaxPooling2D(padding = 'same')(x)
        for _ in range(self.BLOCKS_PER_STAGE):
            x = self._residual(stem_filters, x)

        # Remaining stages: a strided convolution halves the spatial size and
        # changes the channel count so the shortcut additions stay valid.
        for filt in self.filters_list[1:]:
            x = Conv2D(filt, (3,3),
                       strides = (2,2),
                       padding = 'same',
                       activation = 'relu',
                       kernel_initializer = self.initializer)(x)
            for _ in range(self.BLOCKS_PER_STAGE):
                x = self._residual(filt, x)

        x = GlobalAveragePooling2D()(x)
        x = Dense(self.output_size, activation = 'softmax')(x)
        
        self.m = Model(i,x)
        return self.m

In [48]:
# source : https://github.com/chrisdinant/speech/blob/master/train.ipynb
# Shape of a single sample: the training array's shape without its first axis.
input_size = x_train.shape[1:]
# Filter count of each ResNet stage, in order.
filters_list = [8,16,32]
# One softmax output per known label.
output_size = len(labels_to_ints)
# Run tag and architecture name; both are interpolated into the file names of
# the model plot, the checkpoint, the TensorBoard log dir and the history dump.
# NOTE(review): '1003' is presumably a date stamp — confirm its format.
date = '1003'
arch = 'resnet8_16_32'

In [50]:
# Create the builder, construct the Keras model (kept on sr.m) and compile it
# for multi-class classification against one-hot targets.
sr = ResNet(filters_list, input_size, output_size)
sr.build()
sr.m.compile(loss='categorical_crossentropy', 
             optimizer='adadelta', 
             metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


In [53]:
# plot_model writes the PNG straight to disk, so make sure the target
# directory exists before rendering the architecture diagram.
os.makedirs('./models', exist_ok=True)
plot_model(sr.m, 
           to_file = './models/{}_{}.png'.format(arch,date), 
           show_shapes = True)

In [56]:
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping

# The checkpoint is written at the end of an epoch; create the folder up front
# so a missing directory cannot abort training after an epoch has already run.
os.makedirs('./models', exist_ok=True)

# Keep a single file holding the best model seen so far (save_best_only).
checkpointer = ModelCheckpoint(filepath='./models/{}_{}_best.h5'.format(arch, date),
                               verbose=0,
                               save_best_only=True)

# time() makes the log directory unique for each execution of this cell; every
# fit() call that reuses this callback writes into that same directory.
tensorboard = TensorBoard(log_dir = './logs/{}_{}_{}'.format(arch, date, time()), 
                          histogram_freq = 0, 
                          write_graph = True, 
                          write_images = True)

In [57]:
# First training run: 5 epochs over the training split, scored against the
# validation split; checkpointing and TensorBoard logging are done by the
# callbacks. The returned History object is kept for the JSON dump below.
history = sr.m.fit(x_train, 
                   y_train, 
                   batch_size = 128, 
                   epochs = 5, 
                   verbose = 1, shuffle = True, 
                   #class_weight = class_weights,
                   validation_data = (x_val, y_val), 
                   callbacks = [checkpointer, tensorboard])

Instructions for updating:
Use tf.cast instead.
Train on 51490 samples, validate on 6798 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [63]:
# Persist the per-epoch metrics of the most recent fit() call.
os.makedirs("logs", exist_ok=True)
with open(f"logs/{date}_{arch}_history.json", "w") as f:
    # Coerce every value to a built-in float: Keras can record NumPy scalars
    # (e.g. float32), which the json module refuses to serialise.
    json.dump(
        {metric: [float(value) for value in values]
         for metric, values in history.history.items()},
        f,
    )

In [64]:
# Continue training the same model for 3 more epochs.
# Resume the epoch counter where the previous run stopped: the TensorBoard
# callback is reused, so restarting at epoch 0 would log epochs 0-2 a second
# time into the same log directory. With `initial_epoch`, Keras treats
# `epochs` as the index of the final epoch, not the number of epochs to run.
resume_from = history.epoch[-1] + 1 if history.epoch else 0
# NOTE: `history` is rebound and from here on only holds this continuation.
history = sr.m.fit(x_train, 
                   y_train, 
                   batch_size = 128, 
                   initial_epoch = resume_from, 
                   epochs = resume_from + 3, 
                   verbose = 1, shuffle = True, 
                   #class_weight = class_weights,
                   validation_data = (x_val, y_val), 
                   callbacks = [checkpointer, tensorboard])

Train on 51490 samples, validate on 6798 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [65]:
# Score the model on the held-out test split. The result is [loss, accuracy],
# matching the loss and metrics passed to compile().
# NOTE(review): this uses the weights left by the last training epoch, not the
# best checkpoint written by ModelCheckpoint — confirm that is intended.
sr.m.evaluate(x_test, y_test, batch_size=128)



[1.5919170822465865, 0.6032187272966911]

### Pre-trained weights
As an alternative to training from scratch, Keras ships ResNet models with pre-trained weights; see https://keras.io/applications/#resnet