In [1]:
import h5py
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Dropout, Masking, TimeDistributed
%matplotlib inline
import matplotlib.pyplot as plt
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Root directory holding one sub-folder per split, each with its own features.h5.
data_path = '../data'
# Open both splits read-only; the handles stay open because later cells read from them lazily.
train_data, test_data = (
    h5py.File(os.path.join(data_path, split, 'features.h5'), 'r')
    for split in ('TRAIN', 'TEST')
)

In [13]:
# Keep every training utterance except those whose sentence id (key[8:]) starts with 'SA'.
train_keys = [utt for utt in train_data.keys() if not utt[8:].startswith('SA')]

In [46]:
# Speaker ids (matched against key[3:8] when splitting the TEST file) whose
# utterances make up the core test set.
test_speakers = set("""
    MDAB0 MWBT0 FELC0
    MTAS1 MWEW0 FPAS0
    MJMP0 MLNT0 FPKT0
    MLLL0 MTLS0 FJLM0
    MBPM0 MKLT0 FNLP0
    MCMJ0 MJDH0 FMGD0
    MGRT0 MNJM0 FDHC0
    MJLN0 MPAM0 FMLD0
""".split())

In [64]:
# Speaker ids (matched against key[3:8] when splitting the TEST file) whose
# utterances are held out as the validation set.
validation_speakers = set("""
    FAKS0 FDAC1 FJEM0 MGWT0 MJAR0
    MMDB1 MMDM2 MPDF0 FCMH0 FKMS0
    MBDG0 MBWM0 MCSH0 FADG0 FDMS0
    FEDW0 MGJF0 MGLB0 MRTK0 MTAA0
    MTDT0 MTHC0 MWJG0 FNMR0 FREW0
    FSEM0 MBNS0 MMJR0 MDLS0 MDLF0
    MDVC0 MERS0 FMAH0 FDRW0 MRCS0
    MRJM4 FCAL1 MMWH0 FJSJ0 MAJC0
    MJSW0 MREB0 FGJD0 FJMG0 MROA0
    MTEB0 MJFC0 MRJR0 FMML0 MRWS1
""".split())

In [65]:
# Partition the TEST file into validation, core-test and full-test utterances.
validation_keys, core_test_keys, full_test_keys = [], [], []
for utt in test_data.keys():
    speaker, sentence_type = utt[3:8], utt[8:10]
    # 'SA' sentences are left out of every set.
    if sentence_type == 'SA':
        continue
    # Validation speakers (test-set membership takes precedence) go nowhere else.
    if speaker not in test_speakers and speaker in validation_speakers:
        validation_keys.append(utt)
        continue
    # Everything remaining is part of the full test set; core speakers are
    # additionally recorded in the core test set.
    if speaker in test_speakers:
        core_test_keys.append(utt)
    full_test_keys.append(utt)

In [70]:
# Sanity checks on the split: collect speaker ids (key[3:8]) and sentence ids (key[8:]) per set.
train_speakers = {utt[3:8] for utt in train_keys}
val_speakers = {utt[3:8] for utt in validation_keys}
core_test_speakers = {utt[3:8] for utt in core_test_keys}
full_test_speakers = {utt[3:8] for utt in full_test_keys}
train_sents = {utt[8:] for utt in train_keys}
val_sents = {utt[8:] for utt in validation_keys}
# Both intersections should print as empty sets: training and validation must
# share neither speakers nor sentences.
print(train_speakers & val_speakers)
print(train_sents & val_sents)
# One size per line; these should match Halberstadt (1998), page 34.
print(
    *(len(group) for group in (
        train_speakers, val_speakers, full_test_speakers, core_test_speakers,
        train_keys, validation_keys, core_test_keys, full_test_keys,
    )),
    sep='\n'
)

set()
set()
462
50
118
24
3696
400
192
944


In [28]:
class TimitGenerator(object):
    """Endless mini-batch generator over variable-length utterances.

    ``datafile`` is a mapping (e.g. an open ``h5py.File``) in which every entry
    ``datafile[key]`` exposes a ``'features'`` and a ``'labels'`` item, both 2-D
    with time along axis 0. Each batch is padded along the time axis to the
    length of its longest utterance.
    """

    def __init__(self, datafile, keys=None, batch_size=20, shuffle=True, mask_value=0.):
        """Set up the generator.

        Args:
            datafile: mapping from utterance key to an object with 'features'
                and 'labels' items.
            keys: iterable of keys to draw from; defaults to all keys of ``datafile``.
            batch_size: number of utterances per yielded batch.
            shuffle: when true, the key order is reshuffled after every full pass.
            mask_value: value used to pad the feature sequences.

        Raises:
            ValueError: if there are no keys to draw from.
        """
        self._datafile = datafile
        self._batch_size = batch_size
        # Always copy: shuffle_keys() reorders in place and must never touch the
        # list owned by the caller.
        self._keys = list(datafile.keys()) if keys is None else list(keys)
        if not self._keys:
            raise ValueError('TimitGenerator needs at least one key to draw samples from')
        self._batch_index = 0
        # Honour the caller's choice (this used to be hard-coded to True).
        self._shuffle = shuffle
        self._mask_value = mask_value

    def shuffle_keys(self):
        """Shuffle the internal key order in place."""
        random.shuffle(self._keys)

    def next_sample(self):
        """Return the next key, wrapping around (and optionally reshuffling) after a full pass."""
        key = self._keys[self._batch_index]
        self._batch_index += 1
        if self._batch_index == len(self._keys):
            self._batch_index = 0
            if self._shuffle:
                self.shuffle_keys()
        return key

    def normalize(self, sequence):
        """Standardise every column of ``sequence`` to zero mean and unit variance over axis 0.

        Columns with zero standard deviation are only mean-centred (they come out
        as zeros) instead of producing NaN/inf through a division by zero.
        """
        sequence = np.asarray(sequence)
        std = np.std(sequence, axis=0)
        std = np.where(std == 0, 1.0, std)
        return (sequence - np.mean(sequence, axis=0)) / std

    def pad_features(self, sequence, target_length):
        """Pad ``sequence`` at the end of axis 0 with ``mask_value`` up to ``target_length`` rows."""
        return np.pad(sequence,
                      ((0, target_length - sequence.shape[0]), (0, 0)), 'constant',
                      constant_values=((0, self._mask_value), (0, 0)))

    def pad_labels(self, sequence, target_length):
        """Pad ``sequence`` at the end of axis 0 with zeros up to ``target_length`` rows."""
        return np.pad(sequence,
                      ((0, target_length - sequence.shape[0]), (0, 0)), 'constant',
                      constant_values=((0, 0), (0, 0)))

    def generator(self):
        """Yield ``(features, labels, sample_weights)`` batches forever.

        ``features`` and ``labels`` are 3-D arrays (batch, time, ...) padded to the
        longest utterance in the batch; ``sample_weights`` is a (batch, time)
        integer array holding 1 for real time steps and 0 for padding.
        """
        while True:
            batch_keys = [self.next_sample() for _ in range(self._batch_size)]
            # Materialise each dataset once so a file-backed dataset is not
            # re-read for every later numpy operation.
            batch_features = [np.asarray(self._datafile[key]['features']) for key in batch_keys]
            batch_labels = [np.asarray(self._datafile[key]['labels']) for key in batch_keys]
            lengths = [f.shape[0] for f in batch_features]
            max_length = max(lengths)

            sample_weights = np.zeros((self._batch_size, max_length), dtype=int)
            for i, length in enumerate(lengths):
                sample_weights[i, :length] = 1

            batch_features = np.array([self.pad_features(self.normalize(f), max_length) for f in batch_features])
            batch_labels = np.array([self.pad_labels(f, max_length) for f in batch_labels])

            yield batch_features, batch_labels, sample_weights
            
            

In [29]:
# Train only on the filtered key list: without `keys=` the generator would draw from
# every key in the file, including the 'SA' sentences that train_keys leaves out.
gen = TimitGenerator(train_data, keys=train_keys)

In [30]:
# Frame-level classifier: two stacked LSTMs over 13-dimensional input frames,
# followed by a per-frame softmax over 39 classes.
model = Sequential([
    # mask_value matches the generator's default padding value for features.
    Masking(mask_value=0., input_shape=(None, 13)),
    LSTM(100, return_sequences=True),
    Dropout(0.2),
    LSTM(100, return_sequences=True),
    # TimeDistributed is needed to apply the Dense part at every time step.
    TimeDistributed(Dense(39, activation='softmax')),
])
model.summary()
# 'temporal' sample weights: the generator supplies one weight per time step.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
    sample_weight_mode='temporal',
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_6 (Masking)          (None, None, 13)          0         
_________________________________________________________________
lstm_11 (LSTM)               (None, None, 100)         45600     
_________________________________________________________________
dropout_6 (Dropout)          (None, None, 100)         0         
_________________________________________________________________
lstm_12 (LSTM)               (None, None, 100)         80400     
_________________________________________________________________
time_distributed_6 (TimeDist (None, None, 39)          3939      
Total params: 129,939
Trainable params: 129,939
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Train for 5 epochs of 100 generator batches each.
model.fit_generator(
    generator=gen.generator(),
    steps_per_epoch=100,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5

KeyboardInterrupt: 

In [10]:
# `test_file` is not defined anywhere in this notebook (a leftover from a deleted
# cell), so rebinding `test_data` to it raised NameError on a fresh kernel.
# `test_data` is already opened in the data-loading cell; just confirm that the
# sample utterance inspected below is present.
assert 'DR1FAKS0SA1' in test_data, "sample utterance 'DR1FAKS0SA1' not found in test_data"

In [11]:
# Pick one utterance from the TEST file and grab its feature and label datasets.
X1, y1 = (test_data['DR1FAKS0SA1'][field] for field in ('features', 'labels'))

In [12]:
# The model was trained on per-utterance normalised features (TimitGenerator.normalize),
# so the same normalisation has to be applied before predicting; feeding the raw
# features would put the inputs on a different scale than the network has seen.
X1_norm = gen.normalize(X1)
# Add a batch axis of size 1 for predict(), then drop it again from the output.
pred = model.predict(np.expand_dims(X1_norm, axis=0))[0]
# Index of the highest-scoring class for every row (time step) of the prediction.
phones = np.argmax(pred, axis=1)

NameError: name 'model' is not defined

In [None]:
# Display the arg-max class index computed for each row of the model prediction.
phones

In [None]:
# Reference class index per row of the label matrix, for comparison with `phones`.
np.asarray(y1).argmax(axis=1)

In [13]:
# Display the label dataset handle itself (its repr shows shape and dtype, not the values).
y1

<HDF5 dataset "labels": shape (387, 39), type "<f8">

In [14]:
# Read only the first row of the label dataset (one value per class).
y1[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])