In [1]:
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM, Dropout, CuDNNLSTM
from keras.layers import TimeDistributed
from keras.layers import Bidirectional, LeakyReLU
from keras import optimizers, backend, callbacks
import keras_metrics

%run -i 'create_char_embedded_dataset.py'

y_enc_train = np.squeeze(y_enc_train,axis=1)
y_enc_test = np.squeeze(y_enc_test,axis=1)
y_enc_validation = np.squeeze(y_enc_validation,axis=1)


# Test integrity of datasets

print('===================')
print('===================')
print('Checking data integrity of datasets')
print()

def check_data_for_multiple_ones(dataset, sequence_size):
    for i, sentence in enumerate(dataset):
        if (np.count_nonzero(sentence) != sequence_size):
            print('WARNING! multiple 1s found in array in dataset at index = ', i)
            return

def check_data_for_zero_array(dataset):
    for sentence in dataset:
        if np.any(sentence) is False:
            print('WARNING! 0 array found in dataset')
            return

def check_data(dataset, dataset_name, sequence_size):
    print('Checking data integrity of', dataset_name)
    print('Shape ', dataset.shape)
    print('Max value ', np.amax(dataset))
    print('Min value ',np.amin(dataset))
    print('Any NaN? ', np.isnan(np.min(dataset)))
    print('Checking for zero arrays')
    check_data_for_zero_array(dataset)
    print('Checking for arrays with multiple ones')
    check_data_for_multiple_ones(dataset, sequence_size)
    print()

check_data(X_enc_train, 'X_enc_train', 40)
check_data(X_enc_test, 'X_enc_test', 40)
check_data(X_enc_validation, 'X_enc_validation', 40)

check_data(y_enc_train, 'y_enc_train', 1)
check_data(y_enc_test, 'y_enc_test', 1)
check_data(y_enc_validation, 'y_enc_validation', 1)

print('len(char_indices) ',len(char_indices))
print('len(indics_char) ',len(indices_char))

Using TensorFlow backend.


Processing pretrained character embeds...
Available characters for embedding
number of characters in dictionary =  94
number of dimensions =  300
embedding_vectors.keys() =  dict_keys(['$', '(', ',', '0', '4', '8', '<', '@', 'D', 'H', 'L', 'P', 'T', 'X', '\\', '`', 'd', 'h', 'l', 'p', 't', 'x', '|', '#', "'", '+', '/', '3', '7', ';', '?', 'C', 'G', 'K', 'O', 'S', 'W', '[', '_', 'c', 'g', 'k', 'o', 's', 'w', '{', '"', '&', '*', '.', '2', '6', ':', '>', 'B', 'F', 'J', 'N', 'R', 'V', 'Z', '^', 'b', 'f', 'j', 'n', 'r', 'v', 'z', '~', '!', '%', ')', '-', '1', '5', '9', '=', 'A', 'E', 'I', 'M', 'Q', 'U', 'Y', ']', 'a', 'e', 'i', 'm', 'q', 'u', 'y', '}'])

Importing Barbieri dataset into list of list of characters
size of entries_train =  293448
size of entries_test =  4809
size of entries_validation =  4718

Preparing data for character embedding

Converting dataset into character representation
corpus length: 6483529
total chars: 71

Use indices_char[NUM] to lookup character by 
indices_cha

In [6]:
# from keras.layers import CuDNNLSTM

# backend.set_floatx('float64') # default is float 32 - recommended to fix loss = nan
print('Keras backend using float type = ', backend.floatx())

print('Build model...')
model = Sequential()
model.reset_states() # Attempting to reinitialize model weights if running modules again

# model.add(Dropout(0.4)) # Attempt to fix loss nan

model.add( Bidirectional(
#                         CuDNNLSTM(
                        LSTM(
                            128,
#                             kernel_initializer='glorot_uniform', # Default is glorot_uniform
#                              activation = 'relu', # default is tanh
#                              inner_activation = 'relu', # default is hard_sigmoid
#                              recurrent_activation = 'softmax',
#                              dropout_W = 0.0, # default is 0.0
#                              dropout_U = 0.0, # default is 0.0
                             return_sequences=False,
                             input_shape=(40,len(char_indices))
                            )))


# model.add(BatchNormalization()) # Attempt to fix loss nan
# model.add(Dropout(0.4)) # Attempt to fix loss nan

# Moved these to be before the dense so they are applied to output layers of LSTM
# model.add(Activation('relu'))
# model.add(LeakyReLU(alpha=0.1)) # this was the biggest fix for the loss nan

model.add( Dense(5,
#                  activation='relu'  # Uses no activation by default
                )
         )

# model.add(BatchNormalization()) # Attempt to fix loss nan
# model.add(Dropout(0.4)) # Attempt to fix loss nan

# If enabled here, these apply to the output of the dense 5D rather than LSTM 128D
# model.add(Activation('relu'))
model.add(LeakyReLU(alpha=0.1)) # this was the biggest fix for the loss nan



optimiz = optimizers.Adam(
#                         lr = 0.000001, # default lr = 0.001 - turned off because Adam dynamically adjusts
#                         clipnorm = 1.0, # Suggested to help with NaN
#                         clipvalue = 0.5
                        )

metrics = [
              'accuracy',
#               keras_metrics.precision(label=class_value),
#               keras_metrics.recall(label=class_value)
          ]

# try using different optimizers and different optimizer configs
model.compile(loss = 'categorical_crossentropy',
              optimizer = optimiz,
              metrics = metrics
             )

Keras backend using float type =  float32
Build model...


In [None]:
print('Train...')


model.fit(X_enc_train, y_enc_train,
          batch_size=512,
          epochs=10,
          verbose=1,
          validation_data=(X_enc_validation, y_enc_validation),
          callbacks = [
              callbacks.TerminateOnNaN() # Automatically terminate fit if loss = nan
          ]
         )

# model.summary() # Use if model throws dimensions errors

score, acc = model.evaluate(X_enc_test, y_enc_test)
# score, acc, prec, rec = model.evaluate(X_enc_test, y_enc_test)
print('Test score:', score)
print('Test accuracy:', acc)
# print('Test prec:', prec)
# print('Test rec:', rec)

Train...
Train on 293448 samples, validate on 4718 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
 28160/293448 [=>............................] - ETA: 4:36 - loss: 1.3590 - acc: 0.4623