## Jupyter configurations

In [12]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

## Imports

In [1]:
import tensorflow as tf
import keras
import keras.backend as K
import numpy as np
import pandas as pd
import preprocessing as pp
import sys, inspect, argparse

Using TensorFlow backend.


## Accuracy Metric

In [2]:
# percentage of samples that exactly match
def exact_match_accuracy(y_true, y_pred):
    argmax_true = tf.math.argmax(y_true, axis=2)            # onehot to index               (batch, width, onehot:int) -> (batch, width:int)
    argmax_pred = tf.math.argmax(y_pred, axis=2)            # onehot to index               (batch, width, onehot:int) -> (batch, width:int)
    match_char = tf.math.equal(argmax_true, argmax_pred)    # match characters              (batch, width:int) -> (batch, width:bool)
    match_word = tf.math.reduce_all(match_char, axis=1)     # require all character in sample to match      (batch, width:bool) -> (batch:bool)
    match_int = tf.cast(match_word, tf.float32)             # bool to int                                   (batch:bool) -> (batch:int)
    return tf.reduce_mean(match_int)                        # percentage of samples that are an exact match (batch:int) -> int

## Log function

In [59]:
verbose = False
def log(*l, **d): 
    if verbose: print(*l, **d)
        
training_history = []

def training_log(x, y, a, b, e, l, m):
    training_history.append({'x':x, 'y':y, 'architecture':a, 'batch size':b, 'epochs':e, 'loss':l, 'accuracy':m})

## Training parameters

In [208]:
embedding_size = 15
architecture = ''

metrics = ['mean_absolute_error', 'categorical_accuracy', 'binary_accuracy', exact_match_accuracy]
loss = 'mean_squared_logarithmic_error' # poisson mean_squared_logarithmic_error categorical_crossentropy

models = {}

## Load Data

In [5]:
data = pp.load('training_data.p')

## Preprocess Data

In [190]:
subset = slice(None) # only use subset of the dataset
x_cut = (subset, slice(None))
y_cut = (subset, slice(0,12))

x_name, y_name = 'LookupDOSFilePath', 'LookupDOSFilePath'
shuffle_before, shuffle_after = False, True
test_split_frac, showcase_split_frac = 0.2, 0.005


# spli data into x and y as well as training and test set
(train_x, train_y), (test_x, test_y) = pp.train_test_split(data[x_name][subset], data[y_name][subset], test_frac=test_split_frac, shuffle_before=shuffle_before, shuffle_after=shuffle_after) # split training and test
(_, _), (showcase_x, showcase_y) = pp.train_test_split(test_x, test_y, test_frac=showcase_split_frac, shuffle_before=True, shuffle_after=False) # extract small showcase subset of test

log('train_x', train_x.shape, 'train_y', train_y.shape, test_y.shape, sep='\t')

voc_size = pp.char_count

# original size
x_org_shape = [*train_x.shape]
x_org_shape[0] = None
y_org_shape = [*train_y.shape]
y_org_shape[0] = None

# one hot encode output because the model cant do that for some reason
train_x = train_x[x_cut]
test_x = test_x[x_cut]
showcase_x = showcase_x[x_cut]
train_y = train_y[y_cut]
test_y = test_y[y_cut]
showcase_y = showcase_y[y_cut]

# output to onehot categorical encoding
train_x = keras.utils.to_categorical(train_x, voc_size)
test_x = keras.utils.to_categorical(test_x, voc_size)
showcase_x = keras.utils.to_categorical(showcase_x, voc_size)
train_y = keras.utils.to_categorical(train_y, voc_size)
test_y = keras.utils.to_categorical(test_y, voc_size)
showcase_y = keras.utils.to_categorical(showcase_y, voc_size)

# store input and output shape
x_shape = [*train_x.shape]
x_shape[0] = None
y_shape = [*train_y.shape]
y_shape[0] = None

# named shape attributes
x_shape_char, x_shape_ones, *_ = x_shape[1:] + [None]
y_shape_char, y_shape_ones, *_ = y_shape[1:] + [None]

print('train_x', train_x.shape, 'train_y', train_y.shape, 'test_x', test_x.shape, 'test_y', test_y.shape, 'showcase_x', showcase_x.shape, 'showcase_y', showcase_y.shape, sep='\t')

train_x	(19123, 182, 79)	train_y	(19123, 12, 79)	test_x	(4780, 182, 79)	test_y	(4780, 12, 79)	showcase_x	(23, 182, 79)	showcase_y	(23, 12, 79)


## Test and show samlpe output

In [60]:
def test():
    p_one_hot = model.predict(showcase_x)
    p_vector = np.argmax(p_one_hot, 2)
    p_strings = pp.decode_data(p_vector)

    y_vector = np.argmax(showcase_y, 2)
    y_strings = pp.decode_data(y_vector)

    #x_vector = np.argmax(showcase_x, 2)
    x_strings = pp.decode_data(showcase_x)

    x_strings = [s.replace('<Padding>', '') for s in x_strings]
    y_strings = [s.replace('<Padding>', '') for s in y_strings]
    p_strings = [s.replace('<Padding>', '') for s in p_strings]
    x_w, y_w, p_w = max([len(s) for s in x_strings]), max([len(s) for s in y_strings]), max([len(s) for s in p_strings])
    y_p_strings = ['  '.join([x.ljust(x_w), y.ljust(y_w), p.ljust(p_w), str(y==p)]) for x, y, p in zip(x_strings, y_strings, p_strings)]

    print(*y_p_strings, sep='\n', end='\n\n')

    # accuracy on entire training set
    accuracies = model.evaluate(test_x, test_y)
    print(*list(zip([loss]+metrics, accuracies)), sep='\n', end='\n\n') # evaluate and list loss and each metric
    
    return accuracies[0], accuracies[-1]

### Character Encoder-Decoder: Input, Hidden, Output

In [209]:
architecture = 'E-D-NN'

model_E_D_NN = keras.Sequential()
model_E_D_NN.add(keras.layers.Dense(embedding_size, activation='exponential', name='lh', input_shape=(voc_size,)))           # dense layer
model_E_D_NN.add(keras.layers.Dense(voc_size, activation='exponential', name='lo'))           # dense layer
#model_E_D_NN.add(keras.layers.Dropout(0.001))                                                                  # dropout to prevent overfitting
model_E_D_NN.compile(optimizer='adam', loss=loss, metrics=['accuracy', 'mean_absolute_error', 'categorical_accuracy', 'binary_accuracy'])
models[architecture] = model_E_D_NN
print(model_E_D_NN.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lh (Dense)                   (None, 15)                1200      
_________________________________________________________________
lo (Dense)                   (None, 79)                1264      
Total params: 2,464
Trainable params: 2,464
Non-trainable params: 0
_________________________________________________________________
None


## Train Encoder and Decoder

In [None]:
epochs = 5
batch_size = 128
model = models['E-D-NN']

model.fit(train_x.reshape(-1, voc_size), train_x.reshape(-1, voc_size), batch_size=batch_size, epochs=epochs)
model.evaluate(test_x.reshape(-1, voc_size), test_x.reshape(-1, voc_size))

Epoch 1/5
Epoch 2/5
Epoch 3/5
 610816/3480386 [====>.........................] - ETA: 1:19 - loss: 4.3738e-07 - acc: 1.0000 - mean_absolute_error: 1.0983e-04 - categorical_accuracy: 1.0000 - binary_accuracy: 1.0000

### P-NN: Input, Embedding, Output

In [159]:
architecture = 'P-NN'

model_P_NN = keras.Sequential()
model_P_NN.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))   # embed characters into dense embedded space
model_P_NN.add(keras.layers.Flatten())                                                                       # flatten to 1D per sample
model_P_NN.add(keras.layers.Dense(y_shape_char*y_shape_ones, activation='exponential', name='lo'))           # dense layer
model_P_NN.add(keras.layers.Dropout(0.001))                                                                  # dropout to prevent overfitting
model_P_NN.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                           # un flatten
model_P_NN.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_P_NN
print(model_P_NN.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
le (Embedding)               (None, 23, 20)            1580      
_________________________________________________________________
flatten_11 (Flatten)         (None, 460)               0         
_________________________________________________________________
lo (Dense)                   (None, 948)               437028    
_________________________________________________________________
dropout_14 (Dropout)         (None, 948)               0         
_________________________________________________________________
reshape_26 (Reshape)         (None, 12, 79)            0         
Total params: 438,608
Trainable params: 438,608
Non-trainable params: 0
_________________________________________________________________
None


### FF-NN: Input, Embedding, Hidden, Output

In [37]:
architecture = 'FF-NN'
hidden_size = (y_shape_ones*embedding_size + y_shape_char*y_shape_ones) // 2

model_FF_NN = keras.Sequential()
model_FF_NN.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))   # embed characters into dense embedded space
model_FF_NN.add(keras.layers.Flatten())                                                                       # flatten to 1D per sample
model_FF_NN.add(keras.layers.Dense(hidden_size, activation='exponential', name='lh'))                         # dense layer
model_FF_NN.add(keras.layers.Dropout(0.2))                                                                    # dropout to prevent overfitting
model_FF_NN.add(keras.layers.Dense(y_shape_char*y_shape_ones, activation='exponential', name='lo'))           # dense layer
model_FF_NN.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                           # un flatten
model_FF_NN.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_FF_NN
print(model_FF_NN.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
le (Embedding)               (None, 23, 20)            1580      
_________________________________________________________________
flatten_5 (Flatten)          (None, 460)               0         
_________________________________________________________________
lh (Dense)                   (None, 1264)              582704    
_________________________________________________________________
dropout_6 (Dropout)          (None, 1264)              0         
_________________________________________________________________
lo (Dense)                   (None, 948)               1199220   
_________________________________________________________________
reshape_7 (Reshape)          (None, 12, 79)            0         
Total params: 1,783,504
Trainable params: 1,783,504
Non-trainable params: 0
_________________________________________________________________


### LSTM-RNN1: Input, Embedding, (LSTM), Output

In [83]:
architecture = 'LSTM-RNN1'
lstm_hidden_size = embedding_size * 15

model_LSTM_RNN1 = keras.Sequential()
model_LSTM_RNN1.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))   # embed characters into dense embedded space
#model_LSTM_RNN1.add(keras.layers.Dropout(0.2))                                                                    # dropout to prevent overfitting
model_LSTM_RNN1.add(keras.activation.exponential())
model_LSTM_RNN1.add(keras.layers.LSTM(y_shape_char * y_shape_ones, activation='exponential', implementation=2, unroll=True))                # lstm recurrent cell
model_LSTM_RNN1.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                           # un flatten
model_LSTM_RNN1.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_LSTM_RNN1
print(model_LSTM_RNN1.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
le (Embedding)               (None, 23, 20)            1580      
_________________________________________________________________
lstm_7 (LSTM)                (None, 948)               3674448   
_________________________________________________________________
reshape_15 (Reshape)         (None, 12, 79)            0         
Total params: 3,676,028
Trainable params: 3,676,028
Non-trainable params: 0
_________________________________________________________________
None


### LSTM-RNN2: Input, Embedding, (LSTM), Output

In [18]:
architecture = 'LSTM-RNN2'
lstm_hidden_size = embedding_size * 15

model_LSTM_RNN2 = keras.Sequential()
model_LSTM_RNN2.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))   # embed characters into dense embedded space
model_LSTM_RNN2.add(keras.layers.Dropout(0.2))                                                                    # dropout to prevent overfitting
model_LSTM_RNN2.add(keras.layers.LSTM(lstm_hidden_size, return_sequences=True, return_state=True))                # lstm recurrent cell
model_LSTM_RNN2.add(keras.layers.Dropout(0.2))                                                                    # dropout to prevent overfitting
model_LSTM_RNN2.add(keras.layers.Dense(y_shape_char * y_shape_ones))                                              # dense combine time series into single output
model_LSTM_RNN2.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                           # un flatten
model_LSTM_RNN2.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_LSTM_RNN2
print(model_LSTM_RNN2.summary())

TypeError: All layers in a Sequential model should have a single output tensor. For multi-output layers, use the functional API.

### GRU-RNN1: Input, Embedding, (GRU), Output

In [152]:
architecture = 'GRU-RNN1'
lstm_hidden_size = voc_size * 15

model_GRU_RNN1 = keras.Sequential()
model_GRU_RNN1.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))   # embed characters into dense embedded space
#model_GRU_RNN1.add(keras.layers.Dropout(0.2))                                                                    # dropout to prevent overfitting
model_GRU_RNN1.add(keras.layers.GRU(y_shape_char * y_shape_ones, activation='relu', implementation=2, unroll=True))                # lstm recurrent cell
model_GRU_RNN1.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                           # un flatten
model_GRU_RNN1.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_GRU_RNN1
print(model_GRU_RNN1.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
le (Embedding)               (None, 23, 20)            1580      
_________________________________________________________________
gru_8 (GRU)                  (None, 948)               2755836   
_________________________________________________________________
reshape_24 (Reshape)         (None, 12, 79)            0         
Total params: 2,757,416
Trainable params: 2,757,416
Non-trainable params: 0
_________________________________________________________________
None


### GRU-RNN2: Imput Embedding, (GRU), Decoder, Output

In [165]:
architecture = 'GRU-RNN2'
lstm_hidden_size = embedding_size * 15

model_GRU_RNN2 = keras.Sequential()
model_GRU_RNN2.add(keras.layers.Embedding(y_shape_ones, embedding_size, name='le', input_length=x_shape_char))            # embed characters into dense embedded space
#model_GRU_RNN2.add(keras.layers.Dropout(0.2))                                                                            # dropout to prevent overfitting
model_GRU_RNN2.add(keras.layers.GRU(lstm_hidden_size, activation='relu', implementation=2, unroll=True))                  # lstm recurrent cell
#model_GRU_RNN2.add(keras.layers.Dropout(0.2))                                                                            # dropout to prevent overfitting
model_GRU_RNN2.add(keras.layers.Dense(y_shape_char*y_shape_ones, activation='exponential', name='lo'))                    # dense layer, decode/de-embed
model_GRU_RNN2.add(keras.layers.Reshape((y_shape_char, y_shape_ones)))                                                    # un flatten
model_GRU_RNN2.compile(optimizer='adam', loss=loss, metrics=metrics)
models[architecture] = model_GRU_RNN2
print(model_GRU_RNN2.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
le (Embedding)               (None, 23, 20)            1580      
_________________________________________________________________
gru_10 (GRU)                 (None, 240)               187920    
_________________________________________________________________
lo (Dense)                   (None, 948)               228468    
_________________________________________________________________
reshape_27 (Reshape)         (None, 12, 79)            0         
Total params: 417,968
Trainable params: 417,968
Non-trainable params: 0
_________________________________________________________________
None


## Save/Restore weights

In [184]:
DE = models['E-D-NN'].get_weights()
#model_GRU_1 = model
#model_GRU_2 = model
#model_GRU_3 = model

In [187]:
#model.set_weights(GRU)
#model = model_GRU_3
models['E-D-NN'].set_weights(DE)

## Run and Evaluate

In [167]:
epochs = 5
batch_size = 32
model = models['GRU-RNN2']

model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs)
l, a = test()
training_log(x_name, y_name, architecture, batch_size, epochs, l, a)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
89-D8     89-D8     85-F2D   False
81-32     81-32     85-F2D   False
MM88-15   MM88-15   SH88-10  False
93J-397   93J-397   P8B-10   False
93-EDH    93-EDH    90-GDD   False
D88-6     D88-6     P8B-10   False
92-DMK    92-DMK    90-GDD   False
84-ER08   84-ER08   84-T3    False
80H-95    80H-95    88B-10   False
85-XNW    85-XNW    85-F1D   False
S78-2     S78-2     P8B-10   False
T83FC-8   T83FC-8   SH88-10  False
82-QPW    82-QPW    85-F2D   False
85-XSK    85-XSK    85-F1D   False
CR95A-11  CR95A-11  P89--12  False
85-YCH    85-YCH    85-F2D   False
MM88-7    MM88-7    SH88-10  False
AT92-10   AT92-10   S888-11  False
AD91-27   AD91-27   S888-10  False
93J-482   93J-482   P89--02  False
82-LFQ    82-LFQ    85-F2D   False
94-EXE    94-EXE    90-GDD   False
94-EXF    94-EXF    90-GDD   False

('mean_squared_logarithmic_error', 0.0027103656195649668)
('mean_absolute_error', 0.009604004407340513)
('categorical_accuracy', 0.6138249648664

In [168]:
print(*training_history[-10:], sep='\n')

{'x': 'LineName', 'y': 'LineName', 'architecture': 'GRU-RNN1', 'batch size': 64, 'epochs': 1, 'loss': 0.0019871394849425203, 'accuracy': 0.0020920502092050207}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'GRU-RNN1', 'batch size': 64, 'epochs': 1, 'loss': 0.001942680378456183, 'accuracy': 0.0031380753138075313}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'GRU-RNN1', 'batch size': 32, 'epochs': 1, 'loss': 0.0019508493095870172, 'accuracy': 0.0016736401673640166}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'GRU-RNN1', 'batch size': 32, 'epochs': 0, 'loss': 0.0019508493095870172, 'accuracy': 0.0016736401673640166}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'P-NN', 'batch size': 32, 'epochs': 0, 'loss': 0.4744374025067525, 'accuracy': 0.0}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'P-NN', 'batch size': 32, 'epochs': 25, 'loss': 0.00017522789327725847, 'accuracy': 0.9205020921000876}
{'x': 'LineName', 'y': 'LineName', 'architecture': 'P-NN', 'batch size'