## Imports ...

In [1]:
import tensorflow as tf
from keras.utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from random import randint
import numpy as np

# Set up GPU:
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices: tf.config.experimental.set_memory_growth(device, True)
print(gpu_devices)
print(str(len(gpu_devices)) + " GPU(s) available" if len(gpu_devices) > 0 else "Warning: no GPU available.")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
1 GPU(s) available


Using TensorFlow backend.


## Generate some fake training data ...

Create some random data and some random labels. 
We have 14 random features, but the length of each time series varies.
Also, the first seven features are distributed as int in interval [0; 8192].
The second seven features are distributed as int in interval [0; 16384].
In reality, I load this data from some file. Here it just mocks the dataset.

In [2]:
DATA = []

for i in range(15000):
    new_timeseries = []
    
    for j in range(randint(0, 200)):
        # first seven values are int[0, 8192]
        # second seven values are int[0, 16384]
        timestep = [
            randint(0, 8192), 
            randint(0, 8192), 
            randint(0, 8192), 
            randint(0, 8192), 
            randint(0, 8192), 
            randint(0, 8192), 
            randint(0, 8192),
            randint(0, 16384), 
            randint(0, 16384), 
            randint(0, 16384), 
            randint(0, 16384), 
            randint(0, 16384), 
            randint(0, 16384),
            randint(0, 16384)
        ]
        new_timeseries.append(timestep)
        
    DATA.append(new_timeseries)

print("done")

done


As the above timeseries have different lengths, I go on and pad them up to an equal length which is the maximum length of all timeseries.

Next, I normalize the sequence to have them as float in the interval [0; 1].
I do so by iterating over the padded sequence and dividing each feature tuple through its possible max value.

In [3]:
# Pad sequence to have timeseries of equal length.
DATA_PADDED = pad_sequences(
                DATA, 
                maxlen=max(len(e) for e in  DATA),
                padding='post'
              )

# Normalize sequence into float interval of [0, 1]
def normalize_sequence(sequence):
    normalized_seq = []
    for tpl in sequence:
        normalized_seq.append(
            (
            tpl[0] / 8192,
            tpl[1] / 8192,
            tpl[2] / 8192,
            tpl[3] / 8192,
            tpl[4] / 8192,
            tpl[5] / 8192,
            tpl[6] / 8192,
            tpl[7] / 16384,
            tpl[8] / 16384,
            tpl[9] / 16384,
            tpl[10] / 16384,
            tpl[11] / 16384,
            tpl[12] / 16384,
            tpl[13] / 16384,
            )
        )
    return normalized_seq

DATA_NORMALIZED = []
for d in DATA_PADDED:
    DATA_NORMALIZED.append(normalize_sequence(d))
    
DATA_NORMALIZED = np.array(DATA_NORMALIZED, dtype=np.float32)

print(DATA_NORMALIZED)
print(len(DATA_NORMALIZED))

[[[0.48901367 0.10192871 0.08276367 ... 0.2434082  0.39611816 0.5031128 ]
  [0.59265137 0.2076416  0.28125    ... 0.6668701  0.38830566 0.98553467]
  [0.1595459  0.9758301  0.04187012 ... 0.58599854 0.6897583  0.7102051 ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.8162842  0.3942871  0.31311035 ... 0.79901123 0.17285156 0.7988281 ]
  [0.23620605 0.9272461  0.17138672 ... 0.92871094 0.3303833  0.4586792 ]
  [0.93640137 0.86291504 0.74731445 ... 0.40264893 0.8817749  0.04888916]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.9724121  0.19616699 0.65478516 ... 0.79156494 0.960083   0.96014404]
  [0.6173096  0.487426

In [4]:
# Create labels
LABELS = []
for i in range(15000):
    LABELS.append(randint(0, 9))

# Manually create one hot encoding
LABELS_OHC = to_categorical(LABELS)

print(LABELS_OHC)
print(len(LABELS_OHC))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
15000


In [5]:
# Create model
number_of_classes = len(set(LABELS))  # count unique labels
n_timesteps, n_features = DATA_PADDED.shape[1], DATA_PADDED.shape[2]

print('input_shape', n_timesteps, n_features)
print('output', number_of_classes)

# `batch_size` must be provided as part of model.fit's batch_input_shape-parameter to have LSTMs fit on the GPU
# Otherwise this parameter will default and bad things will happen (see tensorflow issue #37942).
# the batch_size must be a divisor of len(x), i.e. `DATA_NORMALIZED[:split_index]` % batch_size == 0 !!
batch_size = 100

model = Sequential()
model.add(LSTM(100, batch_input_shape=(batch_size, n_timesteps, n_features), return_sequences=True))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100))
model.add(Dense(number_of_classes, activation='softmax'))

model.summary()

print("Created model.")

model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy', 
              metrics=['acc'])

print("Compiled model.")

input_shape 200 14
output 10
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 200, 100)           46000     
_________________________________________________________________
lstm_1 (LSTM)                (100, 200, 100)           80400     
_________________________________________________________________
lstm_2 (LSTM)                (100, 100)                80400     
_________________________________________________________________
dense (Dense)                (100, 10)                 1010      
Total params: 207,810
Trainable params: 207,810
Non-trainable params: 0
_________________________________________________________________
Created model.
Compiled model.


In [6]:
print(type(DATA_NORMALIZED))
print(type(LABELS_OHC))

assert isinstance(DATA_NORMALIZED, np.ndarray)
assert isinstance(LABELS_OHC, np.ndarray)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [10]:
force_cpu = False
use_cpu = len(tf.config.experimental.list_physical_devices('GPU')) == 0

# We must not use `validation_split` as this parameter leads to severe crashes.
split_index = int(len(DATA_NORMALIZED) * 0.8)

assert split_index % batch_size == 0, \
    "batch_size must be a divisor of split_index: split_index % batch_size == 0, your rest currently is" \
    + str(split_index % batch_size)
assert len(DATA_NORMALIZED[:split_index]) % batch_size == 0, "batch_size must divide len(X) without any rest!"

print("Training with index 0 -", split_index)
print("Validating with index", split_index, " - ", len(DATA_NORMALIZED))

print("Starting model.fit()", "on CPU." if use_cpu else "on GPU.")

def model_fit():
    """"Small wrapper function for model.fit, as it is sometimes is called via `with(CPU)` statement and
     sometimes runs on the GPU. Returns a history."""
    return model.fit(
        x=DATA_NORMALIZED[:split_index],
        y=LABELS_OHC[:split_index],
        batch_size=batch_size,
        epochs=500,
        verbose=2
    )

if use_cpu:
    print("Using CPU ...")
    with tf.device("/device:CPU:0"):
        history = model_fit()
else:
    print("Using GPU ...")
    history = model_fit()
    
print("Finished model.fit().")

Training with index 0 - 12000
Validating with index 12000  -  15000
Starting model.fit() on GPU.
Using GPU ...
Epoch 1/500
120/120 - 3s - loss: 1.9490 - acc: 0.2605
Epoch 2/500
120/120 - 3s - loss: 1.9477 - acc: 0.2609
Epoch 3/500
120/120 - 3s - loss: 1.9251 - acc: 0.2676
Epoch 4/500
120/120 - 3s - loss: 1.9271 - acc: 0.2746
Epoch 5/500
120/120 - 3s - loss: 1.9241 - acc: 0.2715
Epoch 6/500
120/120 - 3s - loss: 1.9191 - acc: 0.2780
Epoch 7/500
120/120 - 3s - loss: 1.9552 - acc: 0.2604
Epoch 8/500
120/120 - 3s - loss: 1.9321 - acc: 0.2698
Epoch 9/500
120/120 - 3s - loss: 1.9615 - acc: 0.2603
Epoch 10/500
120/120 - 3s - loss: 1.9282 - acc: 0.2737
Epoch 11/500
120/120 - 3s - loss: 1.9105 - acc: 0.2785
Epoch 12/500
120/120 - 3s - loss: 1.8888 - acc: 0.2829
Epoch 13/500
120/120 - 3s - loss: 1.9765 - acc: 0.2551
Epoch 14/500


KeyboardInterrupt: 

In [9]:
print("Starting model.evaluate().")
if use_cpu:
    with tf.device("/device:CPU:0"):
        evaluation = model.evaluate(x=DATA_NORMALIZED[split_index:],
                                    y=LABELS_OHC[split_index:])
else:
    evaluation = model.evaluate(x=DATA_NORMALIZED[split_index:],
                                y=LABELS_OHC[split_index:])
print("Finished model.evaluate().")

Starting model.evaluate().


InvalidArgumentError:    Invalid input_h shape: [1,100,100] [1,32,100]
	 [[{{node CudnnRNN}}]]
	 [[sequential/lstm/PartitionedCall]] [Op:__inference_test_function_130081]

Function call stack:
test_function -> test_function -> test_function
