# Packages

In [98]:
#pip install keras
#pip install keras.utils
#pip install tensorflow

In [146]:
import numpy as np
import pandas as pd

from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Masking
from keras.layers import Dropout
from keras.layers import Concatenate
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model

import pickle
import itertools

np.set_printoptions(suppress=True)

# Raw data

In [41]:
seqFile = 'sequences.pkl'
labelFile = 'labels.pkl'
timeFile = 'times.pkl'

sequences = np.array(pickle.load(open(seqFile, 'rb')), dtype='object')
labels = np.array(pickle.load(open(labelFile, 'rb')), dtype='float32')
times = np.array(pickle.load(open(timeFile, 'rb'), encoding='latin1'), dtype='object')

In [139]:
x1 = np.concatenate([np.ones(50), np.zeros(50)])
x2 = np.concatenate([np.zeros(50), np.ones(50)])
xs = np.array(list(zip(x1, x2)))

In [126]:
np.random.random_integers(0, 1, len(seq_idx))

  np.random.random_integers(0, 1, len(seq_idx))


array([0, 0, 1, ..., 0, 0, 1])

# Convert raw data to pandas DataFrame

In [8]:
seq_idx = np.arange(len(sequences))
seq_idx = np.concatenate([list(itertools.repeat(x, len(y))) for x,y in zip(seq_idx, sequences)])

In [129]:
d = {
    'id': seq_idx, 
    'values': np.concatenate(sequences), 
    'times': np.concatenate(times)
    }

d = pd.DataFrame(d)

d.head()

Unnamed: 0,id,values,times
0,0,79,50
1,0,44,57
2,0,89,77
3,0,24,124
4,0,36,199


# Extract sequences and time duration feature from DF

In [12]:
sequences = np.array(d.groupby(['id'], sort=False).values.apply(list).tolist(), dtype=object)
times = np.array(d.groupby(['id'], sort=False).times.apply(list).tolist(), dtype=object)

# Pad sequences

In [20]:
seqs_padded = sequence.pad_sequences(sequences)
seqs_padded[0]

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 79, 44,
       89, 24, 36, 14, 10, 10,  5, 59, 66, 91, 62, 11, 60, 54, 22, 61, 41,
       51, 52, 21, 12,  1,  7, 33, 30, 79, 78, 40, 48, 98, 56, 39, 85,  8,
       84, 51, 23, 13, 22, 26, 37, 66, 47, 21, 72, 67, 16,  3, 22, 87, 88,
        6, 56, 85,  8,  8, 76,  7, 47, 23, 14, 84, 95, 22, 78, 46, 48, 16,
       22, 34], dtype=int32)

# Pad time duration feature

In [18]:
times_padded = sequence.pad_sequences(times)
times_padded = np.array(times_padded)
times_padded[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   50,
         57,   77,  124,  199,  209,  262,  361,  457,  473,  561,  633,
        642,  651,  663,  759,  840,  925,  955,  958, 1019, 1069, 1137,
       1148, 1208, 1255, 1292, 1309, 1407, 1445, 1495, 1593, 1681, 1779,
       1818, 1823, 1920, 1995, 2078, 2114, 2159, 2178, 2241, 2286, 2348,
       2436, 2479, 2491, 2514, 2525, 2579, 2656, 2726, 2814, 2903, 2964,
       3061, 3089, 3103, 3175, 3245, 3248, 3305, 3315, 3318, 3347, 3396,
       3493, 3585, 3666, 3747, 3790], dtype=int32)

# One-hot encode sequences

In [21]:
seqs_encoded = to_categorical(seqs_padded)
seqs_encoded[0]

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [23]:
seqs_encoded.shape
# n sequences = 100   len(sequences)
# max length = 104    np.max([np.max(i) for i in sequences])
# unique ints = 100   len(np.unique(np.concatenate(sequences)))

(100, 104, 100)

# Append time duration feature to encoded sequences

In [54]:
def append_time(ohe, time):
    out = [np.append(i, j) for i,j in zip(ohe, time)]
    return(out)

seqs_with_time = np.array([append_time(seqs_encoded[i], times_padded[i]) for i in range(len(seqs_encoded))])

In [55]:
seqs_with_time[0]

array([[   1.,    0.,    0., ...,    0.,    0.,    0.],
       [   1.,    0.,    0., ...,    0.,    0.,    0.],
       [   1.,    0.,    0., ...,    0.,    0.,    0.],
       ...,
       [   0.,    0.,    0., ...,    0., 3666., 3666.],
       [   0.,    0.,    0., ...,    0., 3747., 3747.],
       [   0.,    0.,    0., ...,    0., 3790., 3790.]])

# Create LSTM

In [134]:
samples, timesteps, features = seqs_with_time.shape

In [97]:
model = Sequential()
model.add(Masking(mask_value = 0., input_shape=(timesteps, features)))
model.add(LSTM(100, return_sequences=True, dropout=0.2))
model.add(layers.Flatten())

model2 = Sequential()
model2.add(Dense(1, input_shape=(1,), activation='sigmoid'))

merged = Concatenate([model, model2])

merged.add(Dense(1, activation='sigmoid'))
merged.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

merged.summary()

AttributeError: 'Concatenate' object has no attribute 'add'

In [None]:
static_out = (static_input)

x = LSTM(n_cell_lstm, return_sequences=True)(dynamic_input)
x = Flatten()(x)
dynamic_out = (x)

z = concatenate([dynamic_out, static_out])

z = Dense(64, activation='relu')(z)

main_output = Dense(classes, activation='softmax', name='main_output')(z)

In [143]:
#from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.recurrent import LSTM
from keras.layers.merge import concatenate

static_out = (xs)

visible = Input(shape=(timesteps, features))
hidden1 = LSTM(10, return_sequences=True)(visible)
hidden1 = Flatten()(hidden1)

z = concatenate([hidden1, xs])

z = Dense(64, activation='relu')(z)

main_output = Dense(1, activation='softmax', name='main_output')(z)

model = Model(inputs=[visible], outputs=main_output)

In [144]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 104, 102)]        0         
                                                                 
 lstm_14 (LSTM)              (None, 104, 10)           4520      
                                                                 
 flatten_8 (Flatten)         (None, 1040)              0         
                                                                 
 concatenate_5 (Concatenate)  (100, 1042)              0         
                                                                 
 dense_7 (Dense)             (100, 64)                 66752     
                                                                 
 main_output (Dense)         (100, 1)                  65        
                                                                 
Total params: 71,337
Trainable params: 71,337
Non-trainable p

In [148]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [153]:
model.fit(
	x=seqs_with_time, y=labels,
	epochs=3, batch_size=1)

Epoch 1/3


InvalidArgumentError: Graph execution error:

Detected at node 'model/concatenate_5/concat' defined at (most recent call last):
    File "/opt/conda/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/conda/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/opt/conda/lib/python3.9/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/opt/conda/lib/python3.9/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 677, in start
      self.io_loop.start()
    File "/opt/conda/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 596, in run_forever
      self._run_once()
    File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once
      handle._run()
    File "/opt/conda/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 473, in dispatch_queue
      await self.process_one()
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 462, in process_one
      await dispatch(*args)
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 369, in dispatch_shell
      await result
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 664, in execute_request
      reply_content = await reply_content
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 355, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/conda/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_7847/182558895.py", line 1, in <cell line: 1>
      model.fit(
    File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/layers/merge.py", line 183, in call
      return self._merge_function(inputs)
    File "/opt/conda/lib/python3.9/site-packages/keras/layers/merge.py", line 531, in _merge_function
      return backend.concatenate(inputs, axis=self.axis)
    File "/opt/conda/lib/python3.9/site-packages/keras/backend.py", line 3313, in concatenate
      return tf.concat([to_dense(x) for x in tensors], axis)
Node: 'model/concatenate_5/concat'
ConcatOp : Dimension 0 in both shapes must be equal: shape[0] = [1,1040] vs. shape[1] = [100,2]
	 [[{{node model/concatenate_5/concat}}]] [Op:__inference_train_function_42348]

# Fit, evaluate, and predict

In [68]:
model.fit(seqs_with_time, labels, epochs=3, batch_size=10)

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

In [59]:
model.evaluate(seqs_with_time, labels, verbose=0)

[0.2918994426727295, 0.9200000166893005]

In [60]:
model.predict(seqs_with_time)[0:5]

array([[0.05783281],
       [0.05624515],
       [0.06183383],
       [0.05807522],
       [0.05601501]], dtype=float32)

In [None]:
seqs = np.array(d.groupby(['id'], sort=False).values.apply(list).tolist())
ts = np.array(d.groupby(['id'], sort=False).times.apply(list).tolist())

  seqs = np.array(d.groupby(['id'], sort=False).values.apply(list).tolist())
  ts = np.array(d.groupby(['id'], sort=False).times.apply(list).tolist())
