In [33]:
import json
from pprint import pprint
import pandas as pd
import numpy as np
from IPython.display import display, HTML, Image
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
from IPython.display import SVG
from keras.models import Model, model_from_json
from keras.layers import Input, Dense, LSTM, concatenate, Reshape
from keras.utils.vis_utils import plot_model, model_to_dot
from keras.utils import plot_model
from keras.optimizers import Adam, SGD
from keras.preprocessing import sequence

In [22]:
def get_net():
    
    hichy_ip = Input(shape = (None,2))
    hichy1 = LSTM(64, stateful = False)(hichy_ip)
    hichy3 = Dense(2, activation='sigmoid')(hichy1)

    
    model = Model(inputs=hichy_ip,outputs=hichy3)
    opt=Adam(lr=0.0001)
    l_val = "binary_crossentropy"
    model.compile(loss=l_val, optimizer=opt)
    return model

In [23]:
#First Example
# X is an array which does not have any different number of sequences
# All the sequences are of length 4
# So in LSTM Input, we have 3D data (batch_size, timesteps, features)

# batch_size is the number of rows in the training data, but it can be any number smaller than total training data
# if batch size is X.shape[0], then Network will be trained on the entire data at once
# If batch_size < X.shape[0] (say 2) then Network will be trained first 2 rows, then next 2 rows , then next 2 rows so on.
# if batch_size = 1, then Network will take one row at a time and train
# so batch_size can be anything, if we know before hand then we can set it or set it as None

# timesteps is the length of the sequences, here each row is a sequence for example,
# [[-9.035250067710876, 213], [7.453250169754028, 213], [33.34074878692627, 213],[0, 0]] is s sequence
# Now each sequence has 4 values(here each value is another array),
# so time steps can be 4. Note that it can be variable too. So we can set it or set as None
#***, in keras, number of timesteps should be same for all sequences other wise it will not work.

# features is the number of columns for each data in sequences,
# Here [-9.035250067710876, 213] is data. [7.453250169754028, 213] is data, [33.34074878692627, 213] is data
# Here features is 2
# *** in keras, this should same acorss all the sequences 

In [6]:
X = np.array([
     [[-9.035250067710876, 213], [7.453250169754028, 213], [33.34074878692627, 213],[0, 0]],
     [[-6.63700008392334, 213], [5.132999956607819, 213], [-6.63700008392334, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]]
 ])

Y = np.array([
     [-9.035250067710876, 213],
     [-6.63700008392334, 213],
     [-5.1272499561309814, 213],
     [-5.1272499561309814, 213],
     [-5.1272499561309814, 213],
    [-5.1272499561309814, 213]
 ])
pprint(X.shape)
pprint(Y.shape)

(6, 4, 2)
(6, 2)


In [24]:
# epoch is for each batch
# We are passing entire X to the model, which will be faster
model = get_net()
model.fit(X, Y, batch_size =X.shape[0], epochs = 5,  verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb2f983630>

In [28]:
# timesteps - test cases
# Suppose my data look like this
# The second and fourth sequence are of length 3 and rest are of length 4
X = np.array([
     [[-9.035250067710876, 213], [7.453250169754028, 213], [33.34074878692627, 213],[0, 0]],
     [[-6.63700008392334, 213], [5.132999956607819, 213], [-6.63700008392334, 213]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213], [30.925999641418457, 213],[0, 0]],
     [[-5.1272499561309814, 213], [8.251499891281128, 213]]
 ])

Y = np.array([
     [-9.035250067710876, 213],
     [-6.63700008392334, 213],
     [-5.1272499561309814, 213],
     [-5.1272499561309814, 213],
     [-5.1272499561309814, 213],
    [-5.1272499561309814, 213]
 ])

#here shape gives this (6,) which is incomple because we have different sequences length
pprint(X.shape) 
pprint(Y.shape)

(6,)
(6, 2)


In [29]:
# This will give error
model = get_net()
model.fit(X, Y, batch_size =X.shape[0], epochs = 5,  verbose = 1)

ValueError: Error when checking input: expected input_14 to have 3 dimensions, but got array with shape (6, 1)

In [27]:
#the above will give error, to tackle this we can do follwing things
# 1) use one row at a time to train, so instead of passing entire training data/ batch data, feed one by one. 
# in this case sequence length will not matter as we are passing only one sequence, but this will take long time.
# 2) use padding, pad zeros to the smaller length sequences but this might change the data and network will be get affected.
# 3) use bucket, create different batches, each batch will have only the sequences that has same length 
# or such range that we need to pad very small sequences in batch.

In [31]:
# 1) use one row at a time to train, so instead of passing entire training data/ batch data, feed one by one. 
# in this case sequence length will not matter as we are passing only one sequence, but this will take long time.
model = get_net()
for t_x, t_y in zip(X, Y):
    t_x = np.array(t_x)
    t_y = np.array(t_y)
    t_x = t_x.reshape(1, t_x.shape[0], t_x.shape[1])
    t_y = t_y.reshape(1, t_y.shape[0])
    model.fit(t_x, t_y, batch_size =1, epochs = 5,  verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [39]:
# 2) use padding, pad zeros to the smaller length sequences but this might change the data and network will be get affected.
maxlen = 4
X = sequence.pad_sequences(X, maxlen=maxlen) # padded at the begining
display(X)

array([[[ -9, 213],
        [  7, 213],
        [ 33, 213],
        [  0,   0]],

       [[  0,   0],
        [ -6, 213],
        [  5, 213],
        [ -6, 213]],

       [[ -5, 213],
        [  8, 213],
        [ 30, 213],
        [  0,   0]],

       [[  0,   0],
        [ -5, 213],
        [  8, 213],
        [ 30, 213]],

       [[ -5, 213],
        [  8, 213],
        [ 30, 213],
        [  0,   0]],

       [[  0,   0],
        [  0,   0],
        [ -5, 213],
        [  8, 213]]], dtype=int32)

In [40]:
model = get_net()
model.fit(X, Y, batch_size =X.shape[0], epochs = 5,  verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb3294d1d0>

In [48]:
# 3) use bucket, create different batches, each batch will have only the sequences that has same length 
# or such range that we need to pad very small sequences in batch.
batch1_x = []
batch1_x.append(X[0]) # sequence length - 4
batch1_x.append(X[2]) # sequence length - 4
batch1_x.append(X[4]) # sequence length - 4

batch1_y = []
batch1_y.append(Y[0])
batch1_y.append(Y[2])
batch1_y.append(Y[4])

batch2_x = []
batch2_x.append(X[1]) # sequence length - 3
batch2_x.append(X[3]) # sequence length - 2

maxlen = 3
batch2_x = sequence.pad_sequences(batch2_x, maxlen=maxlen) # padded at the begining

batch2_y = []
batch2_y.append(Y[1])
batch2_y.append(Y[3])

In [49]:
model = get_net()
batch1_x = np.array(batch1_x)
batch1_y = np.array(batch1_y)
model.fit(batch1_x, batch1_y, batch_size =batch1_x.shape[0], epochs = 5,  verbose = 1)

batch2_x = np.array(batch2_x)
batch2_y = np.array(batch2_y)
model.fit(batch2_x, batch2_y, batch_size =batch2_x.shape[0], epochs = 5,  verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb327444a8>