In [None]:
import numpy as np
import random

In [2]:
class SequenceEncoder(object):
    """
    Helper class for encoding / decoding number sequences
    """
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, 10))
        for i, c in enumerate(C):
            x[i, c] = 1
        return x

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return x
    
    def encode_position(self, num_rows, index):
        x = np.zeros([num_rows])
        x[index] = 1
        return x
    
    def decode_position(self,position_sequence):
        return position_sequence.argmax()
    
    def return_subsequence(self, 
                           input_sequence, 
                           start_position_sequence, 
                           end_position_sequence):
        start = self.decode_position(start_position_sequence)
        end = self.decode_position(end_position_sequence)
        decoded_input = self.decode(input_seq)
        return decoded_input[start:end]

In [3]:
sequence_encoder = SequenceEncoder()

In [4]:
sequence = [1, 2, 3]
max_len = 20

In [5]:
input_seq = sequence_encoder.encode([1,2,3], max_len)

In [6]:
start = sequence_encoder.encode_position(max_len, 0)

In [7]:
end = sequence_encoder.encode_position(max_len, 3)

In [8]:
sequence_encoder.return_subsequence(input_seq, start, end)

array([1, 2, 3])

In [9]:

def generate_low_high_low_sequence(length, min_seglen=5, max_seglen=10):
    seq_before = [(random.randint(1,5)) for x in range(random.randint(min_seglen, max_seglen))]
    seq_during = [(random.randint(6,9)) for x in range(random.randint(min_seglen, max_seglen))]
    seq_after = [random.randint(1,5) for x in range(random.randint(min_seglen, max_seglen))]
    seq = seq_before + seq_during + seq_after

    # Pad it up to max len with 0's
    seq = seq + ([0] * (length - len(seq)))
    return [seq, len(seq_before), len(seq_before) + len(seq_during)-1]

In [12]:
x ,start, end = generate_low_high_low_sequence(60)

In [15]:
x[start]

6

In [16]:
x[end]

6

In [17]:
def generate_batch(batch_size, sequence_gen_fun, input_len=60, min_seg=5, max_seg=10):
    sequences = []
    start_indices = []
    end_indices = []
    seq_encoder = SequenceEncoder()
    for i in range(batch_size):
        seq, start, end = sequence_gen_fun(input_len, min_seg, max_seg)
        sequences.append(sequence_encoder.encode(seq, input_len))
        start_indices.append(sequence_encoder.encode_position(input_len, start))
        end_indices.append(sequence_encoder.encode_position(input_len, end))
        
    
    return np.stack(sequences), np.stack([np.stack(start_indices), np.stack(end_indices)], axis=-1)

In [18]:
x_batch, y_batch = generate_batch(45000,generate_low_high_low_sequence)

In [19]:
x_batch.shape

(45000, 60, 10)

In [20]:
y_batch.shape

(45000, 60, 2)

In [55]:
from keras.models import Model, Sequential
from keras.layers import LSTM, TimeDistributed, RepeatVector, Dropout, Dense, Activation, Input, Bidirectional
from keras.optimizers import Adam, SGD


In [59]:
num_indices = 2
input_len = 60
enc_input = Input(shape=(input_len, 10))
enc = LSTM(20, return_sequences=True)(enc_input)
dec = LSTM(20, return_sequences=True, activation='tanh')(enc)
dec = Dense(units=2, activation='softmax')(dec)
model = Model(inputs=[enc_input], outputs=[dec])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 60, 10)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 60, 20)            2480      
_________________________________________________________________
lstm_6 (LSTM)                (None, 60, 20)            3280      
_________________________________________________________________
dense_3 (Dense)              (None, 60, 2)             42        
Total params: 5,802.0
Trainable params: 5,802.0
Non-trainable params: 0.0
_________________________________________________________________


In [60]:
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

In [61]:
iterations = 3
for i in range(iterations):
    model.fit(x_batch, y_batch, epochs=1, batch_size=128)
    x_test, y_test = generate_batch(10, generate_low_high_low_sequence, 60, 3, 5)
    y_pred = model.predict(x_test)
    truth = np.argmax(y_test, axis=1)
    pred = np.argmax(y_pred, axis=1)
    for j, val in enumerate(x_test):
        print("sequence: ")
        print(sequence_encoder.decode(val))
        print("actual: ")
        print(truth[j])
        print("prediction:")
        print(pred[j])
        print("-"*60)
        
    print("*" * 60)

Epoch 1/1
sequence: 
[2 2 2 8 6 9 9 1 2 3 2 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
actual: 
[3 6]
prediction:
[3 5]
------------------------------------------------------------
sequence: 
[2 3 2 5 5 9 8 6 8 6 5 3 2 4 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
actual: 
[5 9]
prediction:
[5 9]
------------------------------------------------------------
sequence: 
[2 1 2 1 2 9 8 8 4 4 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
actual: 
[5 7]
prediction:
[5 8]
------------------------------------------------------------
sequence: 
[2 3 2 2 7 8 6 6 9 3 4 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
actual: 
[4 8]
prediction:
[4 8]
------------------------------------------------------------
sequence: 
[5 2 1 6 7 7 6 5 3 2 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [33]:
sequence_encoder.decode(x_batch[0])

array([4, 2, 1, 5, 2, 4, 5, 4, 1, 9, 7, 6, 8, 6, 6, 9, 1, 2, 2, 1, 1, 5, 3,
       2, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [34]:
u = model.predict(x_batch[:1]).transpose()

In [35]:
np.argmax(u[1])

15

In [48]:
i, j = sequence_encoder.encode_position(60, start), sequence_encoder.encode_position(60, end)

In [49]:
seq, start, end = generate_low_high_low_sequence(60)

In [50]:
test = np.stack([sequence_encoder.encode(seq, 60)])

In [51]:
np.argmax(model.predict(test), axis=1)

array([[ 5, 10]])

In [53]:
pred = model.predict(test)

In [54]:
pred

array([[[ 0.50130099,  0.49869898],
        [ 0.4990516 ,  0.50094837],
        [ 0.49947953,  0.50052053],
        [ 0.49802601,  0.50197399],
        [ 0.49760228,  0.50239772],
        [ 0.98790377,  0.0120962 ],
        [ 0.50738168,  0.49261838],
        [ 0.50020063,  0.49979934],
        [ 0.49808249,  0.50191748],
        [ 0.41034853,  0.58965147],
        [ 0.39232054,  0.60767949],
        [ 0.50918025,  0.49081972],
        [ 0.50630337,  0.49369669],
        [ 0.49607721,  0.50392282],
        [ 0.49612963,  0.50387037],
        [ 0.495197  ,  0.504803  ],
        [ 0.49697086,  0.50302911],
        [ 0.49816772,  0.50183225],
        [ 0.49945989,  0.50054008],
        [ 0.4995819 ,  0.50041813],
        [ 0.5000174 ,  0.4999826 ],
        [ 0.49971125,  0.50028872],
        [ 0.4991478 ,  0.50085223],
        [ 0.49854055,  0.50145948],
        [ 0.49809724,  0.50190282],
        [ 0.49788755,  0.50211245],
        [ 0.49788094,  0.50211906],
        [ 0.49800608,  0.501