In [1]:
# LSTM with variable length input sequence to one character output

import tensorflow as tf
from tensorflow.keras import utils
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
import numpy as np 
# Fix the random seeds for reproducibility.
# NumPy's generator drives the dataset sampling below; TensorFlow's global
# seed is set as well so the model's random weight initialisation is
# repeatable too (seeding NumPy alone does not affect TensorFlow).
SEED = 7
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Raw dataset: the uppercase alphabet
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Lookup tables between characters and integer codes (0 - 25), both directions
char_to_int = {c: i for i, c in enumerate(alphabet)}
int_to_char = {i: c for i, c in enumerate(alphabet)}

# Preparing the dataset

In [6]:
# Preview a few (start, end) index pairs, drawn the same way the dataset
# cell draws them: `start` is the first index of the input window and `end`
# its last (inclusive) index, at most max_len - 1 positions further on.
max_len = 5
n_preview = 5
for i in range(n_preview):
    start = np.random.randint(len(alphabet) - 2)
    # Exclusive upper bound for `end`; capped so that end + 1 is still a
    # valid index into the alphabet.
    end_bound = min(start + max_len, len(alphabet) - 1)
    end = np.random.randint(start, end_bound)
    print(start, "->", end)

23 -> 24
16 -> 19
0 -> 2
9 -> 13
14 -> 15


In [7]:
# Build the dataset: random alphabet windows of 1..max_len letters, each
# paired with the letter that immediately follows the window. Both sides
# are stored as integer codes.
num_inputs = 1000  # number of (input, output) pairs to generate
max_len = 5        # maximum length of an input sequence
dataX = []         # encoded input sequences (variable length lists)
dataY = []         # encoded next letter, wrapped in a one-element list

for i in range(num_inputs):
    # Draw the window bounds (start first, then end). The exclusive upper
    # bound on `end` is capped at len(alphabet) - 1, so alphabet[end + 1]
    # always exists.
    start = np.random.randint(len(alphabet) - 2)
    end = np.random.randint(start, min(start + max_len, len(alphabet) - 1))

    window_stop = end + 1
    context, target = alphabet[start:window_stop], alphabet[window_stop]

    dataX.append([char_to_int[letter] for letter in context])
    dataY.append([char_to_int[target]])

    print(context, '->', target)

XY -> Z
D -> E
T -> U
B -> C
QRSTU -> V
HIJ -> K
JKLM -> N
ABCDE -> F
X -> Y
V -> W
DE -> F
DEFG -> H
BCDE -> F
EFGH -> I
BCDE -> F
FG -> H
RST -> U
TUV -> W
STUV -> W
LMN -> O
P -> Q
MNOP -> Q
JK -> L
MNOP -> Q
OPQRS -> T
UVWXY -> Z
PQRS -> T
D -> E
EFGH -> I
IJK -> L
WX -> Y
STUV -> W
MNOPQ -> R
P -> Q
WXY -> Z
VWX -> Y
V -> W
HI -> J
KLMNO -> P
UV -> W
JKL -> M
ABCDE -> F
WXY -> Z
M -> N
CDEF -> G
KLMNO -> P
RST -> U
RS -> T
W -> X
J -> K
WX -> Y
JKLMN -> O
MN -> O
L -> M
BCDE -> F
TU -> V
MNOPQ -> R
NOPQR -> S
HIJ -> K
JKLM -> N
STUVW -> X
QRST -> U
N -> O
VWXY -> Z
B -> C
UVWX -> Y
OP -> Q
K -> L
C -> D
X -> Y
ST -> U
JKLM -> N
B -> C
QR -> S
RS -> T
VWXY -> Z
S -> T
NOP -> Q
KLMNO -> P
IJ -> K
EF -> G
MNOP -> Q
WXY -> Z
HI -> J
P -> Q
STUVW -> X
Q -> R
MN -> O
O -> P
C -> D
L -> M
JKLM -> N
K -> L
IJKLM -> N
FGHIJ -> K
LM -> N
OPQ -> R
U -> V
HIJKL -> M
PQR -> S
S -> T
OPQR -> S
J -> K
DE -> F
K -> L
BCDE -> F
EFGH -> I
RSTUV -> W
LMNOP -> Q
QR -> S
ABCDE -> F
LM -> N
IJKLM -> N


In [8]:
# Turn the list of variable-length integer lists into one float32 array by
# left-padding every sequence with zeros up to max_len entries.
# NOTE(review): the pad value 0 is also the integer code of 'A' in
# char_to_int, so padding and a leading 'A' are indistinguishable to the model.
x = pad_sequences(
    dataX,
    maxlen=max_len,
    dtype='float32',
    padding='pre',
    value=0.0,
)

In [9]:
# Inspect the padded sequences: one row per sample, max_len columns.
x

array([[ 0.,  0.,  0., 23., 24.],
       [ 0.,  0.,  0.,  0.,  3.],
       [ 0.,  0.,  0.,  0., 19.],
       ...,
       [ 0.,  3.,  4.,  5.,  6.],
       [ 0., 16., 17., 18., 19.],
       [16., 17., 18., 19., 20.]], dtype=float32)

In [12]:
# Build the model input with shape [samples, time steps, features] and
# normalize the integer codes by the alphabet size.
# The array is re-derived from dataX rather than updated in place, so
# re-running this cell cannot divide by len(alphabet) a second time.
x = pad_sequences(dataX, maxlen=max_len, dtype='float32')
x = np.reshape(x, (x.shape[0], max_len, 1)) / float(len(alphabet))

In [13]:
# Inspect the reshaped, normalized inputs: one feature per time step.
x

array([[[0.        ],
        [0.        ],
        [0.        ],
        [0.88461536],
        [0.9230769 ]],

       [[0.        ],
        [0.        ],
        [0.        ],
        [0.        ],
        [0.11538462]],

       [[0.        ],
        [0.        ],
        [0.        ],
        [0.        ],
        [0.7307692 ]],

       ...,

       [[0.        ],
        [0.11538462],
        [0.15384616],
        [0.1923077 ],
        [0.23076923]],

       [[0.        ],
        [0.61538464],
        [0.65384614],
        [0.6923077 ],
        [0.7307692 ]],

       [[0.61538464],
        [0.65384614],
        [0.6923077 ],
        [0.7307692 ],
        [0.7692308 ]]], dtype=float32)

In [14]:
# One-hot encode the output variable.
# num_classes is pinned to the alphabet size: left to its default,
# to_categorical infers the width from the largest label present, so the
# output layer would shrink whenever 'Z' happens not to be sampled as a target.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

In [16]:
# One-hot target vector of the first sample.
y[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [17]:
# Size of the time-step axis of x (used as the LSTM input length below).
x.shape[1]

5

# Creating model and training

In [18]:
# Create the model: a single LSTM layer followed by a softmax classifier
# with one output unit per one-hot class.
# NOTE(review): batch_size is not used by any other visible cell; the
# fit() call passes no batch_size argument.
batch_size = 1

n_timesteps = x.shape[1]   # length of each padded input sequence
n_classes = y.shape[1]     # width of the one-hot targets

model = Sequential([
    LSTM(32, input_shape=(n_timesteps, 1)),
    Dense(n_classes, activation='softmax'),
])

In [19]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                4352      
_________________________________________________________________
dense (Dense)                (None, 26)                858       
Total params: 5,210
Trainable params: 5,210
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Configure training: Adam optimizer, categorical cross-entropy against the
# one-hot targets, and accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train for 500 epochs, logging one line per epoch (verbose=2).
# The returned History object is kept so the per-epoch loss/accuracy can be
# inspected afterwards; binding it also stops its repr from being echoed as
# the cell's output.
history = model.fit(x, y, epochs=500, verbose=2)

Epoch 162/500
32/32 - 0s - loss: 0.9146 - accuracy: 0.7910
Epoch 163/500
32/32 - 0s - loss: 0.9137 - accuracy: 0.7850
Epoch 164/500
32/32 - 0s - loss: 0.9170 - accuracy: 0.7430
Epoch 165/500
32/32 - 0s - loss: 0.9093 - accuracy: 0.7930
Epoch 166/500
32/32 - 0s - loss: 0.9030 - accuracy: 0.7980
Epoch 167/500
32/32 - 0s - loss: 0.8976 - accuracy: 0.7660
Epoch 168/500
32/32 - 0s - loss: 0.8967 - accuracy: 0.7840
Epoch 169/500
32/32 - 0s - loss: 0.8901 - accuracy: 0.7900
Epoch 170/500
32/32 - 0s - loss: 0.8882 - accuracy: 0.7960
Epoch 171/500
32/32 - 0s - loss: 0.8835 - accuracy: 0.8170
Epoch 172/500
32/32 - 0s - loss: 0.8778 - accuracy: 0.7910
Epoch 173/500
32/32 - 0s - loss: 0.8727 - accuracy: 0.8140
Epoch 174/500
32/32 - 0s - loss: 0.8740 - accuracy: 0.7800
Epoch 175/500
32/32 - 0s - loss: 0.8655 - accuracy: 0.8160
Epoch 176/500
32/32 - 0s - loss: 0.8631 - accuracy: 0.8010
Epoch 177/500
32/32 - 0s - loss: 0.8598 - accuracy: 0.7970
Epoch 178/500
32/32 - 0s - loss: 0.8551 - accuracy: 0.797

<tensorflow.python.keras.callbacks.History at 0x1ca789ee2b0>

# Summarize performance of the model

In [21]:
# Score the model on the same arrays it was fitted on, so this is training
# accuracy rather than a held-out estimate.
scores = model.evaluate(x, y, verbose=0)  # [loss, accuracy], per compile()
accuracy_pct = scores[1] * 100
print('Model accuracy : {}'.format(accuracy_pct))

Model accuracy : 92.10000038146973


In [27]:
# demonstration of the model
for i in range(20):
     pattern_index = np.random.randint(len(dataX))
     pattern = dataX[pattern_index]
     # padding the sequence
     x = pad_sequences([pattern] , maxlen=max_len , dtype='float32')
     
     # reshaping the data
     x = np.reshape(x , (1 , max_len , 1))
     
     # normalize the data
     
     x = x/float(len(alphabet))
     prediction = model.predict(x , verbose=0)

     # finding the index of the predicted value
     index = np.argmax(prediction)

     # converting the results into character
     result = int_to_char[index]

     # Converting input sequence into character
     seq_in = [int_to_char[value] for value in pattern]

     print(seq_in, '->', result)


['O', 'P', 'Q'] -> R
['Q', 'R'] -> S
['O', 'P', 'Q', 'R', 'S'] -> T
['B'] -> C
['W'] -> X
['B', 'C', 'D', 'E'] -> F
['G'] -> I
['S', 'T'] -> U
['V'] -> W
['Q', 'R'] -> S
['J', 'K', 'L', 'M', 'N'] -> O
['A', 'B', 'C', 'D'] -> E
['H', 'I', 'J', 'K'] -> L
['S', 'T', 'U', 'V', 'W'] -> X
['T', 'U', 'V', 'W'] -> X
['C', 'D'] -> E
['T', 'U', 'V', 'W', 'X'] -> Y
['H'] -> I
['C'] -> D
['Q'] -> R
