# 27 Understanding Stateful LSTM Recurrent Neural Networks

## 27.1 Problem Description: Learn the Alphabet

In [1]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils

# fix random seed for reproducibility
# NOTE(review): this seeds NumPy's global RNG only; whether the Keras backend
# draws its initial weights from this RNG is not visible here — confirm if
# run-to-run identical results are required.
numpy.random.seed(7)

Using TensorFlow backend.


In [2]:
# the raw dataset: the 26 uppercase letters, in order
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# forward lookup (letter -> position 0-25) and its inverse (position -> letter)
char_to_int = {letter: position for position, letter in enumerate(alphabet)}
int_to_char = {position: letter for letter, position in char_to_int.items()}

In [4]:
# build integer-encoded training pairs: each window of seq_length letters
# is paired with the letter that immediately follows it
seq_length = 1
dataX, dataY = [], []
for start in range(len(alphabet) - seq_length):
  stop = start + seq_length
  seq_in, seq_out = alphabet[start:stop], alphabet[stop]
  dataX.append([char_to_int[ch] for ch in seq_in])
  dataY.append(char_to_int[seq_out])
  # echo the pair so the mapping being learned is visible in the output
  print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


In [5]:
# arrange the inputs as [samples, time steps, features]:
# seq_length time steps, each carrying a single feature
X = numpy.array(dataX).reshape(len(dataX), seq_length, 1)
# scale the integer codes by the alphabet size
X = X / float(len(alphabet))
# turn the integer targets into one-hot rows
y = np_utils.to_categorical(dataY)

In [6]:
# display the one-hot encoded targets produced by to_categorical above
y

array([[ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

## 27.2 LSTM for Learning One-Char to One-Char Mapping

In [7]:
# build the network: one LSTM layer (32 units) feeding a softmax output
# with one unit per one-hot target column
time_steps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]
model = Sequential([
  LSTM(32, input_shape=(time_steps, n_features)),
  Dense(n_classes, activation='softmax'),
])
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy'],
)
# train one pattern at a time (batch_size=1) for 500 passes over the data
# NOTE(review): `nb_epoch` is the older Keras spelling of this argument; newer
# releases call it `epochs` — confirm the installed version before renaming.
model.fit(X, y, nb_epoch=500, batch_size=1, verbose=2)



Epoch 1/500
1s - loss: 3.2661 - acc: 0.0000e+00
Epoch 2/500
0s - loss: 3.2582 - acc: 0.0000e+00
Epoch 3/500
0s - loss: 3.2551 - acc: 0.0400
Epoch 4/500
0s - loss: 3.2524 - acc: 0.0400
Epoch 5/500
0s - loss: 3.2495 - acc: 0.0400
Epoch 6/500
0s - loss: 3.2471 - acc: 0.0400
Epoch 7/500
0s - loss: 3.2440 - acc: 0.0400
Epoch 8/500
0s - loss: 3.2412 - acc: 0.0400
Epoch 9/500
0s - loss: 3.2378 - acc: 0.0400
Epoch 10/500
0s - loss: 3.2348 - acc: 0.0400
Epoch 11/500
0s - loss: 3.2313 - acc: 0.0400
Epoch 12/500
0s - loss: 3.2277 - acc: 0.0400
Epoch 13/500
0s - loss: 3.2237 - acc: 0.0400
Epoch 14/500
0s - loss: 3.2203 - acc: 0.0400
Epoch 15/500
0s - loss: 3.2160 - acc: 0.0400
Epoch 16/500
0s - loss: 3.2116 - acc: 0.0400
Epoch 17/500
0s - loss: 3.2066 - acc: 0.0400
Epoch 18/500
0s - loss: 3.2016 - acc: 0.0400
Epoch 19/500
0s - loss: 3.1969 - acc: 0.0400
Epoch 20/500
0s - loss: 3.1911 - acc: 0.0400
Epoch 21/500
0s - loss: 3.1854 - acc: 0.0400
Epoch 22/500
0s - loss: 3.1789 - acc: 0.0400
Epoch 23/50

<keras.callbacks.History at 0x117d4ff98>

In [8]:
# score the model on the same patterns it was trained on;
# scores[0] is the loss, scores[1] the 'accuracy' metric requested at compile time
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)

Model Accuracy: 88.00%


In [9]:
# run every training pattern back through the model and show its guess
scale = float(len(alphabet))
for pattern in dataX:
  # same [samples, time steps, features] layout and scaling used for training
  x = numpy.array(pattern).reshape(1, len(pattern), 1) / scale
  probabilities = model.predict(x, verbose=0)
  # the most probable class index maps straight back to a letter
  result = int_to_char[numpy.argmax(probabilities)]
  seq_in = [int_to_char[code] for code in pattern]
  print(seq_in, "->", result)

['A'] -> B
['B'] -> C
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> V
['V'] -> Y
['W'] -> Y
['X'] -> Z
['Y'] -> Z


## 27.3 LSTM for a Feature Window to One-Char Mapping

In [13]:
# build integer-encoded training pairs: a window of seq_length letters
# paired with the letter that immediately follows it
seq_length = 3                                           # <--- window widened to three letters

dataX, dataY = [], []
for start in range(len(alphabet) - seq_length):
  stop = start + seq_length
  seq_in, seq_out = alphabet[start:stop], alphabet[stop]
  dataX.append([char_to_int[ch] for ch in seq_in])
  dataY.append(char_to_int[seq_out])
  print(seq_in, "->" , seq_out)

# arrange the inputs as [samples, time steps, features]:
# a single time step whose features are the seq_length letters of the window
X = numpy.array(dataX).reshape(len(dataX), 1, seq_length)  # <---
# scale the integer codes by the alphabet size
X = X / float(len(alphabet))
# turn the integer targets into one-hot rows
y = np_utils.to_categorical(dataY)

# build the network: one LSTM layer (32 units) feeding a softmax output
# with one unit per one-hot target column
time_steps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]
model = Sequential([
  LSTM(32, input_shape=(time_steps, n_features)),
  Dense(n_classes, activation='softmax'),
])
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy'],
)
# NOTE(review): `nb_epoch` is the older Keras spelling of this argument; newer
# releases call it `epochs` — confirm the installed version before renaming.
model.fit(X, y, nb_epoch=500, batch_size=1, verbose=2)

# score the model on the same patterns it was trained on
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)

# run every training pattern back through the model and show its guess
scale = float(len(alphabet))
for pattern in dataX:
  # same single-time-step layout and scaling used for training
  x = numpy.array(pattern).reshape(1, 1, len(pattern)) / scale  # <---
  probabilities = model.predict(x, verbose=0)
  result = int_to_char[numpy.argmax(probabilities)]
  seq_in = [int_to_char[code] for code in pattern]
  print(seq_in, "->", result)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z




Epoch 1/500
1s - loss: 3.2688 - acc: 0.0000e+00
Epoch 2/500
0s - loss: 3.2553 - acc: 0.0435
Epoch 3/500
0s - loss: 3.2484 - acc: 0.0000e+00
Epoch 4/500
0s - loss: 3.2418 - acc: 0.0000e+00
Epoch 5/500
0s - loss: 3.2342 - acc: 0.0435
Epoch 6/500
0s - loss: 3.2275 - acc: 0.0435
Epoch 7/500
0s - loss: 3.2203 - acc: 0.0000e+00
Epoch 8/500
0s - loss: 3.2122 - acc: 0.0435
Epoch 9/500
0s - loss: 3.2042 - acc: 0.0000e+00
Epoch 10/500
0s - loss: 3.1956 - acc: 0.0435
Epoch 11/500
0s - loss: 3.1872 - acc: 0.0435
Epoch 12/500
0s - loss: 3.1775 - acc: 0.0435
Epoch 13/500
0s - loss: 3.1687 - acc: 0.0435
Epoch 14/500
0s - loss: 3.1590 - acc: 0.0000e+00
Epoch 15/500
0s - loss: 3.1480 - acc: 0.0435
Epoch 16/500
0s - loss: 3.1389 - acc: 0.0435
Epoch 17/500
0s - loss: 3.1288 - acc: 0.0435
Epoch 18/500
0s - loss: 3.1186 - acc: 0.0435
Epoch 19/500
0s - loss: 3.1103 - acc: 0.0435
Epoch 20/500
0s - loss: 3.1001 - acc: 0.0435
Epoch 21/500
0s - loss: 3.0919 - acc: 0.0435
Epoch 22/500
0s - loss: 3.0826 - acc: 0.