In [1]:
# LSTM for one-char to one-char prediction

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import utils


In [2]:
import numpy as np

# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; TensorFlow/Keras presumably keeps its
# own RNG state — confirm whether that needs a fixed seed as well.
np.random.seed(7)

# Raw dataset: the 26 upper-case letters, in alphabetical order.
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Lookup tables between each character and its position (0 - 25) in `alphabet`,
# one for encoding characters and one for decoding predictions.
char_to_int = {letter: position for position, letter in enumerate(alphabet)}
int_to_char = {position: letter for position, letter in enumerate(alphabet)}

In [3]:
# Display the character -> integer lookup table built above.
char_to_int

{'A': 0,
 'B': 1,
 'C': 2,
 'D': 3,
 'E': 4,
 'F': 5,
 'G': 6,
 'H': 7,
 'I': 8,
 'J': 9,
 'K': 10,
 'L': 11,
 'M': 12,
 'N': 13,
 'O': 14,
 'P': 15,
 'Q': 16,
 'R': 17,
 'S': 18,
 'T': 19,
 'U': 20,
 'V': 21,
 'W': 22,
 'X': 23,
 'Y': 24,
 'Z': 25}

In [4]:
# Number of characters in the raw dataset.
len(alphabet)

26

# Data preparation
Prepare the input and output data from the raw dataset. The data must be sequential so that, for each input character, the target is the character that comes immediately after it.

In [5]:
# Offset (in characters) from each input to the character the model must
# predict. The same value is reused later as the time-step dimension when the
# inputs are reshaped, so it is expected to stay at 1 for this notebook.
input_seq = 1
dataX = []  # Integer-encoded input sequences, one list per sample
dataY = []  # Integer-encoded target sequences, one list per sample

for i in range(len(alphabet)):
    seq_in = alphabet[i]
    # Wrap around the end of the alphabet with modulo arithmetic instead of a
    # hard-coded last index, so the loop stays correct for any alphabet length
    # or offset (the final letter maps back to the first one).
    seq_out = alphabet[(i + input_seq) % len(alphabet)]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append([char_to_int[char] for char in seq_out])
    print(seq_in, '->', seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z
Z -> A


In [6]:
# Display the integer-encoded inputs (one single-element list per sample).
dataX

[[0],
 [1],
 [2],
 [3],
 [4],
 [5],
 [6],
 [7],
 [8],
 [9],
 [10],
 [11],
 [12],
 [13],
 [14],
 [15],
 [16],
 [17],
 [18],
 [19],
 [20],
 [21],
 [22],
 [23],
 [24],
 [25]]

In [7]:
# Display the integer-encoded targets (one single-element list per sample).
dataY

[[1],
 [2],
 [3],
 [4],
 [5],
 [6],
 [7],
 [8],
 [9],
 [10],
 [11],
 [12],
 [13],
 [14],
 [15],
 [16],
 [17],
 [18],
 [19],
 [20],
 [21],
 [22],
 [23],
 [24],
 [25],
 [0]]

In [8]:
# Reshape the inputs to [samples, input_sequence, features]; the last two
# dimensions are what the LSTM layer is given as its input_shape.
x = np.reshape(dataX, (len(dataX), input_seq, 1))
# Normalize the integer codes into the range [0, 1) by dividing by the
# alphabet size.
x = x / float(len(alphabet))

# One-hot encode the targets. num_classes is pinned to the alphabet size so
# the width of y does not depend on the largest label present in dataY.
y = utils.to_categorical(dataY, num_classes=len(alphabet))

In [9]:
# Confirm the input layout: (samples, input_sequence, features).
x.shape

(26, 1, 1)

In [10]:
# Display the normalized inputs (integer codes divided by the alphabet size).
x

array([[[0.        ]],

       [[0.03846154]],

       [[0.07692308]],

       [[0.11538462]],

       [[0.15384615]],

       [[0.19230769]],

       [[0.23076923]],

       [[0.26923077]],

       [[0.30769231]],

       [[0.34615385]],

       [[0.38461538]],

       [[0.42307692]],

       [[0.46153846]],

       [[0.5       ]],

       [[0.53846154]],

       [[0.57692308]],

       [[0.61538462]],

       [[0.65384615]],

       [[0.69230769]],

       [[0.73076923]],

       [[0.76923077]],

       [[0.80769231]],

       [[0.84615385]],

       [[0.88461538]],

       [[0.92307692]],

       [[0.96153846]]])

In [11]:
# Show the shape of the one-hot encoded targets, then the matrix itself.
print(y.shape)
y

(26, 26)


array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 

# Creating model Using LSTM

In [12]:
# Build the network as a two-layer stack:
#   1. an LSTM with 32 units whose input_shape is
#      [number of sequence steps, number of features], read from x;
#   2. a softmax Dense layer with one output per column of y.
model = Sequential([
    LSTM(32, input_shape=(x.shape[1], x.shape[2])),
    Dense(y.shape[1], activation='softmax'),
])

In [13]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                4352      
_________________________________________________________________
dense (Dense)                (None, 26)                858       
Total params: 5,210
Trainable params: 5,210
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets in y are one-hot vectors), and accuracy tracked as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the whole dataset for 500 epochs with one sample per batch;
# verbose=2 prints a single summary line per epoch.
model.fit(x=x, y=y, epochs=500, batch_size=1, verbose=2)

poch 162/500
26/26 - 0s - loss: 1.6295 - accuracy: 0.7308
Epoch 163/500
26/26 - 0s - loss: 1.6305 - accuracy: 0.8077
Epoch 164/500
26/26 - 0s - loss: 1.6295 - accuracy: 0.7692
Epoch 165/500
26/26 - 0s - loss: 1.6257 - accuracy: 0.8077
Epoch 166/500
26/26 - 0s - loss: 1.6302 - accuracy: 0.7692
Epoch 167/500
26/26 - 0s - loss: 1.6269 - accuracy: 0.7692
Epoch 168/500
26/26 - 0s - loss: 1.6254 - accuracy: 0.8462
Epoch 169/500
26/26 - 0s - loss: 1.6218 - accuracy: 0.7692
Epoch 170/500
26/26 - 0s - loss: 1.6202 - accuracy: 0.8462
Epoch 171/500
26/26 - 0s - loss: 1.6232 - accuracy: 0.8077
Epoch 172/500
26/26 - 0s - loss: 1.6258 - accuracy: 0.8077
Epoch 173/500
26/26 - 0s - loss: 1.6237 - accuracy: 0.8077
Epoch 174/500
26/26 - 0s - loss: 1.6230 - accuracy: 0.8077
Epoch 175/500
26/26 - 0s - loss: 1.6232 - accuracy: 0.8077
Epoch 176/500
26/26 - 0s - loss: 1.6232 - accuracy: 0.7692
Epoch 177/500
26/26 - 0s - loss: 1.6217 - accuracy: 0.7692
Epoch 178/500
26/26 - 0s - loss: 1.6193 - accuracy: 0.846

<tensorflow.python.keras.callbacks.History at 0x19d9c33b0d0>

# Summarize performance of the model

In [19]:
# Evaluate on the same x / y the model was trained on; with the metrics
# configured at compile time, scores[1] is the accuracy.
scores = model.evaluate(x, y, verbose=1)
accuracy_pct = scores[1] * 100
print("Model accuracy : % .2f%%" % accuracy_pct)

Model accuracy :  92.31%


In [16]:
# Sanity-check the model on a single sample (the pattern stored at index 4).
sample = dataX[4]
# Shape it as a one-sample batch and normalize it by the alphabet size,
# the same scaling applied to the training inputs.
test = np.reshape(sample, (1, len(sample), 1)) / len(alphabet)
# Per-class probabilities for this sample, shown as the cell output.
pred = model.predict(test, verbose=0)
pred

array([[7.32131153e-13, 4.48931642e-02, 8.02344531e-02, 1.19360223e-01,
        1.71645775e-01, 1.87072158e-01, 1.64251834e-01, 1.15697496e-01,
        6.39868453e-02, 3.20442207e-02, 1.28158294e-02, 5.02179144e-03,
        2.03072745e-03, 6.34440745e-04, 2.16499888e-04, 6.99673838e-05,
        1.72331511e-05, 5.80470532e-06, 1.29440707e-06, 3.09722594e-07,
        6.75047858e-08, 1.35409826e-08, 2.79239498e-09, 4.68679984e-10,
        7.23500218e-11, 6.44398077e-12]], dtype=float32)

In [17]:
# Index of the class with the highest predicted probability.
ind = pred.argmax()
print("predicted value index : {}".format(ind))
# Decode the prediction back into a letter.
res = int_to_char[ind]
# Code 4 is the input character that was fed to the model to produce `pred`.
source_char = int_to_char[4]
print('The charactor after {0} is {1}'.format(source_char, res))

predicted value index : 5
The charactor after E is F


In [20]:
# Peek at the first two encoded input patterns.
dataX[0:2]

[[0], [1]]

In [22]:
# Run every input pattern through the trained model and print the letter it
# predicts next.
for pattern in dataX:
    # Shape the pattern as a one-sample batch and normalize it by the alphabet
    # size. A dedicated name is used so the training array `x` is not
    # overwritten, which would break re-running the fit / evaluate cells.
    pattern_x = np.reshape(pattern, (1, len(pattern), 1))
    pattern_x = pattern_x / float(len(alphabet))
    # verbose=0 matches the single-sample check and keeps the output to one
    # line per pattern.
    pred = model.predict(pattern_x, verbose=0)
    # Most probable class index, decoded back into a letter.
    index = np.argmax(pred)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, '->', result)

['A'] -> B
['B'] -> B
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> V
['V'] -> W
['W'] -> X
['X'] -> Y
['Y'] -> A
['Z'] -> A
