In [3]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.models import load_model
from keras.utils import np_utils


In [4]:
# Read file - wonderland.txt
# The context manager closes the file handle as soon as the text has been read,
# and `data` is only ever bound to the text itself (never to the handle).
filename = 'wonderland.txt'
with open(filename, 'r') as text_file:
    # Lower-case the whole text so upper/lower variants map to the same character.
    data = text_file.read().lower()
print("Length of the data: " + str(len(data)))

Length of the data: 12142


In [5]:
# Collect every distinct character of the text and keep them in sorted order
unique_characters = set(data)
chars = sorted(unique_characters)
print("Number of unique charaters in the data: " + str(len(chars)))

Number of unique charaters in the data: 51


In [6]:
# Lookup table: character -> its position in the sorted `chars` list
char_to_integer = {char: integer for integer, char in enumerate(chars)}
print(char_to_integer)

{'\n': 0, ' ': 1, '!': 2, '"': 3, '#': 4, "'": 5, '(': 6, ')': 7, '*': 8, ',': 9, '-': 10, '.': 11, '0': 12, '1': 13, '2': 14, '3': 15, '4': 16, '5': 17, '8': 18, '9': 19, ':': 20, ';': 21, '?': 22, '[': 23, ']': 24, 'a': 25, 'b': 26, 'c': 27, 'd': 28, 'e': 29, 'f': 30, 'g': 31, 'h': 32, 'i': 33, 'j': 34, 'k': 35, 'l': 36, 'm': 37, 'n': 38, 'o': 39, 'p': 40, 'q': 41, 'r': 42, 's': 43, 't': 44, 'u': 45, 'v': 46, 'w': 47, 'x': 48, 'y': 49, 'z': 50}


In [7]:
# Reverse lookup table: position in the sorted `chars` list -> character
integer_to_char = dict(enumerate(chars))
print(integer_to_char)

{0: '\n', 1: ' ', 2: '!', 3: '"', 4: '#', 5: "'", 6: '(', 7: ')', 8: '*', 9: ',', 10: '-', 11: '.', 12: '0', 13: '1', 14: '2', 15: '3', 16: '4', 17: '5', 18: '8', 19: '9', 20: ':', 21: ';', 22: '?', 23: '[', 24: ']', 25: 'a', 26: 'b', 27: 'c', 28: 'd', 29: 'e', 30: 'f', 31: 'g', 32: 'h', 33: 'i', 34: 'j', 35: 'k', 36: 'l', 37: 'm', 38: 'n', 39: 'o', 40: 'p', 41: 'q', 42: 'r', 43: 's', 44: 't', 45: 'u', 46: 'v', 47: 'w', 48: 'x', 49: 'y', 50: 'z'}


In [8]:
# Slide a fixed-length window over the text. Each window becomes one input
# sample (a list of integer codes) and the character immediately after the
# window becomes its label (a single integer code).
length_of_sequence = 100
window_count = len(data) - length_of_sequence
X_train = [
    [char_to_integer[symbol] for symbol in data[start:start + length_of_sequence]]
    for start in range(window_count)
]
y_train = [
    char_to_integer[data[start + length_of_sequence]]
    for start in range(window_count)
]

In [9]:
# Arrange the integer windows as a (samples, length_of_sequence, 1) array and
# divide every code by the vocabulary size.
samples = len(X_train)
vocabulary_scale = float(len(chars))
X = np.reshape(X_train, (samples, length_of_sequence, 1)) / vocabulary_scale
print(X.shape)

(12042, 100, 1)


In [10]:
# y_train: Integers to one hot vectors
# The width is pinned to the vocabulary size. Without num_classes, to_categorical
# infers the width from the largest label present, so the one-hot width could
# end up smaller than the vocabulary if the highest-coded characters never
# occur as a label.
y_train = np_utils.to_categorical(y_train, num_classes=len(chars))
print(y_train.shape)

(12042, 51)


In [11]:
# Model: one LSTM layer over the input window, dropout, then a softmax layer
# whose width matches the one-hot label width.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y_train.shape[1], activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 256)               264192    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 51)                13107     
Total params: 277,299
Trainable params: 277,299
Non-trainable params: 0
_________________________________________________________________
None


In [1]:
# Load the saved model from disk (load_model returns a complete model, which
# replaces whatever `model` was bound to before this cell).
filename = 'model_weights_LSTM.h5'
model = load_model(filename)
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# Pick a random training window to seed the generation. randint's upper bound
# is exclusive, so len(X_train) (not len - 1) is needed for the last window to
# be selectable.
random_number = np.random.randint(0, len(X_train))
# Take a copy: the seed is extended during generation, and sharing the list
# object with X_train would alter the training data in place.
input_sequence = list(X_train[random_number])
print(input_sequence)

In [None]:
# Prints the input_sequence in string format
print(''.join(integer_to_char[code] for code in input_sequence))
print(len(input_sequence))

# Generate text one character at a time: predict the next character from the
# current window, record it, then slide the window forward by one position.
num_generated_chars = 1000

# Work on a private copy so that neither `input_sequence` nor the X_train row
# it came from is modified; re-running this cell starts from the same seed.
window = list(input_sequence)
generated = list(window)
for _ in range(num_generated_chars):
    # Same shaping and scaling as applied to the training inputs:
    # (1, window length, 1), codes divided by the vocabulary size.
    x = np.reshape(window, (1, len(window), 1))
    x = x / float(len(chars))
    y_pred = model.predict(x, verbose=0)
    # Greedy decoding: take the highest-scoring class as the next character.
    index = int(np.argmax(y_pred))
    generated.append(index)
    # Build a new list instead of appending in place, keeping the window
    # length constant without mutating any shared list.
    window = window[1:] + [index]
output = ''.join(integer_to_char[integer] for integer in generated)
print(output)