In [11]:
import tensorflow as tf

import numpy as np
from pathlib import Path
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ModelCheckpoint

In [5]:
# Environment check: show the TensorFlow version, then the names exposed by tf.keras,
# each on its own line.
print(tf.__version__, dir(tf.keras), sep="\n")

In [6]:
# Read the training corpus and lower-case it so upper/lower variants of a letter
# share one vocabulary entry.
# Path.read_text() opens the file, reads it, and closes the handle again; the previous
# bare open(...).read() left the handle open until garbage collection.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable
# data directory so the notebook runs on other machines.
# NOTE(review): the platform default encoding is used, as before — confirm the file's
# actual encoding and pass it explicitly if it differs.
text = Path("C:\\Users\\daniu\\my_repos\\jupyter-notebooks\\input-dir\\wonderland.txt").read_text()
text = text.lower()

# Show only a short preview; printing the whole book floods the notebook output.
print("Corpus length: {} characters".format(len(text)))
print(text[:500])

In [7]:
# Vocabulary: every distinct character of the corpus, in sorted order.
characters = sorted(set(text))
vocab_size = len(characters)

# Lookup tables in both directions; a character's id is its position in `characters`.
n_to_char = dict(enumerate(characters))
char_to_n = dict(zip(characters, range(vocab_size)))

print("Number of unique characters: {}".format(vocab_size))
print("Characters: {}".format(characters))
print("Number to Character Map: {}".format(n_to_char))
print("Character to Number Map: {}".format(char_to_n))

In [8]:
# Sliding-window dataset: each run of `seq_length` consecutive characters is one
# input sequence, and the character immediately after it is that sequence's label.
# Both are stored as integer ids via `char_to_n`.
seq_length = 10
length = len(text)

X = [
    [char_to_n[char] for char in text[begin:begin + seq_length]]
    for begin in range(length - seq_length)
]
Y = [char_to_n[text[begin + seq_length]] for begin in range(length - seq_length)]

print("Number of sequences: {}".format(len(X)))
print("First Sequence: {}".format(X[0]))
print("First Label: {}".format(Y[0]))

In [9]:
# Arrange the integer sequences as (number of sequences, seq_length, 1),
# i.e. one feature per timestep.
X_modified = np.asarray(X).reshape(len(X), seq_length, 1)
print(X_modified[0])

# Scale the character ids by the vocabulary size so inputs fall in [0, 1).
X_modified = X_modified / float(len(characters))
print(X_modified[0])

# One-hot encode the label ids for the softmax / categorical cross-entropy output.
Y_modified = to_categorical(Y)
print(Y_modified[0])

In [10]:
# Two stacked SimpleRNN layers of 400 units, each followed by 20% dropout, with a
# softmax layer that has one output per column of the one-hot label matrix.
# The first RNN returns its full output sequence so the second RNN receives one
# vector per timestep; the second returns only its final output.
model = Sequential([
    SimpleRNN(
        400,
        input_shape=X_modified.shape[1:],  # (timesteps, features) of the 3-D input array
        return_sequences=True,
    ),
    Dropout(0.2),
    SimpleRNN(400),
    Dropout(0.2),
    Dense(Y_modified.shape[1], activation='softmax'),
])

In [16]:

# Checkpointing: write the model to model_weights/ whenever the monitored training
# loss reaches a new minimum. The epoch number and loss are formatted into the filename.
filepath = "model_weights/gigantic-improvement-ctd20-{epoch:02d}-{loss:.4f}.keras"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

# Optional: resume from weights previously saved in the folder model_weights
# (left disabled, as before).
# filename = "model_weights/gigantic-improvement-20-0.5606.hdf5"
# model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [17]:
# Train for 10 epochs in mini-batches of 128 sequences; the checkpoint callback
# runs during training.
model.fit(
    x=X_modified,
    y=Y_modified,
    batch_size=128,
    epochs=10,
    callbacks=callbacks_list,
)

In [18]:

# Seed the generator with one fixed training window (row 10 of X; this is a
# hard-coded choice, not a random one) and start the output with the seed's characters.
start = 10
string_mapped = list(X[start])
full_string = [n_to_char[value] for value in string_mapped]

# Same scaling that was applied to the training inputs; computed once because it
# does not change inside the loop.
vocab_scale = float(len(characters))

# Greedy generation of 400 characters: predict the most likely next character,
# record it, then slide the input window forward by one position.
for _ in range(400):
    # Shape (1, window length, 1): a single sequence with one feature per timestep.
    x = np.reshape(string_mapped, (1, len(string_mapped), 1)) / vocab_scale

    # argmax over the softmax output picks the highest-probability character id;
    # int() keeps the window a list of plain Python ints.
    pred_index = int(np.argmax(model.predict(x, verbose=0)))
    full_string.append(n_to_char[pred_index])

    # Drop the oldest id and append the prediction so the window length stays constant.
    string_mapped = string_mapped[1:] + [pred_index]

In [20]:
# Emit the seed plus the generated characters as one continuous string.
print(*full_string, sep="")