In [8]:
import tensorflow as tf
import numpy as np

In [9]:
data = open('Poems.txt', encoding="utf8").read() 
line_by_line = data.split('\n')
titles = []
texts = []
text = ""
for idx, line in enumerate(line_by_line):
    if not line.startswith('by') and not line.startswith('\"'):
        text += line + "\n"     

# Reading and storing the data as sentences
data = open('Poems.txt', encoding="utf8").read() 

tokenize = tf.keras.preprocessing.text.Tokenizer()
corpus = text.lower().split("\n")

tokenize.fit_on_texts(corpus)
total_words = len(tokenize.word_index) + 1


In [10]:
# Turning the data into a valid input sequence
input_sequences = []
for line in corpus:
    token_list = tokenize.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

print(input_sequences)

[[45, 7], [45, 7, 1724], [45, 7, 1724, 263], [45, 7, 1724, 263, 4], [45, 7, 1724, 263, 4, 3], [45, 7, 1724, 263, 4, 3, 502], [45, 7, 1724, 263, 4, 3, 502, 49], [156, 1080], [156, 1080, 122], [156, 1080, 122, 601], [156, 1080, 122, 601, 2], [156, 1080, 122, 601, 2, 122], [156, 1080, 122, 601, 2, 122, 1725], [602, 430], [602, 430, 68], [602, 430, 68, 789], [602, 430, 68, 789, 1], [602, 430, 68, 789, 1, 603], [602, 430, 68, 789, 1, 603, 1081], [602, 430, 68, 789, 1, 603, 1081, 5], [602, 430, 68, 789, 1, 603, 1081, 5, 60], [2, 502], [2, 502, 1726], [2, 502, 1726, 292], [2, 502, 1726, 292, 22], [2, 502, 1726, 292, 22, 97], [2, 502, 1726, 292, 22, 97, 1082], [2, 502, 1726, 292, 22, 97, 1082, 3], [2, 502, 1726, 292, 22, 97, 1082, 3, 1727], [790, 97], [790, 97, 503], [790, 97, 503, 1], [790, 97, 503, 1, 334], [790, 97, 503, 1, 334, 5], [790, 97, 503, 1, 334, 5, 264], [790, 97, 503, 1, 334, 5, 264, 791], [2, 367], [2, 367, 9], [2, 367, 9, 13], [2, 367, 9, 13, 196], [2, 367, 9, 13, 196, 1728], [

In [11]:
max_sequence_len = max([len(x) for x in input_sequences])
input = tf.keras.utils.pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
print(input)

[[   0    0    0 ...    0   45    7]
 [   0    0    0 ...   45    7 1724]
 [   0    0    0 ...    7 1724  263]
 ...
 [   0    0    0 ...  286   31 1604]
 [   0    0    0 ...   31 1604  133]
 [   0    0    0 ... 1604  133    7]]


In [12]:
#Getting the features and labels
xs = np.array(input[:,:-1])
labels = np.array(input[:,-1])
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [13]:
# Creating the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(total_words, 240, input_length=max_sequence_len-1))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150)))
model.add(tf.keras.layers.Dense(total_words, activation='softmax'))
adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
history = model.fit(xs, ys, epochs=100, verbose=1, shuffle=False)



Epoch 1/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - accuracy: 0.0616 - loss: 7.2161
Epoch 2/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 32ms/step - accuracy: 0.0823 - loss: 6.5899
Epoch 3/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.1014 - loss: 6.6819
Epoch 4/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.1080 - loss: 6.7188
Epoch 5/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.1356 - loss: 5.8507
Epoch 6/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.1577 - loss: 5.4818
Epoch 7/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.2099 - loss: 4.6366
Epoch 8/100
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - accuracy: 0.2347 - loss: 4.4880
Epoch 9/100
[1m

In [17]:
seed_text = "Once upon"
next_words = 50

for i in range(next_words):
    token_list = tokenize.texts_to_sequences([seed_text])[0]
    token_list = tf.keras.utils.pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    predicted = np.argmax(model.predict(token_list),axis=1)
    output_word= ""
    print(token_list)
    print(predicted)
    for word, index in tokenize.word_index.items():
        if index == predicted:
            output_word = word
            break
    seed_text += " " + output_word

poem = seed_text.split(' ’ ')
for line in poem:
        print(line)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
  244  66]]
[19]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 244
   66  19]]
[4077]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0  244   66   19 4077]]
[4078]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0  244   66   19 4077 4078]]
[28]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
   244   66   19 4077 4078   28]]
[44]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[[   0    0    0    0    0    0    0    0    0   