In [29]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [3]:
tokenizer = tf.keras.preprocessing.text.Tokenizer()

In [4]:
data = ""
with open('Michael McCann poem.txt') as f:
    data = f.read()

In [6]:
corpus = data.lower().split("\n")

In [11]:
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1
total_words

269

In [16]:
input_sequences = []
for verse in corpus:
    token_list = tokenizer.texts_to_sequences([verse])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [18]:
max_sequence_len = max([len(x) for x in input_sequences])

In [21]:
input_sequences = pad_sequences(input_sequences, maxlen = max_sequence_len, padding='pre')

In [23]:
input_sequences = np.array(input_sequences)

In [24]:
Xs = input_sequences[:,:-1]
labels = input_sequences[:,-1]

In [25]:
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [34]:
model = Sequential()
model.add(Embedding(total_words, 240))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [35]:
model.fit(Xs, ys, epochs=100, verbose=1)

Epoch 1/100


I0000 00:00:1733424868.596462  104778 cuda_dnn.cc:529] Loaded cuDNN version 90600


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.0261 - loss: 5.6117  
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0630 - loss: 5.1638 
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0729 - loss: 4.4782
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.2419 - loss: 3.5150
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4267 - loss: 2.2766
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6627 - loss: 1.3738
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8252 - loss: 0.7238
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8725 - loss: 0.4715
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x72c36d660890>

In [43]:
inverse_word_index = {v: k for k, v in tokenizer.word_index.items()}
seed_text = "I made a poetry machine"
next_words = 20
for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen = max_sequence_len-1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    print(np.argmax(predicted))
    output_word = inverse_word_index[np.argmax(predicted) + 1]
    seed_text += ' ' + output_word

print(seed_text)

221
223
29
3
3
143
22
231
3
231
231
22
3
143
267
261
127
215
231
231
I made a poetry machine drop much him in in old got saw in saw saw got in old end squeezed potatoes some saw saw


In [36]:
tokenizer.word_index

{'and': 1,
 'the': 2,
 'a': 3,
 'in': 4,
 'all': 5,
 'i': 6,
 'of': 7,
 "lanigan's": 8,
 'ball': 9,
 'for': 10,
 'to': 11,
 'at': 12,
 'were': 13,
 'she': 14,
 'stepped': 15,
 'he': 16,
 'his': 17,
 'girls': 18,
 'as': 19,
 'they': 20,
 "'til": 21,
 'again': 22,
 'got': 23,
 'boys': 24,
 "'round": 25,
 'that': 26,
 'her': 27,
 'there': 28,
 'out': 29,
 'him': 30,
 'six': 31,
 'long': 32,
 'months': 33,
 'spent': 34,
 'learning': 35,
 'was': 36,
 'away': 37,
 'left': 38,
 'friends': 39,
 'relations': 40,
 'when': 41,
 'dublin': 42,
 'doing': 43,
 'myself': 44,
 'nice': 45,
 'just': 46,
 'dancing': 47,
 'merry': 48,
 'tipped': 49,
 'me': 50,
 'soon': 51,
 'time': 52,
 'their': 53,
 'them': 54,
 'danced': 55,
 'new': 56,
 'steps': 57,
 'an': 58,
 'put': 59,
 'leg': 60,
 'miss': 61,
 'fainted': 62,
 'from': 63,
 'up': 64,
 'town': 65,
 'athy': 66,
 'one': 67,
 'jeremy': 68,
 'lanigan': 69,
 'battered': 70,
 "hadn't": 71,
 'pound': 72,
 'father': 73,
 'died': 74,
 'made': 75,
 'man': 76,
 '