In [1]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!mkdir ~/.kaggle
!cp /content/drive/MyDrive/.credentials/kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download harshalgadhe/poem-generation
!unzip -d data/ poem-generation.zip 

Downloading poem-generation.zip to /content
  0% 0.00/39.9k [00:00<?, ?B/s]
100% 39.9k/39.9k [00:00<00:00, 39.6MB/s]
Archive:  poem-generation.zip
  inflating: data/poem.txt           


In [4]:
with open("/content/data/poem.txt", "r") as f:
    lines = f.read().split("\n")

In [5]:
tokenizer = Tokenizer(oov_token = "<<OOV>>")
tokenizer.fit_on_texts(lines)

total_words = len(tokenizer.word_index)

In [6]:
input_sequences = []
for line in lines:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

padded_input_sequences = np.array(pad_sequences(input_sequences))
max_len = padded_input_sequences.shape[1]

In [7]:
X_train, y_train = padded_input_sequences[:, :-1], padded_input_sequences[:, -1]
y_train = tf.keras.utils.to_categorical(y_train)

In [8]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(total_words, 128, input_length = max_len - 1),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512)),
    tf.keras.layers.Dense(total_words / 2, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(total_words + 1, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 15, 128)           487424    
                                                                 
 bidirectional (Bidirectiona  (None, 1024)             2625536   
 l)                                                              
                                                                 
 dense (Dense)               (None, 1904)              1951600   
                                                                 
 dropout (Dropout)           (None, 1904)              0         
                                                                 
 dense_1 (Dense)             (None, 3809)              7256145   
                                                                 
Total params: 12,320,705
Trainable params: 12,320,705
Non-trainable params: 0
____________________________________________

In [9]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss="categorical_crossentropy",
    metrics = ['accuracy']
)

class MyCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('accuracy') > 0.975):
            self.model.stop_training = True

callbacks = [MyCallback()]

In [10]:
model.fit(X_train, y_train, epochs=50, verbose=1, callbacks = callbacks)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f6fb027d810>