<a href="https://colab.research.google.com/github/aju22/FrostLSTM-Remembering-Robert-Frost/blob/main/Poetry_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [74]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Enable GPU

In [75]:
import tensorflow as tf
# Ask TensorFlow for the GPU device name; everything below pins work to this device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  # Abort early rather than train silently on an unexpected device.
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# Parameters

In [134]:
# --- Text / vocabulary ---
MAX_SEQUENCE_LENGTH = 100   # upper cap on tokens per line when padding
MAX_VOCAB_SIZE = 3000       # passed to the tokenizer as `num_words`

# --- Model ---
EMBEDDING_DIM = 50          # should equal the size of the pretrained vectors (file name says 50d)
LATENT_DIM = 50             # number of LSTM units, also the size of the h/c state inputs

# --- Training ---
BATCH_SIZE = 128
EPOCHS = 2000
VALIDATION_SPLIT = 0.2      # fraction of samples held out by `model.fit`

In [None]:
# Locations on the mounted Google Drive. Despite the `_dir` suffix, each name points at a file.

# Checkpoint file written by ModelCheckpoint during training.
model_save_dir = "/content/drive/MyDrive/Poetry_Generation/MODEL_CKPT.h5"

# Pretrained word vectors, one "<word> <v1> ... <vN>" record per line.
word2vec_dir = "drive/MyDrive/Poetry_Generation/glove.6B.50d.txt"

# Training corpus, read line by line.
data_dir = "drive/MyDrive/Poetry_Generation/Robert Frost.txt"

# Loading Data

In [135]:
# Build the (input, target) text pairs used for teacher forcing:
# the target is the input shifted left by one token.
input_texts = []
target_texts = []

# The context manager closes the file handle once the corpus has been read.
# The explicit encoding keeps decoding independent of the platform locale.
with open(data_dir, encoding='utf-8') as corpus_file:
  for line in corpus_file:
    line = line.rstrip()
    if not line:
      # Skip blank separator lines.
      continue

    input_line = '<sos> ' + line
    target_line = line + ' <eos>'

    input_texts.append(input_line)      # <sos>  The    apple   fell  far   from  the    tree
    target_texts.append(target_line)    #  The   apple  fell    far   from  the   tree   <eos>

In [136]:
# Inputs followed by targets, so the tokenizer sees both '<sos>' and '<eos>'.
all_lines = [*input_texts, *target_texts]

# Data Preprocessing

In [137]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [138]:
# Characters removed before splitting. '<' and '>' are not in the list, so the
# '<sos>' / '<eos>' markers are kept as whole tokens.
token_filters = '''!"#$%&()*'+,-./:;=?@[\\]^_`{|}~\t'''

tokenizer = Tokenizer(filters = token_filters, num_words = MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(all_lines)

# Convert both text views to integer sequences with the same fitted vocabulary.
input_sequences, target_sequences = (
    tokenizer.texts_to_sequences(texts) for texts in (input_texts, target_texts)
)

In [139]:
# Word -> integer index mapping learned by the tokenizer.
word2idx = tokenizer.word_index
unique_token_count = len(word2idx)
print(f"Unique Tokens : {unique_token_count}")

Unique Tokens : 2129


In [140]:
# Reverse lookup (integer index -> word), used to decode sampled tokens.
idx2word = dict(zip(word2idx.values(), word2idx.keys()))

In [141]:
# Longest tokenised input line, capped at MAX_SEQUENCE_LENGTH.
max_sequence_length_of_data = max(map(len, input_sequences))
max_seq_length = min(MAX_SEQUENCE_LENGTH, max_sequence_length_of_data)
print(f"Max Length of Sequence : {max_seq_length}")

Max Length of Sequence : 15


In [142]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [143]:
# Pad inputs and targets the same way so the timesteps stay aligned.
pad_options = dict(maxlen = max_seq_length, padding = 'post')

input_sequences = pad_sequences(input_sequences, **pad_options)
target_sequences = pad_sequences(target_sequences, **pad_options)

# Pretrained Word Embeddings (GloVe)

In [144]:
# Map each word in the pretrained-vector file to its float32 embedding.
word2vec = {}

# The explicit encoding keeps decoding independent of the platform locale;
# the vocabulary in this file includes non-ASCII tokens.
with open(word2vec_dir, encoding='utf-8') as f:
  for line in f:
    values = line.split()
    if not values:
      # A blank line (e.g. at end of file) has no word to index.
      continue
    # Each record is "<word> <v1> <v2> ... <vN>".
    word = values[0]
    vec = np.asarray(values[1:], dtype='float32')
    word2vec[word] = vec

In [145]:
# One extra row for index 0, which no word maps to
# (it looks like the padding slot: see the `word > 0` check when targets are built).
vocab_with_padding = len(word2idx) + 1
num_words = min(MAX_VOCAB_SIZE, vocab_with_padding)

# Rows stay zero for words that have no pretrained vector.
embedding_matrix = np.zeros(shape = (num_words, EMBEDDING_DIM))

In [146]:
# Copy the pretrained vector of every in-vocabulary word into its row.
for word, i in word2idx.items():
  if i >= MAX_VOCAB_SIZE:
    # Outside the retained vocabulary.
    continue

  pretrained_vector = word2vec.get(word)
  if pretrained_vector is None:
    # No pretrained vector for this word: its row stays all zeros.
    continue

  embedding_matrix[i] = pretrained_vector

# One-Hot Encoding Targets

In [147]:
# (samples, timesteps, vocabulary) tensor, filled in by the next cell.
one_hot_shape = (len(input_sequences), max_seq_length, num_words)
one_hot_targets = np.zeros(one_hot_shape)

In [148]:
# Set a 1 at (sample, timestep, word index) for every non-padding target token.
# The loop variable must NOT be called `target_sequences`: reusing that name
# rebinds the array to its last row, which corrupts it for later cells and
# makes this cell fail or misbehave on a re-run.
for i, target_sequence in enumerate(target_sequences):
  for t, word in enumerate(target_sequence):
    # Index 0 is skipped so those timesteps keep an all-zero target.
    if word > 0:
      one_hot_targets[i, t, word] = 1

# Building the Model

In [149]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM 

In [150]:
# Embedding lookup initialised from the pretrained matrix; shared by the
# training graph and the one-step sampling graph.
embedding_layer = Embedding(
    input_dim = num_words,
    output_dim = EMBEDDING_DIM,
    weights = [embedding_matrix],
)

In [151]:
# Inputs: the token sequence plus the LSTM's initial hidden and cell states.
# The states are explicit inputs so the sampling model can feed them back in.
input_layer = Input(shape = (max_seq_length, ))
initial_h = Input(shape = (LATENT_DIM, ))
initial_c = Input(shape = (LATENT_DIM, ))

embedded_tokens = embedding_layer(input_layer)

# return_sequences gives one output per timestep; return_state also returns (h, c),
# which are discarded here and only used by the sampling model.
lstm = LSTM(LATENT_DIM, return_sequences=True, return_state=True)
lstm_results = lstm(embedded_tokens, initial_state = [initial_h, initial_c])
sequence_output = lstm_results[0]

# Per-timestep distribution over the vocabulary.
dense = Dense(num_words, activation='softmax')
output = dense(sequence_output)

In [152]:
# Assemble and compile the training model on the selected device.
with tf.device(device_name):
  model = Model(
    inputs = [input_layer, initial_h, initial_c],
    outputs = output
  )

  # Targets are one-hot, hence categorical (not sparse) cross-entropy.
  model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
  )

In [153]:
# Checkpoint to `model_save_dir`, keeping only the best model seen so far
# (judged on the callback's default monitored metric).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = model_save_dir,
    save_best_only = True,
)
callbacks = [checkpoint_callback]

In [None]:
# All-zero initial state, one row per training sample; the same array is used for h and c.
init = np.zeros(shape = (len(input_sequences), LATENT_DIM))

with tf.device(device_name):
  model.fit(
      x = [input_sequences, init, init],
      y = one_hot_targets,
      epochs = EPOCHS,
      batch_size = BATCH_SIZE,
      validation_split = VALIDATION_SPLIT,
      callbacks = callbacks,
  )

# Loading the Model

In [33]:
# Reload the checkpoint written during training. `model_save_dir` is reused so
# the save and load locations cannot drift apart; the arguments dropped here
# (custom_objects=None, compile=True, options=None) only restated the defaults.
model = tf.keras.models.load_model(model_save_dir)

In [155]:
# Display the layer list; the following cell picks layers out of it by position.
model.layers

[<keras.engine.input_layer.InputLayer at 0x7f3cbe5f5f90>,
 <keras.layers.embeddings.Embedding at 0x7f3cc24477d0>,
 <keras.engine.input_layer.InputLayer at 0x7f3cbe5f55d0>,
 <keras.engine.input_layer.InputLayer at 0x7f3cbe61d790>,
 <keras.layers.recurrent_v2.LSTM at 0x7f3cc2496950>,
 <keras.layers.core.dense.Dense at 0x7f3cbc1c8350>]

In [156]:
# Take ALL trained layers from the reloaded model so the sampling graph uses one
# consistent set of weights. The embedding is rebound too: otherwise sampling
# would pair the in-memory `embedding_layer` with the checkpoint's LSTM and
# Dense weights. Indices follow the `model.layers` listing shown above:
# [Input, Embedding, Input, Input, LSTM, Dense].
embedding_layer = model.layers[1]
lstm = model.layers[4]
dense = model.layers[5]

# Sampling Model

In [157]:
# One-step graph: a single token in, next-token distribution and updated states out.
# It reuses the trained embedding / LSTM / Dense layers and the h/c state inputs.
input2 = Input(shape = (1, ))
step_states = [initial_h, initial_c]
step_output, h, c = lstm(embedding_layer(input2), initial_state = step_states)
output2 = dense(step_output)

In [158]:
# Returns the new (h, c) alongside the prediction so the caller can feed them back in.
sampling_model = Model(
    inputs = [input2, initial_h, initial_c],
    outputs = [output2, h, c],
)

In [159]:
def sample_line():
  """Sample one line of text from `sampling_model`, one token at a time.

  Generation starts from the '<sos>' token with all-zero LSTM states. At each
  step the next token is drawn at random from the predicted distribution
  (with index 0 masked out and the rest renormalised); the drawn token and the
  updated states are fed back in for the following step. Sampling stops when
  '<eos>' is drawn or after `max_seq_length` steps.

  Returns:
    The sampled words joined by single spaces ('<eos>' is not included).
  """
  current_token = np.array([[word2idx['<sos>']]])
  h = np.zeros((1, LATENT_DIM))
  c = np.zeros((1, LATENT_DIM))

  eos = word2idx['<eos>']

  words = []

  for _ in range(max_seq_length):
    o, h, c = sampling_model.predict([current_token, h, c])

    # Distribution for the single sample / single timestep in this batch.
    probs = o[0, 0]
    if probs.argmax() == 0:
      # The model's top choice was index 0, which is never emitted.
      print("Null-Warning")

    # Mask index 0 and renormalise so the remaining probabilities sum to 1.
    probs[0] = 0
    probs /= probs.sum()

    idx = np.random.choice(len(probs), p = probs)
    if idx == eos:
      return ' '.join(words)

    words.append(idx2word.get(idx, f"<WARNING at {idx}>"))

    # The sampled token becomes the next step's input.
    current_token[0, 0] = idx

  return ' '.join(words)

In [186]:
# Number of independently sampled lines that make up the poem.
POEM_LENGTH = 10

for _ in range(POEM_LENGTH):
  poem_line = sample_line()
  print(poem_line)

you let make a present but drowned in important
except always john he chanced
wishing in virgin feet
behind the door of day in the kitchen field fate
but they left a horny handed kindness
the way a man of the window toward chases lowes from picking however
the breath of air was such her nights out of believe
i listened till me you sleep
and stamped and were mounted for us believe everybody m granny speaking
one bearing it your going to bear safely
