# Project: Poetry generation

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# import string
import requests
import pandas as pd
import numpy as np


import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Conv1D, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
try:
    response = requests.get('https://raw.githubusercontent.com/laxmimerit/poetry-data/master/adele.txt')

    print('Status: ', response.status_code)
    print('[INFO] Data have been downloaded successfully')
except Exception as e: 
    print('[ERROR] Data were not downloaded')
    print(e)

# In case the data cannot be downloaded, manually load file 'adele.txt' contained in directory Data

Status:  200
[INFO] Data have been downloaded successfully


In [3]:
# Split text to lines
data = response.text.splitlines()

## Build LSTM Model

In [4]:
# Tokenization
token = Tokenizer()
token.fit_on_texts(data)

# Encoded text
encoded_text = token.texts_to_sequences(data)

# Calculate vocabulary size
vocab_size = len(token.word_counts) + 1
print('[INFO] Vocabulary size: ', vocab_size)


[INFO] Vocabulary size:  1396


##  Prepare Training Data

In [5]:
# Prepare training data
datalist = []
for d in encoded_text:
  if len(d)>1:
    for i in range(2, len(d)):
      datalist.append(d[:i])
      
      
# Padding
max_length = 20
sequences = pad_sequences(datalist, maxlen=max_length, padding='pre')


# Create inputs/outputs
X = sequences[:, :-1]
y = sequences[:, -1]

y = to_categorical(y, num_classes=vocab_size)


seq_length = X.shape[1]
print('[INFO] Sequence length: ', seq_length)

[INFO] Sequence length:  19


## LSTM Model Training

In [6]:
# model = Sequential()
# model.add(Embedding(vocab_size, 50, input_length=seq_length))
# model.add(LSTM(100, return_sequences=True))
# model.add(LSTM(100))
# model.add(Dense(100, activation='relu'))
# model.add(Dense(vocab_size, activation='softmax'))

model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=seq_length))
model.add(Conv1D(64, activation='relu', kernel_size=4, strides=2, padding="same"))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))


model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

In [7]:
score = model.fit(X, y,
                  batch_size=32, 
                  verbose=True, 
                  epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Poetry Generation

In [8]:
def generate_poetry(seed_text = '', n_lines = 5, poetry_length = 10):

  for i in range(n_lines):
    text = []
    for _ in range(poetry_length):
      encoded = token.texts_to_sequences([seed_text])
      encoded = pad_sequences(encoded, maxlen=seq_length, padding='pre')

      y_pred = np.argmax(model.predict(encoded), axis=-1)

      predicted_word = ""
      for word, index in token.word_index.items():
        if index == y_pred:
          predicted_word = word
          break

      seed_text = seed_text + ' ' + predicted_word
      text.append(predicted_word)

    seed_text = text[-1]
    text = ' '.join(text)
    print(text)


    
seed_text = 'i love you'
generate_poetry(seed_text, 5)

when the rain is blowing in your right things i
know it ain't easy giving up your mind some i'll
be better to you let me stay here for just
look and you will know i re love again i
know it ain't easy giving up your mind some i'll
