# ANN Final Project: Poem Generation

Emma Sheridan and Jessica Petersen

In [32]:
import numpy as np
import pandas as pd 
import random
import sys
import io
from bs4 import BeautifulSoup
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import RMSprop
import requests as rq

Using TensorFlow backend.


In [4]:
df = pd.read_csv('poems.csv') # load dataset
df.head() # getting top 5 head

Unnamed: 0,author,content,poem name,age,type
0,WILLIAM SHAKESPEARE,Let the bird of loudest lay\r\nOn the sole Ara...,The Phoenix and the Turtle,Renaissance,Mythology & Folklore
1,DUCHESS OF NEWCASTLE MARGARET CAVENDISH,"Sir Charles into my chamber coming in,\r\nWhen...",An Epilogue to the Above,Renaissance,Mythology & Folklore
2,THOMAS BASTARD,"Our vice runs beyond all that old men saw,\r\n...","Book 7, Epigram 42",Renaissance,Mythology & Folklore
3,EDMUND SPENSER,"Lo I the man, whose Muse whilome did maske,\r\...","from The Faerie Queene: Book I, Canto I",Renaissance,Mythology & Folklore
4,RICHARD BARNFIELD,"Long have I longd to see my love againe,\r\nSt...",Sonnet 16,Renaissance,Mythology & Folklore


In [5]:
df.groupby('type').count()

Unnamed: 0_level_0,author,content,poem name,age
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Love,326,326,326,326
Mythology & Folklore,59,59,58,59
Nature,188,188,187,188


In [144]:
# get the love, nature, and mythology poems 
poems = df['content']
lovePoems1 = poems.loc[df['type'] == 'Love']
mythPoems = poems.loc[df['type'] == 'Mythology & Folklore']
naturePoems = poems.loc[df['type'] == 'Nature']

In [145]:
lovePoems1.head()
lovePoems1 = lovePoems1.to_string()

In [146]:
# loading additional love poems from poetryfoundation.com dataset
df2 = pd.read_csv('PoetryFoundationData.csv') # load dataset
df2 = df2.dropna()
df2.head() # getting top 5 head

Unnamed: 0.1,Unnamed: 0,Title,Poem,Poet,Tags
6,6,\r\r\n Invisible Fish\r\r\n...,\r\r\nInvisible fish swim this ghost ocean now...,Joy Harjo,"Living,Time & Brevity,Relationships,Family & A..."
7,7,\r\r\n Don’t Bother the Ear...,\r\r\nDon’t bother the earth spirit who lives ...,Joy Harjo,"Religion,The Spiritual,Mythology & Folklore,Fa..."
9,9,"\r\r\n [""Hour in which I co...","\r\r\nHour in which I consider hydrangea, a sa...",Simone White,"Living,Parenthood,The Body,The Mind,Nature,Tre..."
16,16,\r\r\n scars\r\r\n ...,\r\r\nmy father’s body is a map\r\r\na record ...,Truong Tran,"The Body,Family & Ancestors"
17,17,\r\r\n what remains two\r\r...,\r\r\nit has long been forgotten this practice...,Truong Tran,"Infancy,Parenthood,The Body"


In [168]:
searchfor = ['Love', 'Relationships']

In [169]:
# getting additional poems that contain the tag Love 
poems2 = df2['Poem']
#lovePoems2 = poems2.loc[df2['Tags'].str.contains('Love')]
lovePoems2 = poems2.loc[df2['Tags'].str.contains('|'.join(searchfor))]

In [170]:
lovePoems2.head()
print(len(lovePoems2))
lovePoems2 = lovePoems2.to_string()

4707


In [171]:
# concatenate lovePoems and lovePoems 2 to create more data
lovePoems = lovePoems1 + lovePoems2

In [172]:
mythPoems.head()

0    Let the bird of loudest lay\r\nOn the sole Ara...
1    Sir Charles into my chamber coming in,\r\nWhen...
2    Our vice runs beyond all that old men saw,\r\n...
3    Lo I the man, whose Muse whilome did maske,\r\...
4    Long have I longd to see my love againe,\r\nSt...
Name: content, dtype: object

In [49]:
naturePoems.head()

19    Why didst thou promise such a beauteous day,\r...
20    The welcome Sun from sea Freake is returned,\r...
21    I met a courtier riding on the plain,\r\nWell-...
22    Walking the fields a wantcatcher I spied,\r\nT...
23    Fishing, if I a fisher may protest,\r\nOf plea...
Name: content, dtype: object

In [174]:
# MAPPING CHARACTERS: for love poems, Make two dictionaries , one to convert chars to ints, the other to convert ints back to chars

chars = sorted(list(set(lovePoems)))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

In [175]:
# process the dataset:
seqlen = 50
step = seqlen

data_X = []
data_y = []

poemLines = [] 

# creates poem lines 
for i in range(0, len(lovePoems) - seqlen - 1, step):
    poemLines.append(lovePoems[i: i + seqlen + 1])
    
# creating x and y data
data_X = np.zeros((len(poemLines), seqlen, len(chars)), dtype=np.bool)
data_Y = np.zeros((len(poemLines), seqlen, len(chars)), dtype=np.bool)

for i, poemLines in enumerate(poemLines):
    for t, (char_in, char_out) in enumerate(zip(poemLines[:-1], poemLines[1:])):
        data_X[i, t, char_indices[char_in]] = 1
        data_Y[i, t, char_indices[char_out]] = 1

In [176]:
# create the model
model = Sequential()
model.add(LSTM(128, input_shape=(seqlen, len(chars)), return_sequences=True))
model.add(Dense(len(chars), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=0.01), metrics=['categorical_crossentropy', 'accuracy'])
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 50, 128)           128000    
_________________________________________________________________
dense_7 (Dense)              (None, 50, 121)           15609     
Total params: 143,609
Trainable params: 143,609
Non-trainable params: 0
_________________________________________________________________


In [None]:
# train the model
model.fit(data_X, data_Y, batch_size=128, epochs=80)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80

In [154]:
def sample(preds, temperature=1.0):
    """Helper function to sample an index from a probability array."""
    preds = np.asarray(preds).astype('float64')
    preds = np.exp(np.log(preds) / temperature)  # softmax
    preds = preds / np.sum(preds)                #
    probas = np.random.multinomial(1, preds, 1)  # sample index
    return np.argmax(probas)  

In [157]:
def generating_poem(epoch, start_index, diversity, _):
    """Function invoked at end of each epoch. Prints generated text."""
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = start_index
    diversity = diversity

    generated = ''
    poemLines = lovePoems[start_index: start_index + seqlen]
    generated += poemLines
    print('----- Generating with seed: "' + poemLines + '"')
    sys.stdout.write(generated)

    for i in range(400):
        x_pred = np.zeros((1, seqlen, len(chars)))
        for t, char in enumerate(poemLines):
            x_pred[0, t, char_indices[char]] = 1.
            
        preds = model.predict(x_pred, verbose=0)
        next_index = sample(preds[0, -1], diversity)
        next_char = indices_char[next_index]

        poemLines = poemLines[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()

In [158]:
generating_poem(80, 50, .25, _)


----- Generating text after Epoch: 80
----- Generating with seed: ",\r...
73     Weret aught to me I bore the canopy,"
,\r...
73     Weret aught to me I bore the canopy,\r\nThat ...
85     The man I love we could be said my friend awain...
2005     \r\r\nI saw you have stupped have I see the silence again...
1064     \r\r\nThe silks and is grows like a silled me tour its that ...
9028     \r\r\nI saw you are you are the sessimes to say s...
9034     \r\r\nThe sun is my lover when you can so degrep on the m...
12835    \r\r\nI saw you are you seep of your asket th
