Markov Chain Poem Generator

In [7]:
import markovify
import pyphen
import re
import json
import re

In [30]:
with open('byron_processed_poems.json') as f:
    byron_poems = json.load(f)

text = ' '.join([poem['lines'] for poem in byron_poems])
text_model = markovify.Text(text, state_size=2)


def syllable_count(sentence):
    dic = pyphen.Pyphen(lang='en')
    words = re.findall(r'\b\w+\b', sentence)
    syllables = sum([len(dic.inserted(word).split('-')) for word in words])
    return syllables

def is_iambic_pentameter(sentence):
    syllables = syllable_count(sentence)
    return syllables == 10

def generate_iambic_line():
    while True:
        sentence = text_model.make_sentence()
        if sentence and is_iambic_pentameter(sentence):
            return sentence

def generate_poem(num_lines=10):
    poem = []
    for _ in range(num_lines):
        line = generate_iambic_line()
        poem.append(line)
    return '\n'.join(poem)

if __name__ == "__main__":
    poem = generate_poem()
    print(poem)

So _He_ has cut his country's long ago.
And thou--who tell'st me to rapture again.
My boat is on the Conqueror's head!
So _He_ has cut his country's long ago.
I. The man who cut his throat at last!--He!
I. The man who cut his throat at last!--He!
Sonnet composed in the bosoms of Gath!
So _He_ has cut his country's long ago.
So _He_ has cut his country's long ago.
I thought from my breast your fickle bosom please?


In [33]:
import json
import markovify
import pronouncing
import pyphen
import re

# Load Byron poems from JSON file
with open('byron_processed_poems.json') as f:
    byron_poems = json.load(f)

# Prepare text for Markov model by joining all poem lines
text = ' '.join([poem['lines'] for poem in byron_poems])

# Create Markov model with state_size=2 (considers two words for each prediction)
text_model = markovify.Text(text, state_size=2)

# Function to extract stress pattern from CMU Pronouncing Dictionary
def get_stress_pattern(word):
    pronunciations = pronouncing.phones_for_word(word)
    if pronunciations:
        # Take the first pronunciation variant
        return pronouncing.stresses(pronunciations[0])
    return ''

# Function to check if a sentence follows the unstressed-stressed iambic pattern
def is_iambic(sentence):
    words = re.findall(r'\b\w+\b', sentence)
    stress_pattern = ''
    
    for word in words:
        word_stress = get_stress_pattern(word.lower())
        if word_stress:
            stress_pattern += word_stress

    # Iambic pentameter requires alternating unstressed (0) and stressed (1) pattern over 10 syllables
    # Strip secondary stress (2) since it counts as unstressed in this context
    stress_pattern = stress_pattern.replace('2', '0')
    
    # Ensure we have exactly 10 syllables
    if len(stress_pattern) != 10:
        return False

    # Check the alternating unstressed (0) and stressed (1) pattern
    for i in range(0, 10, 2):
        if not (stress_pattern[i] == '0' and stress_pattern[i + 1] == '1'):
            return False
    
    return True

# Function to generate a valid iambic pentameter line using the Markov model
def generate_iambic_line():
    while True:
        sentence = text_model.make_sentence()
        if sentence and is_iambic(sentence):
            return sentence

# Function to generate a full poem with a specified number of lines (default: 14 lines, sonnet form)
def generate_poem(num_lines=14):
    poem = []
    for _ in range(num_lines):
        line = generate_iambic_line()
        poem.append(line)
    return '\n'.join(poem)

# Main execution block to generate and print a poem
if __name__ == "__main__":
    poem = generate_poem()
    print(poem)


KeyboardInterrupt: 