# Shakespeare x Spenser (HMM)

In [3]:
import os
import numpy as np
from IPython.display import HTML
import json
from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)
import re
import sys
import random

## Data Preprocessing

In [105]:
def split_by_sonnet_shakespeare(text): 
    """
    Split Shakespeare text by sonnet. 
    """
    return shakespeare_text.split("\n\n\n")

def split_by_sonnet_spenser(text): 
    """
    Split Spenser text by sonnet. 
    """
    sonnets = text.split("\n\n")
    # Remove sonnet number
    return [sonnets[i] for i in range(len(sonnets)) if i % 2 == 1]

def clean_shakespeare(text): 
    sonnets = split_by_sonnet_shakespeare(text)
    all_lines = []
    for i in range(0, len(sonnets)): 
        lines = sonnets[i].split("\n")
        for j in range(1, len(lines)):
            all_lines.append(lines[j])
    return '\n'.join(all_lines)

def clean_spenser(text): 
    sonnets = split_by_sonnet_spenser(text)
    return '\n'.join(sonnets)

In [94]:
# Import text
shakespeare_text = open(os.path.join(os.getcwd(), 'data_Shakespeare/shakespeare.txt')).read()

spenser_text = open(os.path.join(os.getcwd(), 'data_Shakespeare/spenser.txt')).read()

In [95]:
# Split into sonnets
shakespeare_sonnets = split_by_sonnet_shakespeare(shakespeare_text)
spenser_sonnets = split_by_sonnet_spenser(spenser_text)

In [106]:
# Process text (Remove sonnet number)
shakespeare_clean = clean_shakespeare(shakespeare_text)
spenser_clean = clean_spenser(spenser_text)

In [97]:
# Incorporate both Shakespeare and Spenser sonnets
full_text = shakespeare_clean + spenser_clean
obs, obs_map = parse_observations(full_text)

# Shakespeare x Spenser Sonnets

In [6]:
hmm = unsupervised_HMM(obs, 16, 100)

Iteration: 10
Iteration: 20
Iteration: 30
Iteration: 40
Iteration: 50
Iteration: 60
Iteration: 70
Iteration: 80
Iteration: 90
Iteration: 100


In [11]:
for i in range(0, 14):    
    print(sample_sentence(hmm, obs_map, n_words=8))

Belay my seed amen, store for i and
Not body, dwell, preyed love rights shall golden
With a fire, sinful love's toil, catch light
Bold, doting mark latch, sickness 'gainst filthy therewith
Whose self-example will heaven thy love when by
Beauty's argument nor whilst i will the left
Ye part, thy in private they in the
Not seem lack will me, i darkness and
Pretty sphere outworn black love the thee doth
Hardly is fire of i'll towers is having
Leave you are nipples for my art forth,
Traffic in therefore, blenches dare be tempting their
Let my is, dost live, spend, not did
With lovely, heaven's counterfeit, hearts, struck should forelock


# Rhyme

### Helper Functions (Generate Emissions)

In [17]:
def generate_emission1(self, M, seed):
        '''
        Generates an emission of length M, assuming that the starting state
        is chosen uniformly at random. 

        Arguments:
            M:          Length of the emission to generate.

        Returns:
            emission:   The randomly generated emission as a list.

            states:     The randomly generated states as a list.
        '''
        
        #O: The (i, j)^th element is the probability of
        # emitting observation j given state i.
        O = np.array(self.O)
        emission = []
        emission.append(seed)
       
        # generate first state given this emission 
        col_states = O[:, seed]
       
        # Choose the state having the highest probability of generating
        # this emission. 
        
        state = np.argmax(col_states)       
        states = []
        for t in range(M-1):
            # Append state.
            states.append(state)

            # Sample next observation.
            rand_var = random.uniform(0, 1)
            next_obs = 0

            while rand_var > 0:
                rand_var -= self.O[state][next_obs]
                next_obs += 1

            next_obs -= 1
            emission.insert(0, next_obs)

            # Sample next state.
            rand_var = random.uniform(0, 1)
            next_state = 0

            while rand_var > 0:
                rand_var -= self.A[state][next_state]
                next_state += 1

            next_state -= 1
            state = next_state

        return emission, states
    
def obs_map_reverser1(obs_map):
    obs_map_r = {}

    for key in obs_map:
        obs_map_r[obs_map[key]] = key

    return obs_map_r

def sample_sentence1(hmm, obs_map, seed, n_words=100):
    # Get reverse map.
    seed_idx = obs_map[seed]
    obs_map_r = obs_map_reverser1(obs_map)

    # obs_map is of the form key=word: val=identifier
    # Pass the identifier 

    
    # Sample and convert sentence.
    emission, states = generate_emission1(hmm, n_words, seed_idx)
    sentence = [obs_map_r[i] for i in emission]

    return ' '.join(sentence).capitalize()

def parse_observations_reverse(text):
    # Convert text to dataset.
    lines = [line.split() for line in text.split('\n') if line.split()]

    obs_counter = 0
    obs = []
    obs_map = {}

    for line in reversed(lines):
        obs_elem = []
        
        for word in reversed(line):
            word = re.sub(r'[^\w\-\']', '', word).lower() 
            #word = re.sub(r'[^\w]', '', word).lower()
            if word not in obs_map:
                # Add unique words to the observations map.
                obs_map[word] = obs_counter
                obs_counter += 1
            
            # Add the encoded word.
            obs_elem.append(obs_map[word])
        
        # Add the encoded sequence.
        obs.append(obs_elem)

    return obs, obs_map

### Helper Functions (Generating Rhyme Dictionary)

In [46]:
def build_rhyme_pairs_shakespeare(sonnets): 
    """
    Build a dictionary of rhyme pairs and their frequency of 
    occurrence in Shakespeare's sonnets. 
    """
    rhyme_pairs = dict()
    
    for i in range(0, len(sonnets)): 
        sonnet_num = i + 1
        # Excluce Sonnet 99, Sonnet 126, and Sonnet 145. 
        # Sonnet 99 has 15 lines, Sonnet 126 has 12 lines.
        # All other sonnets follow the same rhyme scheme.
        if sonnet_num == 99 or sonnet_num == 126: 
            
            # Sonnet 99 has 15 lines of rhyme scheme ababa cdcd efef gg. 
            # We will throw out the rhyme triple a. 
            if sonnet_num == 99: 
                lines = sonnets[i].split("\n")
                end_words = [' ']
                
                for j in range(1, 16): 
                    words = lines[j].split(' ')
                    last_word = words[-1].strip()
                    last_word = re.sub(r'[^\w\-\']', '', last_word).lower()
                    end_words.append(last_word)
                
                # Create pairs of rhyming words. 
                rhyme1 = (end_words[2], end_words[4])
                rhyme2 = (end_words[6], end_words[8])
                rhyme3 = (end_words[7], end_words[9])
                rhyme4 = (end_words[10], end_words[12])
                rhyme5 = (end_words[11], end_words[13])
                rhyme6 = (end_words[14], end_words[15])
                
                rhymes = [rhyme1, rhyme2, rhyme3, rhyme4, rhyme5, rhyme6]
                
                # Add rhyming pairs to dictionary and keep in track of their 
                # frequency of occurrence. 
                for pair in rhymes: 
                    if pair in rhyme_pairs: 
                        rhyme_pairs[pair] += 1
                    else: 
                        rhyme_pairs[pair] = 1
            
            # Sonnet 126 has 12 lines of rhyme scheme aa bb cc dd ee ff. 
            if sonnet_num == 126: 
                lines = sonnets[i].split("\n")
                end_words = [' ']
                for j in range(1, 13): 
                    words = lines[j].split(' ')
                    last_word = words[-1].strip()
                    last_word = re.sub(r'[^\w\-\']', '', last_word).lower()
                    end_words.append(last_word)
                # Create pairs of rhyming words. 
                rhyme1 = (end_words[1], end_words[2])
                rhyme2 = (end_words[3], end_words[4])
                rhyme3 = (end_words[5], end_words[6])
                rhyme4 = (end_words[7], end_words[8])
                rhyme5 = (end_words[9], end_words[10])
                rhyme6 = (end_words[11], end_words[12])
                
                rhymes = [rhyme1, rhyme2, rhyme3, rhyme4, rhyme5, rhyme6]
                
                # Add rhyming pairs to dictionary and keep in track of their 
                # frequency of occurrence. 
                for pair in rhymes: 
                    if pair in rhyme_pairs: 
                        rhyme_pairs[pair] += 1
                    else: 
                        rhyme_pairs[pair] = 1
                    
        else: 
            # Append the last word of each line (remove punctuation)
            lines = sonnets[i].split("\n")
            end_words = [' ']
            for j in range(1, 15): 
                words = lines[j].split(' ')
                last_word = words[-1].strip()
                last_word = re.sub(r'[^\w\-\']', '', last_word).lower()
                end_words.append(last_word)
            
            # Create pairs of rhyming words. Shakespeare's sonnets has the 
            # following rhyming structure per line: abab, cdcd, efef, gg. 
            rhyme1 = (end_words[1], end_words[3])
            rhyme2 = (end_words[2], end_words[4])
            rhyme3 = (end_words[5], end_words[7])
            rhyme4 = (end_words[6], end_words[8])
            rhyme5 = (end_words[9], end_words[11])
            rhyme6 = (end_words[10], end_words[12])
            rhyme7 = (end_words[13], end_words[14])
            
            rhymes = [rhyme1, rhyme2, rhyme3, rhyme4, rhyme5, rhyme6, rhyme7]
            
            # Add rhyming pairs to dictionary and keep in track of their 
            # frequency of occurrence. 
            for pair in rhymes: 
                if pair in rhyme_pairs: 
                    rhyme_pairs[pair] += 1
                else: 
                    rhyme_pairs[pair] = 1
                    
    return rhyme_pairs

def build_rhyme_pairs_spenser(sonnets): 
    """
    Build a dictionary of rhyme pairs and their frequency of 
    occurrence in Spenser's sonnets. 
    """
    # Spenser sonnets have the following rhyme scehme: abab bcbc cdcd ee. 
    # We keep rhyme pairs a, d, e separate from b, c. 
    # https://sites.udel.edu/britlitwiki/amoretti/
    
    rhyme_pairs = dict()
    rhyme_quads = dict()
    for i in range(0, len(sonnets)): 
        lines = sonnets[i].split("\n")
        end_words = [' ']
        # Ignore irregular Spenser sonnets
        if len(lines) != 14: 
            pass
        else: 
            for j in range(0, 14): 
                line = lines[j].strip()
                words = line.split(' ')
                last_word = words[-1].strip()
                last_word = re.sub(r'[^\w\-\']', '', last_word).lower()
                end_words.append(last_word)
            # Create rhyming words dictionary
            rhyme1 = (end_words[1], end_words[3])
            rhyme2 = (end_words[2], end_words[4], end_words[5], end_words[7])
            rhyme3 = (end_words[6], end_words[8], end_words[9], end_words[11])
            rhyme4 = (end_words[10], end_words[12])
            rhyme5 = (end_words[13], end_words[14])
            
            rhymes1 = [rhyme1, rhyme4, rhyme5]
            rhymes2 = [rhyme2, rhyme3]
            
            # Add rhyming pairs to dictionary and keep in track of their 
            # frequency of occurrence. 
            for pair in rhymes1: 
                if pair in rhyme_pairs: 
                    rhyme_pairs[pair] += 1
                else: 
                    rhyme_pairs[pair] = 1
                    
            for pair in rhymes2: 
                if pair in rhyme_quads: 
                    rhyme_quads[pair] += 1
                else: 
                    rhyme_quads[pair] = 1
                    
    return rhyme_pairs, rhyme_quads

### Helper Functions (Writing Poems with Rhymes)

In [31]:
def write_poem_shakespeare_rhyme(hmm, obs_map, rhyme_pairs, n_words): 
    """
    Generate a poem following the rhyme scheme of a typical Shakespeare
    Sonnet (abab cdcd efef gg)
    """
    poem = ''
    (a1, a2) = rhyme_pairs[0]
    (b1, b2) = rhyme_pairs[1]
    (c1, c2) = rhyme_pairs[2]
    (d1, d2) = rhyme_pairs[3]
    (e1, e2) = rhyme_pairs[4]
    (f1, f2) = rhyme_pairs[5]
    (g1, g2) = rhyme_pairs[6]
    
    poem += sample_sentence1(hmm, obs_map, a1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, a2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, d1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, d2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, e1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, f1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, e2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, f2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, g1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, g2, n_words=10)
    
    return poem

def write_poem_spenser_rhyme(hmm, obs_map, rhyme_pairs, rhyme_quads, n_words): 
    """
    Generate a poem following the rhyme scheme of a typical Shakespeare
    Sonnet (abab bcbc cdcd ee)
    """
    poem = ''
    (a1, a2) = rhyme_pairs[0]
    (b1, b2, b3, b4) = rhyme_quads[0]
    (c1, c2, c3, c4) = rhyme_quads[1]
    (d1, d2) = rhyme_pairs[1]
    (e1, e2) = rhyme_pairs[2]
    
    poem += sample_sentence1(hmm, obs_map, a1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, a2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b3, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, b4, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c3, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, d1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, c4, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, d2, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, e1, n_words=10) + '\n'
    poem += sample_sentence1(hmm, obs_map, e2, n_words=10)
    
    return poem

### Create Rhyme Dictionaries (Shakespeare and Spenser)

In [48]:
shakespeare_rhyme_pairs = build_rhyme_pairs_shakespeare(shakespeare_sonnets)
spenser_rhyme_pairs, spenser_rhyme_quads = build_rhyme_pairs_spenser(spenser_sonnets)

### Generating Poems (Shakespeare Text only, with Shakespeare Rhyme)

In [19]:
obs_reversed_shakespeare, obs_map_reversed_shakespeare = parse_observations_reverse(shakespeare_clean)
hmm3 = unsupervised_HMM(obs_reversed_shakespeare, 16, 100)

Iteration: 10
Iteration: 20
Iteration: 30
Iteration: 40
Iteration: 50
Iteration: 60
Iteration: 70
Iteration: 80
Iteration: 90
Iteration: 100


In [61]:
shakespeare_rhyme = random.sample(list(shakespeare_rhyme_pairs.keys()), 7)
print(write_poem_shakespeare_rhyme(hmm3, obs_map_reversed_shakespeare, shakespeare_rhyme, 8))

These edge to one large plague of thee dote receivest
Of your which i not level thou gives worst sorrow
Form of is summer from being hours commits end deceivest
Supposed on to self adjunct and unwooed this day morrow
Me at then as wouldst such the to expense legacy
Within every and checked tongue mind days on argument thee
The and but thee as add the moan wide free
Me up to in the grounded unused world date melancholy
Was cannot my with such and and strive what's unprovident
Am moving dare hallowed a do doting my horse derive
The we and for rocks a my should me evident
To true doth tender like lambs wretch were to-day thrive
The of year it the and mine celestial that shame
Were yet engraft this bends wit all the thought name


### Generating Poems (Shakespeare + Spenser Text, with Shakespeare Rhyme)

In [18]:
obs_reversed_full, obs_map_reversed_full = parse_observations_reverse(full_text)
hmm2 = unsupervised_HMM(obs_reversed_full, 16, 100)

Iteration: 10
Iteration: 20
Iteration: 30
Iteration: 40
Iteration: 50
Iteration: 60
Iteration: 70
Iteration: 80
Iteration: 90
Iteration: 100


In [66]:
shakespeare_rhyme = random.sample(list(shakespeare_rhyme_pairs.keys()), 7)
print(write_poem_shakespeare_rhyme(hmm2, obs_map_reversed_full, shakespeare_rhyme, 8))

Seen beat that my wilt and thou your beauty sin
But thee three-score of my sweet eye-glances no faults love
Light like friends my sweet bred that poets humble begin
Last appease thankless persever of their mind doth goodly move
Is my world's and with oft they harvest direct idolatry
To milder what never to pleasant heart even live here
Thence it is times and both with tell added be
Breast which self-love of needs others' my beauty happy uprear
Night they a mourning but whose heavy be of hid
Then too mind's worst to more gifts by none mark
I the lowly and night but virtuous in apply forbid
If art still diving when and fingers maid entire bark
Rind of your son will that subject an that thee
And you and brought did my my the sovereign be


### Generating Poems (Shakespeare + Spenser Text, with Spenser Rhyme)

In [82]:
spenser_rhyme1 = random.sample(list(spenser_rhyme_pairs.keys()), 3)
spenser_rhyme2 = random.sample(list(spenser_rhyme_quads.keys()), 2)
print(write_poem_spenser_rhyme(hmm2, obs_map_reversed_full, spenser_rhyme1, spenser_rhyme2, 8))

Mourn which reckoning fairest yet tells grecian yet copy behold
Harder unless thus compare repair bring do where youth made
The more but may it knew a and free mould
Ye clouds blessing ill as more bail in spill fade
Any breaches unless me slight short not to bands trade
Guess terms my breast when construe me will wretch arrive
That to she and then shall prove the'accomplishment of assayed
Love to conceive can not find do can well alive
O means if you is by be praises with achieve
Frantic-mad for still returned have gentle of from imitated beguile
Stone and zealous both the by how all him deprive
Virtue is silence in her applying in resty art spoil
When from better may least me his my displayed reposeth
Love doth whether too have display to the assay supposeth
