In [1]:
import nltk
import numpy as np 

from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)

In [2]:
# Read the whole sonnet corpus into memory. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open.
with open("data/shakespeare.txt") as corpus_file:
    raw = corpus_file.read()

In [11]:
word_dict = []
syllables = {}

# Wrangle syllable data.
# Every non-blank line is read as "<word> <tag> [<tag> ...]": the word joins
# the vocabulary list and is also filed under each tag that follows it.
with open('data/Syllable_dictionary.txt') as dictionary_file:
    for line in dictionary_file:
        # Split on any run of whitespace so trailing spaces or a final newline
        # cannot produce empty-string words or empty-string tags.
        fields = line.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        entry, tags = fields[0], fields[1:]
        word_dict.append(entry)
        for tag in tags:
            syllables.setdefault(tag, []).append(entry)
# No explicit close() is needed: the with-statement already closed the file.

# Prepend the newline token so that it occupies index 0 of the vocabulary.
word_dict = ["\n"] + word_dict

In [17]:
sonnets = []
rhymes = []

# Line index of the most recent sonnet-number header; a verse line's position
# inside its sonnet is (ind - start).
start = 0

# First-occurrence position of every token, i.e. exactly what
# word_dict.index() returns, kept in a dict so that each membership test and
# lookup is O(1) instead of a scan over the whole vocabulary list.
token_index = {}
for vocab_pos, vocab_token in enumerate(word_dict):
    token_index.setdefault(vocab_token, vocab_pos)


def _single_char_id(char):
    """Return the vocabulary index of a stripped leading/trailing character.

    The character (normally a punctuation mark) is registered on first sight:
    appended to word_dict and filed under the '0' key of syllables. The '0'
    list is created on demand so a missing key cannot raise KeyError.
    """
    if char not in token_index:
        word_dict.append(char)
        token_index[char] = len(word_dict) - 1
        syllables.setdefault('0', []).append(char)
    return token_index[char]


# Wrangle sonnets using syllables
with open('data/shakespeare.txt') as sonnet_file:
    for ind, line in enumerate(sonnet_file):
        text = line.strip()

        # Skip blank lines, including ones that contain only whitespace.
        if not text:
            continue

        # A line holding only digits is a sonnet number: remember where the
        # sonnet starts and move on.
        if text.isdigit():
            start = ind
            continue

        # Encode the verse line as a list of vocabulary indices.
        son = []
        for word in text.split(' '):
            word = word.lower()

            # If it's a word in our word_dict
            if word in token_index:
                son.append(token_index[word])

            # Else try peeling one character off the front ...
            elif word[1:] in token_index:
                son.append(_single_char_id(word[0]))
                son.append(token_index[word[1:]])

            # ... or one character off the back.
            elif word[:-1] in token_index:
                son.append(token_index[word[:-1]])
                son.append(_single_char_id(word[-1]))

            # Anything else is not in the vocabulary and is dropped.

        # The rhyme word is the last token containing a letter or digit. This
        # skips trailing punctuation without mistaking one-letter words such
        # as "i" or "a" for punctuation (a plain length test would).
        last_word = next(
            (word_dict[tok] for tok in reversed(son)
             if any(ch.isalnum() for ch in word_dict[tok])),
            None,
        )
        if last_word is None:
            # The rhyme bookkeeping below needs exactly one entry per verse
            # line, so an unusable line is reported instead of surfacing later
            # as an opaque IndexError.
            raise ValueError(
                "no vocabulary word found on line %d of data/shakespeare.txt: %r"
                % (ind + 1, text)
            )

        sonnets.append(son)

        # Add rhyme pairs. Quatrain lines rhyme two lines apart, the final two
        # lines rhyme with each other.
        # NOTE(review): any offset above 14 falls into the last branch, so a
        # sonnet that is not exactly 14 lines long is paired incorrectly --
        # confirm whether the corpus contains such sonnets.
        offset = ind - start
        opens_pair = (
            offset < 3              # lines 1, 2
            or 4 < offset < 7       # lines 5, 6
            or 8 < offset < 11      # lines 9, 10
            or offset == 13         # line 13
        )

        if opens_pair:
            # Partner not seen yet: store the bare word for now.
            rhymes.append(last_word)

        # If line 14: pair with the immediately preceding line.
        elif offset == 14:
            rhymes.append((last_word, rhymes[-1]))
            rhymes[-2] = (rhymes[-2], last_word)

        # If any other line: pair with the line two entries back.
        else:
            rhymes.append((last_word, rhymes[-2]))
            rhymes[-3] = (rhymes[-3], last_word)
# No explicit close() is needed: the with-statement already closed the file.

In [18]:
# Display the collected rhyme entries to check the pairing by eye.
rhymes

[('increase', 'decease'),
 ('die', 'memory'),
 ('decease', 'increase'),
 ('memory', 'die'),
 ('eyes', 'lies'),
 ('fuel', 'cruel'),
 ('lies', 'eyes'),
 ('cruel', 'fuel'),
 ('ornament', 'content'),
 ('spring', 'niggarding'),
 ('content', 'ornament'),
 ('niggarding', 'spring'),
 ('be', 'thee'),
 ('thee', 'be'),
 ('brow', 'now'),
 ('field', 'held'),
 ('now', 'brow'),
 ('held', 'field'),
 ('lies', 'eyes'),
 ('days', 'praise'),
 ('eyes', 'lies'),
 ('praise', 'days'),
 ('use', 'excuse'),
 ('mine', 'thine'),
 ('excuse', 'use'),
 ('thine', 'mine'),
 ('old', 'cold'),
 ('cold', 'old'),
 ('viewest', 'renewest'),
 ('another', 'mother'),
 ('renewest', 'viewest'),
 ('mother', 'another'),
 ('womb', 'tomb'),
 ('husbandry', 'posterity'),
 ('tomb', 'womb'),
 ('posterity', 'husbandry'),
 ('thee', 'see'),
 ('prime', 'time'),
 ('see', 'thee'),
 ('time', 'prime'),
 ('be', 'thee'),
 ('thee', 'be'),
 ('spend', 'lend'),
 ('legacy', 'free'),
 ('lend', 'spend'),
 ('free', 'legacy'),
 ('abuse', 'use'),
 ('give', '

In [6]:
# Replace every word stored in `syllables` with its position in word_dict.
# The lookup table records the first occurrence of each token, which is the
# same index word_dict.index() would return, without rescanning the list for
# every word.
first_position = {}
for vocab_pos, vocab_token in enumerate(word_dict):
    first_position.setdefault(vocab_token, vocab_pos)

for key in syllables:
    # Integers are entries already converted by an earlier run of this cell;
    # keeping them as they are makes the cell safe to execute more than once.
    syllables[key] = [
        item if isinstance(item, int) else first_position[item]
        for item in syllables[key]
    ]

In [7]:
# Train a model on the index-encoded sonnet lines with the project's
# unsupervised_HMM helper.
# NOTE(review): 16 is presumably the number of hidden states -- confirm in HMM.py.
# 100 matches the iteration count shown in the recorded progress log.
# `syllables` is forwarded to the helper; how it is used is not visible here.
hmm4 = unsupervised_HMM(sonnets, 16, 100, syllables)

Iteration: 10
Iteration: 20
Iteration: 30
Iteration: 40
Iteration: 50
Iteration: 60
Iteration: 70
Iteration: 80
Iteration: 90
Iteration: 100


In [8]:
# Invert the vocabulary list: map each token to its position in word_dict.
token_dict = {token: position for position, token in enumerate(word_dict)}

In [9]:
# Print a header, then text sampled from the trained model.
# NOTE(review): the recorded output has 14 lines, so n_words=14 appears to set
# the number of lines rather than words -- confirm in HMM_helper.sample_sentence.
print('Sample Sentence:\n====================')
print(sample_sentence(hmm4, token_dict, n_words=14))

Sample Sentence:
Even was am divine fair affords of , 
 can alas thy the rotten reason in , 
 the art love's builded be not my me said ! 
 pattern to false eloquence of made of , 
 have nature exceed thus doth good little , 
 wardrobe your love right is another could , 
 alone to should strife you set , my by fear , 
 upon me body's early more frown die , 
 lover do pitch dull more out upon were . 
 leave enemies my my endure not ride , 
 save slanderers the , and rid with prevent ! 
 wakened time after leisure both are be , 
 when in show by thoughts worst most me all and : 
 me receives when may one his , in cause story , 
...


In [None]:
# Build word clouds from the trained model via HMM_helper.states_to_wordclouds
# (presumably one per hidden state -- confirm in HMM_helper).
wordclouds = states_to_wordclouds(hmm4, token_dict)

# Rhyming