In [2]:
import HMM
import sonnet_utils as utils

In [3]:
# Load the syllable dictionary and the Shakespeare corpus. As used by the
# cells below:
#   sonnets    - iterable of sonnets, each an iterable of tokens
#   syl_dict   - token -> indexable pair of syllable-count lists ([0] and [1]);
#                NOTE(review): presumably regular vs. end-of-line counts —
#                confirm in sonnet_utils.process_data
#   num_dict   - token number -> token
#   token_dict - token -> token number
sonnets, syl_dict, num_dict, token_dict = utils.process_data('./data/Syllable_dictionary.txt', './data/shakespeare.txt')

In [36]:
# Return a token with the first alphanumeric character capitalized.
def capitalize(token):
    """Return `token` with its first alphanumeric character upper-cased.

    Leading non-alphanumeric characters (e.g. the apostrophe in "'tis") are
    kept as they are, so "'tis" becomes "'Tis".

    Args:
        token: The string to capitalize.

    Returns:
        A new string. Tokens that are empty or contain no alphanumeric
        character (e.g. "," or ".") are returned unchanged.
    """
    for i, c in enumerate(token):
        if c.isalnum():
            # Only this one character changes; everything around it is kept.
            return token[:i] + c.upper() + token[i + 1:]
    # Nothing to capitalize: empty token or punctuation only.
    return token

# Turns the sonnet lines into a sonnet.
def process_lines(sonnet_lines, punct):
    """Join tokenized lines into display text with basic typographic fixes.

    The input lists are not modified. The following clean-up is applied:
      * A punctuation token that starts a line is moved to the end of the
        previous line (and dropped if it starts the very first line).
      * The first token of the text and the token following every '.', '!'
        or '?' is capitalized, as are the tokens 'i' and "i'll".
      * A final '.' is appended unless the text already ends with '.', '!'
        or '?'.

    Args:
        sonnet_lines: List of lines, each a list of string tokens. Empty
            lines are allowed and are rendered as empty lines.
        punct: Collection of tokens that count as punctuation.

    Returns:
        The text as one string: tokens separated by single spaces, lines
        separated by newlines. An empty input yields an empty string.
    """
    sentence_enders = ('.', '!', '?')
    processed_lines = [[] for _ in sonnet_lines]
    # Moves punctuation from beginning of line to end of previous line.
    for i, line in enumerate(sonnet_lines):
        if not line:
            continue
        if line[0] not in punct:
            processed_lines[i].append(line[0])
        elif i > 0:
            processed_lines[i - 1].append(line[0])
        # Leading punctuation on the very first line has no previous line to
        # move to, so it is dropped.
        processed_lines[i].extend(line[1:])

    # A single flag carries "start of sentence" across line boundaries, so a
    # sentence ender at the very end of the text (or before an empty line)
    # never indexes past the available tokens.
    capitalize_next = True
    for line in processed_lines:
        for i, token in enumerate(line):
            if capitalize_next or token in ('i', "i'll"):
                line[i] = capitalize(token)
            capitalize_next = token in sentence_enders

    # Close the text with a period, attached to the last line that actually
    # has tokens, unless it already ends a sentence.
    for line in reversed(processed_lines):
        if line:
            if line[-1] not in sentence_enders:
                line.append('.')
            break

    return '\n'.join(' '.join(tokens) for tokens in processed_lines)

import random

def generate_states(hmm, seq_len):
    """Sample a sequence of hidden-state indices from the model.

    The first state is picked uniformly from range(hmm.L). Every following
    state is drawn by walking the cumulative sum of the row hmm.A[current]
    until it exceeds a uniform random number; if the row never exceeds it,
    the last state index (hmm.L - 1) is used.

    Args:
        hmm: Object exposing L (number of states) and A (per-state rows of
            transition weights).
        seq_len: Desired number of states. The returned list always holds
            at least the initial state.

    Returns:
        List of state indices.
    """
    current = random.choice(range(hmm.L))
    path = [current]
    for _ in range(seq_len - 1):
        threshold = random.random()
        cumulative = 0
        # Fallback in case the row's running total never passes the threshold.
        successor = hmm.L - 1
        for candidate, weight in enumerate(hmm.A[current]):
            cumulative += weight
            if threshold < cumulative:
                successor = candidate
                break
        path.append(successor)
        current = successor
    return path

def generate_emission(hmm, state):
    """Sample one observation index emitted by `state`.

    Walks the cumulative sum of the row hmm.O[state] and returns the first
    index at which it exceeds a uniform random number. If the running total
    never exceeds it, hmm.D - 1 is returned instead.

    Args:
        hmm: Object exposing O (per-state rows of emission weights) and D.
        state: Index of the emitting state (a key into hmm.O).

    Returns:
        The sampled observation index.
    """
    threshold = random.random()
    cumulative = 0
    picked = None
    for candidate, weight in enumerate(hmm.O[state]):
        cumulative += weight
        if threshold < cumulative:
            picked = candidate
            break
    # hmm.D is only consulted when no index was selected above.
    return hmm.D - 1 if picked is None else picked

def generate_haiku(hmm, syl_dict, num_dict):
    """Generate a three-line haiku (5, 7, 5 syllables) from the model.

    A sequence of 50 hidden states is sampled up front. For each position a
    token is emitted from the current state and either accepted (appended to
    the current line, advancing to the next state) or rejected (a new token
    is drawn from the same state). A token is rejected when

      * it would push the line past its syllable target while the line is
        still more than one syllable short, or
      * it is punctuation directly following another punctuation token.

    A line ends as soon as some combination of syllable counts hits the
    target exactly, or when an accepted token overshoots it (only possible
    when the line was at most one syllable short).

    To keep the rejection loop from spinning forever on a state that only
    emits unacceptable tokens, a state is skipped after `max_rejections`
    consecutive rejected draws.

    Args:
        hmm: Trained model, passed through to generate_states and
            generate_emission.
        syl_dict: Maps a token to an indexable pair of syllable-count lists.
            Counts from [0] are used when the line continues after the
            token; counts from both [0] and [1] are considered when testing
            whether the token can finish the line.
            NOTE(review): presumably regular vs. end-of-line counts —
            confirm in sonnet_utils.
        num_dict: Maps an emitted observation index to its token.

    Returns:
        The haiku as a string, formatted by process_lines.

    Raises:
        IndexError: If the 50 pre-generated states are used up before the
            haiku is complete.
    """
    punctuation = ['.', ',', ':', '?', '!', ';']
    line_lens = [5, 7, 5]
    # Consecutive rejected draws tolerated for one state before skipping it.
    max_rejections = 1000
    states = generate_states(hmm, 50)
    lines = [[] for _ in line_lens]
    state_idx = 0
    rejections = 0
    # Deliberately carried across lines: process_lines moves line-leading
    # punctuation to the end of the previous line.
    prev_token_is_punct = False
    for line, target in zip(lines, line_lens):
        # List of possible number of syllables in the line so far
        possible_syls = [0]
        while True:
            if state_idx >= len(states):
                raise IndexError(
                    'ran out of hidden states before the haiku was complete')
            token = num_dict[generate_emission(hmm, states[state_idx])]
            token_syls = syl_dict[token]
            next_possible_syls = [
                syls + num_syls
                for num_syls in possible_syls
                for syls in token_syls[0] + token_syls[1]
            ]

            overshoots = min(next_possible_syls) > target
            cur_token_is_punct = token in punctuation
            # Allow an overshooting token only if the line is at most one
            # syllable short of its target.
            too_long = overshoots and max(possible_syls) < target - 1
            double_punct = prev_token_is_punct and cur_token_is_punct
            if too_long or double_punct:
                # Generate another token and try again with the same state,
                # unless this state keeps producing rejects.
                rejections += 1
                if rejections >= max_rejections:
                    state_idx += 1
                    rejections = 0
                continue

            rejections = 0
            prev_token_is_punct = cur_token_is_punct
            line.append(token)
            state_idx += 1

            if target in next_possible_syls or overshoots:
                # The line is complete.
                break

            # In this case, the line is not complete: the token sits
            # mid-line, so only its [0] syllable counts apply from here on.
            possible_syls = [a + b for a in possible_syls for b in token_syls[0]]
    return process_lines(lines, punctuation)

In [43]:
# Encode every sonnet as a list of token numbers by looking each token up in
# token_dict.
number_sonnets = [[token_dict[token] for token in sonnet] for sonnet in sonnets]
# Train the HMM on the encoded sonnets.
# NOTE(review): the two 50s are presumably the number of hidden states and the
# number of training iterations (the captured log stops at "Iteration: 50") —
# confirm the argument order in HMM.unsupervised_HMM.
hmm = HMM.unsupervised_HMM(number_sonnets, 50, 50)

Iteration: 10
Iteration: 20
Iteration: 30
Iteration: 40
Iteration: 50


In [45]:
# Sample and print ten haikus from the trained model. No random seed is set in
# the visible cells, so the printed haikus differ from run to run.
for i in range(10):
    print(generate_haiku(hmm,syl_dict, num_dict))
    # print('\n') writes the literal newline plus print's own line ending,
    # leaving two blank lines between consecutive haikus.
    print('\n')

But best ink contents
me , when maiden time . Be thou
harder I thy name .


Ill to her , or for
my am shadow the weed of
these , and falls , and all .


Give boat , and true of
sepulchres , like in the willing
provide foul and carve .


World till no woe with
love's poet thy beauty rage ,
before being take .


Foul no gone were this
purpose , and in fist upon
the cheer , nor horse to .


That in my dream , and
to thy golden from my abuse .
Spurring do thou hadst .


But eyes she faster
for let a judgment on the
best on one , summer's .


Successive thou : nor
for my lays . And I knows day :
such that I saw from .


Both ; and tender thou
boundless aright , all own think
to that ! For mind find .


When up-locked cast
I thy loving canker and
good plagues for but waste .


