In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from poesy import Poem

In [3]:
# Sample input: a four-line lyric used as the poem to scan in the cells below.
# The literal starts and ends with a newline because of the triple-quote layout.
text = """
Happy birthday to you
Happy birthday to you
Happy birthday dear Mary
Happy birthday to you
"""

In [4]:
# Scan the sample text with poesy; the resulting Poem object is reused by later cells.
scanned_text = Poem(text)

In [5]:
# Show poesy's per-line scansion table and its estimated schema for the poem.
scanned_text.summary()

  (#s,#l)  parse                          rhyme      #feet    #syll    #parse
---------  -----------------------------  -------  -------  -------  --------
      1.1  HAP|py|BIRTH.DAY*|to.you       a              2        6         3
      1.2  HAP|py|BIRTH.DAY*|to.you       a              2        6         3
      1.3  HAP|py|BIRTH|day*|DEAR.MA*|ry  -              3        7         4
      1.4  HAP|py|BIRTH.DAY*|to.you       a              2        6         3


estimated schema
----------
meter: Iambic
feet: Complex (2-2-3-2)
syllables: 6
rhyme: Couplet (aa)


In [9]:
import poesy

In [83]:
from typing import Optional

class BaseParsedTextHandler:
    """
    Common foundation for handlers that wrap a parsed text.

    Keeps the parsed text together with the newline string and the tag
    templates used when building control codes, plus the set of letters
    treated as vowels. ``syllabic_text`` is a placeholder that always raises
    ``NotImplementedError`` in this class.
    """

    def __init__(
        self,
        parsed_text,
        new_line_chars: str = "\n",
        control_code_start_tag: str = "<PREF>",
        control_code_end_tag: str = "</PREF>",
        control_code_length_tag: str = "<SYLLABLES: {length}>",
        control_code_end_syllable_tag: str = "<END: {syllable}>",
        control_code_rhyme_tag: str = "<RHYME: {rhyme_id}>",
    ):
        # The wrapped parse and the separator written between output rows.
        self.parsed_text, self.new_line_chars = parsed_text, new_line_chars

        # Tags that open and close a control-code block.
        self.control_code_start_tag, self.control_code_end_tag = (
            control_code_start_tag,
            control_code_end_tag,
        )

        # Per-line templates; each is filled in later with str.format().
        self.control_code_length_tag = control_code_length_tag
        self.control_code_end_syllable_tag = control_code_end_syllable_tag
        self.control_code_rhyme_tag = control_code_rhyme_tag

        # Lower- and upper-case vowel letters ("y" is not included).
        self.vowels = set("aeiou") | set("AEIOU")

    def syllabic_text(self, line_break_chars = "\n"):
        """
        Return the formatted parse of the text.

        Not implemented in the base class: calling it raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()
    
class PoesyParsedTextHandler(BaseParsedTextHandler):
    """
    Builds control-code strings from a text scanned with poesy.

    ``parsed_text`` must expose a ``lineld`` iterable of dict-like records;
    ``control_code`` reads the "num_sylls" and "rhyme" keys of each record.
    """

    def __init__(self, parsed_text, **kwargs):
        """
        Parameters
        ----------
        parsed_text
            Scanned text object exposing ``lineld``.
        **kwargs
            Optional settings forwarded unchanged to
            ``BaseParsedTextHandler.__init__`` (``new_line_chars`` and the
            ``control_code_*_tag`` templates), so they can be customised
            without bypassing this class.
        """
        super().__init__(parsed_text=parsed_text, **kwargs)

    def _merge_end_syllables(self, split_syllables):
        """
        Return the final syllable, extended leftwards until it has a vowel.

        Sometimes an end syllable won't contain any of ``self.vowels``, so it
        is merged with the preceding syllable(s) to ensure a rhyme can be
        formed, e.g. ["ma", "ry"] -> "mary". If no syllable contains a vowel,
        every syllable ends up concatenated.

        Parameters
        ----------
        split_syllables : list of str
            Syllables of one line, in order. Each is stripped of surrounding
            whitespace before use.

        Returns
        -------
        str
            The merged end syllable, or "" when ``split_syllables`` is empty.
        """
        if not split_syllables:
            # Nothing to merge; avoid indexing into an empty list.
            return ""

        end_syllable = split_syllables[-1].strip()

        # Walk backwards, prepending syllables only while no vowel is present.
        for previous in reversed(split_syllables[:-1]):
            if not self.vowels.isdisjoint(end_syllable):
                break
            end_syllable = previous.strip() + end_syllable

        return end_syllable

    def control_code(
            self,
            syllabic_text = False,
        ):
        """
        Build the control-code block for the parsed text.

        The result is the start tag, then one row per entry of
        ``self.parsed_text.lineld`` (syllable-count tag followed by rhyme
        tag), then the end tag, all joined with ``self.new_line_chars``.

        The end-syllable tag is deliberately not emitted: the control code
        should not constrain the rhyme word, which is meant to be freely
        generated.

        Parameters
        ----------
        syllabic_text : bool
            Accepted for backward compatibility only. It has never affected
            the returned string, so the unused per-line syllable text is no
            longer computed.

        Returns
        -------
        str
            The assembled control code.
        """
        rows = [self.control_code_start_tag]

        for line in self.parsed_text.lineld:
            rhyme = line.get("rhyme")
            if rhyme is None:
                # Treat a missing rhyme like an unrhymed line, which poesy
                # marks with "-", instead of failing on None.upper().
                rhyme = "-"

            rows.append(
                self.control_code_length_tag.format(length=line.get("num_sylls"))
                + self.control_code_rhyme_tag.format(rhyme_id=rhyme.upper())
            )

        rows.append(self.control_code_end_tag)
        return self.new_line_chars.join(rows)
    

        

# Wrap the scanned poem in the poesy handler and print the control code it builds.
# Note: despite its name, `parsed_text` holds the handler object, not the parse itself.
parsed_text = PoesyParsedTextHandler(scanned_text)
control_code = parsed_text.control_code()
print(control_code)




<PREF>
<SYLLABLES: 6><RHYME: A>
<SYLLABLES: 6><RHYME: A>
<SYLLABLES: 7><RHYME: ->
<SYLLABLES: 6><RHYME: A>
</PREF>
