# Simple Dialogues - Anna Playground

## Basic functions to expand simple text to pronunciation descriptions.

In [4]:
# Imports
import pandas as pd
import pronouncing
import re


# Load the external phoneme mappings into pandas DataFrames.
# NOTE(review): max_colwidth is set to 0 presumably so that long description
# text is not truncated when pandas renders values as strings - confirm.
pd.set_option('display.max_colwidth', 0)
# The functions in this file read the 'cmu_symbol' and 'description' columns
# of these two tables.
consonants = pd.read_csv(filepath_or_buffer='consonants.csv')
vowels = pd.read_csv(filepath_or_buffer='vowels.csv')


# Conversions
def wordToPhonemeList(word):
    """Converts a single word to list of phonemes.

    The word is looked up with ``pronouncing.phones_for_word`` and only the
    first pronunciation returned is used. Digits (the stress markers in the
    pronunciation string) are removed before splitting into symbols.

    Args:
        word: The word to look up.

    Returns:
        A list of phoneme symbols without stress digits, or an empty list
        when the lookup returns no pronunciation for the word.
    """
    pronunciations = pronouncing.phones_for_word(word)
    # Unknown words (and the empty string) yield no pronunciations; return an
    # empty list rather than raising IndexError on the [0] lookup.
    if not pronunciations:
        return []
    phoneme_str_nostress = ''.join(
        char for char in pronunciations[0] if not char.isdigit()
    )
    return phoneme_str_nostress.split()


def phonemeToDescription(phoneme, inventory):
    """Converts a single phoneme to its articulatory description.

    Args:
        phoneme: Symbol compared for exact equality against the inventory's
            'cmu_symbol' column.
        inventory: DataFrame with 'cmu_symbol' and 'description' columns.

    Returns:
        The 'description' value of the first matching row, converted to a
        string with surrounding whitespace stripped, or '' when no row
        matches.
    """
    matches = inventory.loc[inventory['cmu_symbol'] == phoneme, 'description']
    if matches.empty:
        return ''
    # Read the cell value directly instead of going through Series.to_string():
    # that is a display formatter, so its output depends on pandas display
    # options and it renders an empty selection as the text 'Series([], )'.
    return str(matches.iloc[0]).strip()


def wordToDescription(word):
    """Converts a single word to its articulatory description.

    Each phoneme from ``wordToPhonemeList(word)`` is looked up first in the
    module-level ``consonants`` table and then in ``vowels``; the descriptions
    found are joined with single spaces.

    Args:
        word: The word to describe.

    Returns:
        The space-separated phoneme descriptions. Phonemes found in neither
        table contribute nothing, so the result may be ''.
    """
    descriptions = []
    for phoneme in wordToPhonemeList(word):
        # Compare for exact equality, matching the lookup performed by
        # phonemeToDescription. str.contains() is a regex/substring test, so
        # e.g. 'S' would also "match" a row holding 'SH'.
        if (consonants['cmu_symbol'] == phoneme).any():
            descriptions.append(phonemeToDescription(phoneme, consonants))
        elif (vowels['cmu_symbol'] == phoneme).any():
            descriptions.append(phonemeToDescription(phoneme, vowels))

    return ' '.join(descriptions).strip()


def docToDescription(doc):
    """Converts a whole document to its articulatory description.

    Args:
        doc: Text to expand. Every character other than word characters,
            whitespace and apostrophes is removed before the text is split
            into words.

    Returns:
        The concatenated ``wordToDescription`` results for each word, with
        surrounding whitespace stripped. An empty or whitespace-only document
        yields ''.
    """
    cleaned = re.sub(r'[^\w\s\']', '', doc)
    # split() with no argument splits on any run of whitespace and never
    # yields empty tokens, so repeated spaces, tabs and newlines no longer
    # produce '' or multi-line "words" for the pronunciation lookup.
    words = cleaned.split()
    # NOTE(review): word descriptions are concatenated with no separator, as
    # in the original code. This looks unintended (phoneme descriptions within
    # a word are space-separated) - confirm before changing the output format.
    return ''.join(wordToDescription(word) for word in words).strip()


def replaceTags(doc, name, pro, pro_pos):
    """Replaces tags like <PRO> with actual content.

    Args:
        doc: Text containing ``<PRO>`` and ``<PRO_POS>`` tags.
        name: Text substituted for the first ``<PRO>`` tag.
        pro: Pronoun substituted for every remaining ``<PRO>`` tag.
        pro_pos: Possessive pronoun substituted for every ``<PRO_POS>`` tag.

    Returns:
        The text with all tags replaced and each sentence capitalized.
    """
    # Add name: only the first <PRO> tag becomes the name. Replacing the tag
    # itself (rather than the first occurrence of the pronoun text after
    # substitution) keeps words that merely contain the pronoun, such as
    # 'the' for 'he', from being rewritten.
    doc = doc.replace('<PRO>', name, 1)
    # Basic pronoun swap. Plain str.replace inserts the pronouns literally;
    # re.sub would interpret backslashes in them as replacement escapes.
    doc = doc.replace('<PRO>', pro)
    doc = doc.replace('<PRO_POS>', pro_pos)
    # Capitalize everything after end punctuation. The capturing group keeps
    # the punctuation and trailing whitespace in the split result so the text
    # can be reassembled unchanged apart from letter case.
    punct_re = re.compile(r'([.!?]\s*)')
    # NOTE: str.capitalize() also lowercases the rest of each piece, so an
    # upper-case name such as 'RAYMOND' comes out as 'Raymond'.
    return ''.join(piece.capitalize() for piece in punct_re.split(doc))

    

## Given a title, prologue, dialogue, and epilogue - generate a chapter.

In [5]:
def createChapter(title, prologue, dialogue, epilogue):
    """Assembles a chapter from a title, prologue, dialogue and epilogue.

    Args:
        title: Chapter title; written in upper case on the first line.
        prologue: Text placed after the title.
        dialogue: Iterable of turns, each indexable as
            [speaker, pronoun, possessive pronoun, simple text].
        epilogue: Text placed after the last expanded turn.

    Returns:
        The chapter text: upper-cased title, prologue, one expanded paragraph
        per dialogue turn, then the epilogue, with a blank line after each
        section except the epilogue.
    """
    # Title and prologue open the chapter, each followed by a blank line.
    opening = title.upper() + "\n\n" + prologue + "\n\n"

    # Expand every dialogue turn into its own paragraph.
    paragraphs = []
    for turn in dialogue:
        speaker_name = turn[0].upper()
        subject_pronoun = turn[1]
        possessive_pronoun = turn[2]
        simple_text = turn[3]
        # Turn the simple utterance into its description, then fill in tags.
        expanded = docToDescription(simple_text)
        filled = replaceTags(
            expanded, speaker_name, subject_pronoun, possessive_pronoun
        )
        paragraphs.append(filled + "\n\n")

    # The epilogue closes the chapter.
    return opening + ''.join(paragraphs) + epilogue

## Example data and chapter generation.

In [6]:
# Sample inputs for a short chapter.
title = "Coffee"
prologue = "Raymond approaches the counter."
# Each turn is [speaker, pronoun, possessive pronoun, simple utterance].
dialogue = [
    ["Raymond", "he", "his", "Small drip please."],
    ["The barista", "she", "her", "Cream or sugar?"],
    ["Raymond", "he", "his", "Small drip please."],
    ["The barista", "she", "her", "Your Name?"],
    ["Raymond", "he", "his", "Raymond."],
]
epilogue = "The barista nods, reaches for a cup, and begins his order."

# Print only the length of the generated chapter text.
print(len(createChapter(
    title=title,
    prologue=prologue,
    dialogue=dialogue,
    epilogue=epilogue,
)))

9110
