# Utterance Position Code 9/11/2025
This code is meant to take us from a discrete initial/medial/final understanding of the utterance position variable to a continuous one.
To solve this problem, we'll be doing a very similar process to that in HmongParsel, but with some procedure on the TextPitch function.
Since we don't want the code to run for a year, we'll take advantage of the fact that the F0 and F1 values already exist elsewhere.
However, in order to take advantage of that fact, we need to handle token_id in the exact same way, so that we can join up the dataframes in R by that variable.

In [7]:
import csv, tgt

In [8]:
# To start, we define the HenryTextFrame class as essentially a dictionary which can be thought of as a frame of a textgrid. 
# We'll be passing around lists of these objects. However, since they won't be used to actually index any audio or related objects, 
# we can do away with any info not related to the NAME and to UTTERANCE POSITION

class HenryTextFrame:
    """
    One labelled interval ("frame") of a TextGrid, reduced to its label and
    its position within the utterance.

    Attributes:
        name: the raw interval label; underscore-separated, with the word first.
        utterance_position: position of the frame within its utterance.
        tone: single-character tone code derived from the label (see tone_init).
    """

    def __init__(self, name: str, utterance_position: float):
        self.name = name
        self.utterance_position = utterance_position
        self.tone_init()

    def get_name(self) -> str:
        """Return the raw interval label."""
        return self.name

    def get_up(self) -> float:
        """Return the utterance position."""
        return self.utterance_position

    def get_tone(self) -> str:
        """Return the tone code computed by tone_init."""
        return self.tone

    def __str__(self) -> str:
        # __str__ must return the text; printing here and returning None
        # makes str(frame) / print(frame) raise a TypeError.
        return f'TextFrame {self.name} with utterance position {self.utterance_position}.'

    def tone_init(self) -> None:
        """
        Set self.tone from the label: the last character of the part before
        the first underscore if it is one of the known tone letters,
        otherwise '0' (this includes an empty label).
        """
        interval_text_list = self.name.split('_')
        clean_word = interval_text_list[0]
        # Slicing (rather than indexing) keeps an empty word from raising.
        tone = clean_word[-1:]
        if tone not in ['b', 's', 'j', 'v', 'm', 'g', 'd']:
            tone = '0'
        self.tone = tone

In [9]:
def createNewHFrames(current: list) -> list:
    """
    Given the list of just names for one utterance, create a list of
    HenryTextFrame objects to be appended to our big "grid" list.

    Positions are evenly spaced over the utterance and rounded to 2 decimals:
    the first frame is exactly 0 and the last is exactly 1 (e.g. 5 names give
    0, 0.25, 0.5, 0.75, 1). A single name gets position 1; an empty list
    gives an empty list.
    """
    count = len(current)
    if count == 0:
        return []

    if count == 1:
        positions = [1]
    else:
        last_index = count - 1
        # Each position is computed directly from its index. Rounding a running
        # total at every step compounds the rounding error, and once the step
        # is below 0.005 (more than about 200 names) the total never leaves 0.
        positions = [
            0,
            *(round(index / last_index, 2) for index in range(1, last_index)),
            1,
        ]

    return [
        HenryTextFrame(name=name, utterance_position=position)
        for name, position in zip(current, positions)
    ]

def textProcess(file: str, tone: str) -> list:
    """
    Read ./textgrid/<file>.TextGrid and turn the intervals of its first tier
    into a flat list of HenryTextFrame objects, one per interval.

    Intervals are grouped into utterances using the underscore-separated
    markers in their text: 'ui' opens an utterance and 'uf' closes one. Each
    group is handed to createNewHFrames, which assigns the positions.

    The tone argument is accepted but not used by this function.
    """
    grid = tgt.read_textgrid(f"./textgrid/{file}.TextGrid")  # tgt TextGrid object

    frames = []
    pending = []  # interval texts of the utterance currently being collected
    for interval in grid.tiers[0]:
        label = interval.text
        markers = label.split('_')

        # A new 'ui' while something is still pending means the previous
        # utterance never reached its 'uf'; close it out before starting over.
        if pending and 'ui' in markers:
            frames.extend(createNewHFrames(pending))
            pending = []

        pending.append(label)

        if 'uf' in markers:
            frames.extend(createNewHFrames(pending))
            pending = []

    # Whatever is left after the last interval forms a final group.
    frames.extend(createNewHFrames(pending))
    return frames
        

In [11]:
def utterance_csv(data: list, tone: str):
    """
    Write the rows in data to utterance_position/hmongUtt-<tone>.csv under the
    header token_id, UtterancePosition_continuous.

    data is a list of [token_id, utterance_position] rows. The output
    directory is created if it does not exist yet, so a first run does not
    fail with FileNotFoundError.
    """
    import os  # local import: only needed here, to create the output directory

    os.makedirs('utterance_position', exist_ok=True)
    with open(f'utterance_position/hmongUtt-{tone}.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['token_id', 'UtterancePosition_continuous'])
        writer.writerows(data)

In [12]:
# Code that works through every given file for each speaker and story (from main file),
# once per tone, and writes one CSV per tone.
tones = ['b', 's', 'j', 'v', 'm', 'g', 'd', '0']
speakers = ['Cha', 'Chingla', 'Ellina', 'Gozong', 'Long', 'MaiXee', 'MaiXor', 'Ma']
stories = ['1', '2', '3']

for tone in tones:
    data = []
    # token_id restarts at 0 for every tone and counts only that tone's frames,
    # in speaker-then-story order. It is the key used to join the dataframes
    # in R, so this iteration order must not change.
    token_id = 0
    for speaker in speakers:
        for story in stories:
            file = f'{speaker}Story{story}'

            # See write to csv section for data headers :)
            for frame in textProcess(file, tone):
                if frame.get_tone() == tone:
                    data.append([token_id, frame.get_up()])
                    token_id += 1
    utterance_csv(data = data, tone = tone)