This code pairs each word in the transcript YAML file with the eye-gaze coordinates recorded while that word was being spoken, and writes the result to another YAML file.

In [40]:
import os
import sys
import csv
import yaml

# name of the csv file containing eyedata
eyedata_filename = "../cookietheftdata/PC0039/PC0039_red_eyegaze_aligned.csv"

# name of the transcript file
transcript_filename = "../cookietheftdata/PC0039/PC0039_red_transcript_par.yaml"

# name of the yaml file to write to
text_filename = "./realigned/PC0039_red_transcript_aligned_eyegaze.yaml"

# Both input files must exist before any work is done. Exit with a non-zero
# status so a calling shell or pipeline can tell that the run failed; a bare
# sys.exit() would report success (status 0).
for required_filename in (eyedata_filename, transcript_filename):
    if not os.path.exists(required_filename):
        print(f"File '{required_filename}' doesn't exist")
        sys.exit(1)

# Read every eye-data row into memory, discarding the header row.
# newline='' is what the csv module expects so that newlines embedded in
# quoted fields are parsed correctly.
with open(eyedata_filename, 'r', newline='') as eyedata:
    reader = csv.reader(eyedata)
    # next() with a default skips the header without raising on an empty file
    # (del eye_rows[0] would raise IndexError there).
    next(reader, None)
    eye_rows = list(reader)

# Load the whole transcript into memory. safe_load returns None for an empty
# document, so fall back to an empty list to keep the later iteration over
# `data` from raising TypeError.
with open(transcript_filename, 'r') as transcript:
    data = yaml.safe_load(transcript) or []

results = []

# For every word in the transcript, collect the eye-data coordinates whose
# timestamp falls between the word's start and end times (inclusive).
for entry in data:
    # Entries without a 'result' list carry no word timings to align.
    if 'result' not in entry:
        continue

    for word_data in entry['result']:
        end = word_data.get('end')
        start = word_data.get('start')
        word = word_data.get('word')

        # A word without both timestamps cannot be aligned, so fail loudly.
        if start is None or end is None:
            raise ValueError("ERROR: No start or end time.")

        # Convert once per word instead of once per eye-data row.
        start_time = float(start)
        end_time = float(end)

        coords = []
        # The header row has already been dropped from eye_rows, so every row
        # is scanned here; starting at index 1 and stopping one short of the
        # end would silently lose the first and last samples.
        for row in eye_rows:
            # Skip blank CSV lines and rows whose first column is empty.
            if not row or row[0] == '':
                continue

            # Column 1 is compared against the word times; columns 2 and 3
            # are stored as the coordinate pair.
            timestamp = float(row[1])

            # Stop at the first sample past the word's end.
            # NOTE(review): this assumes rows are in ascending time order -- confirm.
            if timestamp > end_time:
                break

            if start_time <= timestamp <= end_time:
                # Coordinates are kept exactly as read from the CSV (strings).
                coords.append([row[2], row[3]])

        results.append({
            'end': end,
            'start': start,
            'word': word,
            'coords': coords,
        })
    
# Create the output directory if needed; open(..., 'w') does not create
# missing parent directories and would raise FileNotFoundError.
os.makedirs(os.path.dirname(text_filename) or '.', exist_ok=True)

# sort_keys=False keeps each entry's keys in insertion order
# (end, start, word, coords).
with open(text_filename, 'w') as yamlf:
    yaml.dump(results, yamlf, default_flow_style=None, sort_keys=False)

print(f"Results saved to '{text_filename}'")

Results saved to './realigned/PC0039_red_transcript_aligned_eyegaze.yaml'
