# TextGrid creation for mouse tracking data

## import modules

In [1]:
%matplotlib inline
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from json import load
import os
import tgt

## functions from the evaluation notebook

In [2]:
def find_file(suffix, path):
    """
    walk the directory tree below path and collect every file whose name ends with suffix
    return the bare file names (without their directory part) as a list
    """
    return [
        name
        for _root, _dirs, names in os.walk(path)
        for name in names
        if name.endswith(suffix)
    ]
        
def read_file(path):
    """
    parse the JSON file at path and return its content

    the file is always decoded as UTF-8 (the encoding JSON is exchanged in),
    so the result does not depend on the locale of the machine running the notebook
    """
    with open(path, encoding="utf-8") as raw_jfile:
        return load(raw_jfile)
    
def crop_list(lst):
    """
    remove entries from log file before the game starts and after the game is finished
    return cropped file as list

    the game starts with the bot message "Game started!" and is finished by a bot
    message containing "no images left" (case-insensitive); only messages sent by
    the user named "Image_Click_Bot" count as markers

    - both markers found:  entries from the start marker up to and including the end marker
    - only start marker:   entries from the start marker to the end of the log
    - only end marker:     entries from the beginning up to and including the end marker
    - no marker at all:    empty list

    if several start markers occur, the most recent one wins and discards any end
    marker seen before it
    """
    start = None
    end = None

    for position, entry in enumerate(lst):
        try:
            message = entry["msg"]
            sender = entry["user"]["name"]
        except (KeyError, TypeError):
            # entry is not a chat message (e.g. a mouse event) -> cannot be a marker
            continue
        if sender != "Image_Click_Bot" or not isinstance(message, str):
            continue
        if message == "Game started!":
            # a new game invalidates an end marker that belongs to an earlier one
            start, end = position, None
        elif "no images left" in message.lower():
            end = position

    if start is None and end is None:
        return []
    # a missing boundary (None) leaves that side of the slice open
    return lst[start:None if end is None else end + 1]

def json_filename(file):
    """
    return file name for the json file used in the current log file

    file is the list of log entries; the name is the last whitespace-separated
    token of the first message that contains "json file:"
    return None if no entry carries such a message
    """
    for entry in file:
        try:
            message = entry["msg"]
        except (KeyError, TypeError, IndexError):
            # entry has no message text -> skip it
            continue
        if isinstance(message, str) and "json file:" in message:
            return message.split()[-1]
    return None

def images_in_log(file):
    """
    split log file on new_image commands
    return list containing one section for each image in log file,
    each of them starting with a new_image command

    the log is cropped with crop_list first; entries in front of the first
    new_image command do not belong to any section, and the last section runs
    to the end of the cropped log
    return an empty list if the cropped log holds no new_image command
    """
    file = crop_list(file)

    # positions are taken from enumerate instead of file.index(entry): index()
    # compares by equality and would return the first of two identical entries
    starts = [
        position
        for position, entry in enumerate(file)
        if isinstance(entry, dict) and entry.get("type") == "new_image"
    ]
    if not starts:
        return []

    # every section ends where the next one begins; the last one ends with the log
    ends = starts[1:] + [len(file)]
    return [file[begin:end] for begin, end in zip(starts, ends)]

## additional functions

In [40]:
def extract_data(img_slice):
    """
    extract data from img slice in log file:
    return timestamps for beginning and end, duration, tracking data and click with normalised timestamps

    img_slice is a list of log entries belonging to one image
    the result is a dict with the keys
        "tracking_data": entries of the last mouse_positions event, timestamps relative to t_begin
        "clicks":        one dict (x, y, element, timestamp) per mouse click, timestamps relative to t_begin
        "t_begin":       timestamp of the click on "#overlayButton"
        "t_end":         timestamp of the last mouse click in the slice
        "duration":      t_end - t_begin

    the entries of img_slice are left untouched, so the function can be called
    repeatedly on the same slice
    raise ValueError if the slice contains no click on "#overlayButton"
    """
    tracking_data = []
    clicks = []
    t_begin = None
    t_end = None

    for entry in img_slice:
        try:
            entry_type = entry["type"]
            if entry_type == "mouse_positions":
                tracking_data = entry["data"]
            elif entry_type == "mouse_click":
                data = entry["data"]
                if data["element"] == "#overlayButton":
                    # get t_begin: timestamp from user click on overlay button
                    t_begin = data["timestamp"]
                clicks.append({"x": data["coordinates"]["x"],
                               "y": data["coordinates"]["y"],
                               "element": data["element"],
                               "timestamp": data["timestamp"]})
        except (KeyError, TypeError):
            # incomplete entry -> ignore it, keep going with the rest of the slice
            continue

    for entry in reversed(img_slice):
        # last (i.e. correct) mouse click defines t_end
        if isinstance(entry, dict) and entry.get("type") == "mouse_click":
            t_end = entry["data"]["timestamp"]
            break

    if t_begin is None:
        # without the overlay click there is no reference point for the timestamps
        raise ValueError('image slice contains no mouse click on "#overlayButton"')

    # set timestamps relative to t_begin; build new dicts so the log entries
    # handed in by the caller keep their absolute timestamps
    tracking_data = [dict(point, timestamp=point["timestamp"] - t_begin) for point in tracking_data]
    clicks = [dict(click, timestamp=click["timestamp"] - t_begin) for click in clicks]

    duration = t_end - t_begin

    return {"tracking_data": tracking_data, "clicks": clicks, "t_begin": t_begin, "t_end": t_end, "duration": duration}

def data_and_filenames_from_log(file_path):
    """
    read log file and return list with extracted data from log slices and corresponding file names

    file_path is the log file, given relative to the "data" directory below the
    current working directory; the json file named inside the log is read from
    "data/json/"
    return a list of tuples (extracted data, file names): the first element is
    the dict produced by extract_data for one image section, the second a dict
    with the keys 'audio_wav', 'audio_json' and 'img'
    raise ValueError if the log does not name a json file
    """
    # build the paths explicitly instead of changing the working directory:
    # the process-wide cwd then stays intact even if reading or parsing fails
    data_dir = os.path.join(os.getcwd(), "data")

    # read content from log file
    log_content = read_file(os.path.join(data_dir, file_path))

    # get filename from json file used in this session
    json_file = json_filename(log_content)
    if json_file is None:
        raise ValueError("log file {!r} does not name a json file".format(file_path))

    json_file_content = read_file(os.path.join(data_dir, "json", json_file))

    filenames = []
    # iterate through entries in json_file_content
    for entry in json_file_content:
        try:
            # fetch filenames for audio (wav and json) and image files from json file
            audio_wav = json_file_content[entry]["audio_filename"]
            audio_json = os.path.splitext(audio_wav)[0]+".json"
            img_path = json_file_content[entry]["image_filename"]
        except (KeyError, TypeError, IndexError):
            # entry does not describe an audio/image pair -> skip it
            continue
        # append file names to "filenames" list
        filenames.append({'audio_wav': audio_wav, 'audio_json': audio_json, 'img': img_path})

    # split log file: separate parts for each image
    log_slices = images_in_log(log_content)

    # pair each log section with the corresponding image and audio file names
    # (wav and json) and extract the data from the section
    # NOTE(review): zip pairs purely by position and stops at the shorter list;
    # this assumes the log shows the images in the order of the json file - confirm
    return [(extract_data(log_slice), names) for log_slice, names in zip(log_slices, filenames)]


## read log files from the /data directory

In [4]:
# remember the current working directory; the next cell builds the data path from it
cwd = os.getcwd()

In [5]:
# collect the names of all log files below the data directory
os.chdir(cwd+"/data/")    # search from inside the data directory ...
files = find_file(".log", os.getcwd())
os.chdir(cwd)             # ... and switch back afterwards
print(files)

['2018-12-19 10-15-20-meetup 1.log']


## extract data

In [117]:
# process the first log file only and keep just its first image section:
# a tuple of (data extracted from the log section, corresponding file names)
preprocessed_data = data_and_filenames_from_log(files[0])[0]
# log_data: dict returned by extract_data; filenames: dict with 'audio_wav', 'audio_json' and 'img'
log_data, filenames = preprocessed_data

## set up textgrid file

In [118]:
# https://textgridtools.readthedocs.io/en/stable/api.html

# start with a file that contains nothing but the two TextGrid header lines
with open("test.textgrid", "w") as tg:
    tg.write('File type = "ooTextFile"\nObject class = "TextGrid"\n')

# textgrid object that collects the tiers
textgrid = tgt.core.TextGrid(filename='test.textgrid')

# all four tiers cover the same time span
# NOTE(review): /1000 presumably converts milliseconds to seconds and +100 adds
# some room behind the last click - confirm
tier_end_time = (log_data["duration"]/1000)+100

# point tiers for the mouse data, interval tiers for the audio segmentation
clicks_tier = tgt.core.PointTier(start_time=0, end_time=tier_end_time, name='clicks', objects=None)
movement_tier = tgt.core.PointTier(start_time=0, end_time=tier_end_time, name='movement', objects=None)
words_tier = tgt.core.IntervalTier(start_time=0, end_time=tier_end_time, name='words', objects=None)
phonemes_tier = tgt.core.IntervalTier(start_time=0, end_time=tier_end_time, name='phonemes', objects=None)

# register the (still empty) tiers and store the result in short format
textgrid.add_tiers((clicks_tier, movement_tier, words_tier, phonemes_tier))
tgt.write_to_file(textgrid, "test.textgrid", format='short')

## read audio segmentation file

In [119]:
# load the segmentation that belongs to the audio file of the current image
audio_segmentation_file = filenames['audio_json']
audio_segmentation = read_file("data/audio/"+audio_segmentation_file)

# sort the segments by their type; entries of any other type are dropped
words = [segment for segment in audio_segmentation if segment['type'] == 'word']
phonemes = [segment for segment in audio_segmentation if segment['type'] == 'phoneme']

## add data to textgrid

In [120]:
# mouse clicks go to the clicks tier, tracking positions to the movement tier;
# each point is labelled with its "x,y" coordinates
# NOTE(review): /1000 presumably converts the timestamps from milliseconds to seconds - confirm
for point_tier, positions in ((clicks_tier, log_data["clicks"]),
                              (movement_tier, log_data["tracking_data"])):
    for position in positions:
        label = "{},{}".format(position['x'], position['y'])
        point_tier.add_point(tgt.core.Point(position['timestamp']/1000, text=label))

# words and phonemes become intervals labelled with their content
for interval_tier, segments in ((words_tier, words), (phonemes_tier, phonemes)):
    for segment in segments:
        interval_tier.add_interval(tgt.core.Interval(segment["start"], segment["end"], text=segment['content']))

# save changes to file
tgt.write_to_file(textgrid, "test.textgrid", format='long')

## write changes to textgrid file

In [121]:
# convert to appropriate format
# the ELAN export is only kept in the variable "elan"; this cell does not write it to disk
elan = tgt.io.export_to_elan(textgrid, encoding='utf-8', include_empty_intervals=False, include_point_tiers=True, point_tier_annotation_duration=0.04)
long_textgrid =  tgt.io.export_to_long_textgrid(textgrid)

# write to file
# encode explicitly as UTF-8 so that labels with non-ASCII characters are stored
# independently of the locale of the machine running the notebook
with open("test.textgrid", "w", encoding="utf-8") as tg:
    tg.write(long_textgrid)

In [108]:
# switch the working directory to a fixed location
# NOTE(review): the absolute path is machine-specific and has to be adapted on other systems
cwd = "/home/simeon/Dokumente/mouse-tracking-experiment/evaluation/"
os.chdir(cwd)

In [None]:
# signature note for creating a single interval
# NOTE(review): start_time and end_time are not defined anywhere in the visible notebook,
# so running this cell as it stands raises a NameError
tgt.core.Interval(start_time, end_time, text='')