In [1]:
import glob     # Import glob to easily loop over files
import pympi    # Import pympi to work with elan files
import nltk     # Import nltk to use tokenizer
import pandas as pd # Import pandas to create data tables
import shutil # Import shutil to remove results from prior runs recursively
import os #Import os to get the current working directory 


In [2]:
# @name correctionSearch
# @param notFoundNode (corrected) Annotation not found until now in given list.
# @param all_node_names all valid annotation lables
# @param foundExistingNodes return paramater that collects all found and corrected lables
# @param file the filename of the current file
# @param role current speaker role (typically EE or EX)
# @param interval the time intervall of the current annotation
# @param warninglog the string of all warnings thrown while execution
# @param initNode the unccorected annatation lable 
# @returns boolean value indicating that a recursive search through the correction space has yield a valid annotation + the warning log string
# 
# This function performs a depth-first search through all possoble corrections of an annotation lable 
def correctionSearch(notFoundNode, corrections, all_node_names, foundExistingNodes, file, role, interval, warninglog, initNode):
    nodeUnknown = True
    for correction in corrections:
        if correction(notFoundNode).replace("X","") in all_node_names:
            foundExistingNodes.append(correction(notFoundNode))
            warninglog = warninglog+"Node renamed: "+initNode+" -> "+correction(notFoundNode)+"; File: "+file+"; Role: "+role+"; Timeslot"+interval+"\n"
            print(warninglog)
            return False, warninglog
    if nodeUnknown:
        for corIdx in range(len(corrections)):
            correction = corrections[corIdx]
            ncorrections = corrections.copy()
            del ncorrections[corIdx]
            broadSearchRes = correctionSearch(correction(notFoundNode), ncorrections, all_node_names, foundExistingNodes, file, role, interval, warninglog, initNode)
            warninglog = broadSearchRes[1]
            nodeUnknown = nodeUnknown and broadSearchRes[0]
            if not nodeUnknown:
                return False, warninglog
    return nodeUnknown, warninglog
        

In [3]:
# These are the correction functions. They take a given lable as input and produce a sugestion for a correction as output

# @name correctionPoint
# @param annotation An annotation lable that was not found in the given list of lables
# return a suggestion for a correction i.e. the "corrected lable"
#
# This correction function adds a point to the input lable
def correctionPoint(annotation):
    prepend =""
    if "X" in annotation:
        prepend="X"
        annotation = annotation.replace("X","")
    return annotation +  "."+prepend

# @name correctionXSpace
# @param annotation An annotation lable that was not found in the given list of lables
# return a suggestion for a correction i.e. the "corrected lable"
#
# This correction function replaces Xs
def correctionXSpace(annotation):
    if "X" in annotation:
        return annotation.replace("X","").strip()+"X"
    elif "x" in annotation:
        return annotation.replace("x","").strip()+"X"
    else:
        return annotation

# @name correctionSpelling
# @param annotation An annotation lable that was not found in the given list of lables
# return a suggestion for a correction i.e. the "corrected lable"
#
# This correction function replaces labels that previously were used with the new lables
def correctionSpelling(token):
    prepend =""
    if "X" in token:
        prepend="X"
        token = token.replace("X","")
    if token == "1.0":
        token = "1"
    elif token == "1.0.":
        token = "1"
    elif token == "1.":
        token = "1"
    elif token == "8.2.1":
        token = "8.2"
    elif token == "5.7.":
        token = "6.2.2."
    return token+prepend

In [4]:
# @name correctNodeNames
# @param annotation the annotationa label in the ELAN file
# @param all_node_names all valid labels
# @param errorlog a string that collects all Node unknowns (where no correction to a valid lable was found)
# @paran warningloga  string that collects all Node renames (where a correction to a valid lable was found)
# @paran file the file name of the ELAN file that will be corrected
# @param role the speaker (-role). In the current version always EE or EX
# @param interval the time interval of the current label
# @param eaf the api-object of the ELAN file
# @return errorlog a string of all node unknows
# @return warninglog a string of all node renames
#
# This function applies corrections to all invalid labels (if possible) and write them back to the ELAN file
def correctNodeNames(annotation, all_node_names, errorlog, warninglog, file, role, interval, eaf):
    annotationText = annotation[2]
    foundNodes = annotationText.split(",")
    foundNodes = list(map(lambda x: x.strip(),foundNodes))
    foundNotExistingNodes = list(filter(lambda x: (not x in all_node_names and not x.replace("X", "").strip() in all_node_names) and not x == "",foundNodes)) 
    foundExistingNodes = list(filter(lambda x: x in all_node_names or x.replace("X", "").strip() in all_node_names,foundNodes))
    # correctionXSpace must be the last one !!!!!
    corrections = [correctionXSpace, correctionPoint, correctionSpelling]

    for notExNode in foundNotExistingNodes:
        correctionUnsuccessful, warninglog = correctionSearch(notExNode, corrections, all_node_names, foundExistingNodes, file, role, interval, warninglog, notExNode)    
        if correctionUnsuccessful:
            errorlog = errorlog+"Node unknown: "+notExNode+"; File: "+file+"; Role: "+role+"; Timeslot"+interval+"\n"
    if len(foundNotExistingNodes) > 0: 
        eaf.remove_annotation(role+"_Gamenodes", annotation[0])
        eaf.add_annotation(role+"_Gamenodes",annotation[0],annotation[1]-1 ,str(",".join(foundExistingNodes)))

    #print("Change annotation "+annotation[2]+interval+" to "+ str(",".join(foundExistingNodes)))
    return errorlog, warninglog


    

In [5]:
# @name ms2human_readable
# @param ms the start or end label of an annotation
# @returns Timestemp in a human-readable format
#
# This function converts a timestenp in milliseconds to a human readable time stamp
def ms2human_readable(ms):
    mss = ms % 1000
    sec = int(ms / 1000)
    min = int(sec/60)
    sec = sec - min*60
    h = int(min/60)
    min = min - h*60
    return str(h)+":"+str(min)+":"+str(sec)+" "+str(mss)

In [6]:
# load valid lables
keys = pd.read_csv("Nodeaufstellung.csv")["NodeID"].to_list()
# Remove old results
shutil.rmtree(os.getcwd()+'\\out')
os.mkdir("out")
files = glob.glob("in/*.eaf")
errorlog = ""
warninglog = ""
# Interate through all ELAN files in the in folder and apply the corrections
for file in files:
    eafob = pympi.Elan.Eaf(file)
    singleTextEX = "( EX "+file+")"
    singleTextEE = "( EE "+file+")"
    for annotation in eafob.get_annotation_data_for_tier("EX_Gamenodes"):
        errorlog, warninglog = correctNodeNames(annotation,keys, errorlog, warninglog, file, "EX", "["+str(ms2human_readable(annotation[0]))+";"+str(ms2human_readable(annotation[1]))+"]",eafob)
    for annotation in eafob.get_annotation_data_for_tier("EE_Gamenodes"):
        errorlog, warninglog = correctNodeNames(annotation,keys, errorlog, warninglog, file, "EE", "["+str(ms2human_readable(annotation[0]))+";"+str(ms2human_readable(annotation[1]))+"]",eafob)
    oldFileNameSplit = file.split("\\")[1].split(".")
    pympi.Elan.to_eaf("out/"+oldFileNameSplit[0]+"_changed."+oldFileNameSplit[1],eafob)
with open("out/error.log","w") as f:
    f.write(errorlog)
with open("out/warning.log","w") as f:
    f.write(warninglog)

Node renamed: 5.4.2 -> 5.4.2.; File: in\VP23_P2_A01.eaf; Role: EX; Timeslot[0:4:42 721;0:4:51 121]

Node renamed: 5.4.2 -> 5.4.2.; File: in\VP23_P2_A01.eaf; Role: EX; Timeslot[0:4:42 721;0:4:51 121]
Node renamed: 1.0. -> 1; File: in\VP26_P2_A01.eaf; Role: EX; Timeslot[0:0:5 240;0:0:8 399]

Node renamed: 5.4.2 -> 5.4.2.; File: in\VP23_P2_A01.eaf; Role: EX; Timeslot[0:4:42 721;0:4:51 121]
Node renamed: 1.0. -> 1; File: in\VP26_P2_A01.eaf; Role: EX; Timeslot[0:0:5 240;0:0:8 399]
Node renamed: 1.0. -> 1; File: in\VP27_P2_A01.eaf; Role: EX; Timeslot[0:0:6 670;0:0:8 789]

Node renamed: 5.4.2 -> 5.4.2.; File: in\VP23_P2_A01.eaf; Role: EX; Timeslot[0:4:42 721;0:4:51 121]
Node renamed: 1.0. -> 1; File: in\VP26_P2_A01.eaf; Role: EX; Timeslot[0:0:5 240;0:0:8 399]
Node renamed: 1.0. -> 1; File: in\VP27_P2_A01.eaf; Role: EX; Timeslot[0:0:6 670;0:0:8 789]
Node renamed: 1.0. -> 1; File: in\VP29_P2_A01.eaf; Role: EX; Timeslot[0:0:2 830;0:0:5 839]

Node renamed: 5.4.2 -> 5.4.2.; File: in\VP23_P2_A01.ea