### Post Edited Text

In [712]:
import datetime
from dateutil.parser import parse
import xmltodict

from collections import namedtuple
from collections import OrderedDict

# Record format for Translog
Record = namedtuple('Record',['source', 'targetUpdated', 'captured_keystrokes','last_timestamp'])

def processTradosPETask(xmlDoc, recordNumber=None, debug=False):
    
    # Stores the keystores for each segment in a dictionary
    recorded_keystrokes_dict = OrderedDict()
    
    started_time = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['@started']
    end_time = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['@stopped']
    source_lang = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['Document']['@sourceLang']
    if source_lang:
        source_lang = source_lang.split('-')[0]
    target_lang = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['Document']['@targetLang']
    if target_lang:
        target_lang = target_lang.split('-')[0]
    print(target_lang, source_lang)
    project_name = xmlDoc['QualitivityProfessional']['Client']['Project']['@name']
    
    # To store the timestamp of the first keystroke of the first ever record
    first_timestamp = 0.0
    # To store the timestamp of last keystroke of previous record
    last_timestamp = 0.0
    records = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['Document']['Record']
    recoveredText = ''
    # To extract details for a single record
    if recordNumber:
        for record in records:
            #record = records[record_number]
            recordId = int(record['@id'])
            if recordId == recordNumber:
                targetUpdated = record['contentText']['targetUpdated']
                # For some records, the field targetUpdated is absent
                # We assume this record as "not translated"
                if not targetUpdated:
                    print(f"[WARN] No targetUpdated found!!")
                    return
                
                capturedData, first_timestamp = processRecordPE(record, first_timestamp, last_timestamp, debug)
                recorded_keystrokes_dict[recordId] = capturedData
                break
        print(f"[INFO] targetUpdated: {targetUpdated}",end='\n\n')
       
            
    # To extract the whole trados xml file
    else:
        
        for ind, record in enumerate(records):
           
            targetUpdated = record['contentText']['targetUpdated']
            recordId = record['@id']
            
            # For some records, the field targetUpdated is absent
            # We assume this record as "not translated"
            # We skip this record
            if not targetUpdated:
                print(f"[WARN] No targetUpdated found for Record Id: {recordId}!!")
                continue
                
            capturedData, first_timestamp = processRecordPE(record, first_timestamp, last_timestamp, debug)
            last_timestamp = capturedData.last_timestamp
            recorded_keystrokes_dict[recordId] = capturedData

    return recorded_keystrokes_dict, started_time, end_time, source_lang, target_lang, project_name
    
def processRecordPE(record, first_timestamp, last_timestamp, debug=False):
    """
    This method processes each segment(record) of Trados Post Editing XML file
    """
    # Stores the list of keystrokes
    captured_keystrokes = []
    
    ts = 0.0
    last_ts = 0.0
    source = record['contentText']['source']
    targetOriginal = record['contentText']['targetOriginal']
    targetUpdated = record['contentText']['targetUpdated']
    keystrokes = record['keyStrokes']['ks']
    recordId = record['@id']
    
    if targetOriginal:
        original_text = targetOriginal
    else:
        original_text = ''
    
    for ks in keystrokes:
        # In some cases, the keystrokes only has 1 record
        # In that case, return the text field as the translated text
        if isinstance(ks,str):
            return Record(source, targetUpdated, captured_keystrokes, last_timestamp), first_timestamp
        
        system = ks.get('@system')
        # The system attribute contains the MT translation
        # Fetch that text and use it at initial original_text
        if system:
            original_text = ks.get('@text')
            break
            
    if debug: print(f"[INFO] Target Original Text: {original_text}")
    
    # Convert the target original_text to array of characters
    orig_text = [w for w in original_text]
    
    # Assign the first_timestamp to the last_timestamp recorded of the last record
   
    for ks in keystrokes:
        text = ks.get('@text')
        key = ks.get('@key')
        position = ks.get('@position')
        pos = int(position)
        created = ks.get('@created')
        selection = ks.get('@selection')
        system = ks.get('@system')
        
        if system:
            continue
        
        # Timestamp calculation from unix timestamp to time in miliseconds
        tt = parse(created)
        tt = tt.timestamp()
        
        if first_timestamp == 0.0:
            first_timestamp = tt
        ts = tt - first_timestamp
        # convert tt into miliseconds
        ts = int(ts*1000)
        last_ts = ts
        #print(f"record ID: {recordId}, pos: {pos}, timestamp: {ts}")
        # Skip this keystroke as it contains the MT translation and is taken care of earlier
        if system:
            continue
        # If the keystroke has non empty "selection" attribute
        if selection:
            if debug:
                print(f"[DEBUG] Select and delete Characters")
            
            orig_text, ks_list = extractSelectionKeystrokesPE(orig_text, selection, pos, text, key, ts, debug)
            curr_updated_text = ''.join(orig_text)
            if debug:
                print(f"[DEBUG] Current Updated Text = '{curr_updated_text}'")
            for ks in ks_list:
                captured_keystrokes.append(ks)
        # Keystroke is either Insert or Delete
        else:
            # Stores the type of operation - insert or delete
            op_type = ''
            
            # So far this funtionality is not used
            if key == '[BACK]':
                opType = "delete"
                del(orig_text[pos])
                curr_updated_text = ''.join(orig_text)
                if debug:
                    print("[DEBUG] Deleting characters")
                    print(f"[DEBUG] Current Updated Text = '{curr_updated_text}'")
                
            else:
                opType = "insert"
                for index, char in enumerate(text):
                    orig_text.insert(pos + index, char)
                curr_updated_text = ''.join(orig_text)
                if debug:
                    print(f"[DEBUG] Inserting characters")
                    print(f"[DEBUG] Current Updated Text = '{curr_updated_text}'")
                   
            target_ks = {'Time': str(ts), 'Cursor': position, 'Type': opType, 'Value': text}
            captured_keystrokes.append(target_ks)
    
    if debug: print(f"[DEBUG] The recovered text: {curr_updated_text}")
     # Validation
    if (targetUpdated == curr_updated_text):
        print(f"[INFO] The recovered text matches targetUpdated for Record Id: {recordId}")
    else:
        print(f"[WARN] The recovered text doesn't match targetUpdated for Record Id: {recordId}")
        print(f"\t[ERROR] The recovered text: {curr_updated_text}")
    #last_timestamp = ts   
    #print(f"last_ts: {last_ts}")
    return Record(source, targetUpdated, captured_keystrokes, last_ts), first_timestamp
        
        
def extractSelectionKeystrokesPE(orig_text, selection, position, text, key, time, debug):
    ks_list = []
    curr_updated_text = ''.join(orig_text)
    
    if debug: print(f"[DEBUG] Updated Text: {curr_updated_text}")
        
    start = position
    end = position + len(selection)
    remove_index = [i for i in range(start,end)]
    
    if debug:
        print(f"\t[DEBUG] To delete at position {position}: character {orig_text[position]} : '{''.join(orig_text[start:end])}'")
        print(f"\t[DEBUG] Selection: '{selection}'")
    
    # Delete characters
    del(orig_text[start:end])
    
    # Create a keystroke entry for delete
    target_ks = {'Time': str(time), 'Cursor': start, 'Type': "delete", 'Value': selection}
    ks_list.append(target_ks)
    
    # Insert Space
    if key == '[Space]':
        orig_text.insert(start,' ')
        # Create a keystroke entry for insert
        target_ks = {'Time': str(time), 'Cursor': start, 'Type': "insert", 'Value': ' '}
        ks_list.append(target_ks)
   
    else:
        # Insert characters
        if text:
            for i,c in enumerate(text):
                orig_text.insert(start+i,c)
            # Create a keystroke entry for insert
            target_ks = {'Time': str(time), 'Cursor': start, 'Type': "Insert", 'Value': ' '}
            ks_list.append(target_ks)
           
    if debug: print(f"[DEBUG] Updated Text: {''.join(orig_text)}")
    
    return orig_text, ks_list

In [723]:
def generateTranslogXmlPE(trados_records, started_time, end_time, source_lang, target_lang, project_name, target_xml=OrderedDict(), insertLineBreak=True, debug=False):
    if not isinstance(target_xml, OrderedDict):
        print("[ERROR] Enter a valid xml file")
        return
    
    if not trados_records:
        return
    
    #linebreak_ks = {'Time': str(time), 'Cursor': start, 'Type': "Insert", 'Value': ' '}
    position = 0
    all_keystrokes = []
    final_source_text = ''
    final_target_text = ''
    for recordId in trados_records.keys():
        record = trados_records.get(recordId)
        
        sourceText = record.source
        targetText = record.targetUpdated
        keystrokes = record.captured_keystrokes
        last_timestamp = record.last_timestamp
        
        if insertLineBreak:
            sourceText += '\n'
            targetText += '\n'
            position += len(targetText)
            linebreak_ks = {'Time': str(last_timestamp), 'Cursor': str(position), 'Type': "Insert", 'Value': '\n'}
            keystrokes += [linebreak_ks]
        final_source_text += sourceText
        final_target_text += targetText
        all_keystrokes += keystrokes
        
    target_xml = addKeystrokesPE(all_keystrokes, target_xml)
    target_xml = addSourceTextPE(final_source_text, target_xml)
    target_xml = addTargetTextPE(final_target_text, target_xml)
    target_xml = addSourceTextCharPE(final_source_text, target_xml)
    target_xml = addTargetTextCharPE(final_target_text, target_xml)
    
    target_xml['LogFile']['startTime'] = started_time
    target_xml['LogFile']['endTime'] = end_time
    target_xml['LogFile']['Project']['FileName'] = project_name
    target_xml['LogFile']['Project']['Languages']['@source'] = source_lang
    target_xml['LogFile']['Project']['Languages']['@target'] = target_lang

    return target_xml
    

In [719]:
a = "123"
a = a + "4"
a

'1234'

In [711]:
def addKeystrokesPE(keystrokes, target_xml, debug=False):
    
    if target_xml.get('LogFile').get('Events'):
        target_xml['LogFile']['Events']['Key'] = []
    else:
        target_xml['LogFile']['Events'] = OrderedDict()
        target_xml['LogFile']['Events']['Key'] = []
        
    keys = target_xml['LogFile']['Events']['Key']
    keys.append(OrderedDict())
    for ks in keystrokes:
        keys.append(addKsToDict(ks))
    
    target_xml['LogFile']['Events']['Key'] = keys
    
    return target_xml

def addKsToDict(keystrokes_dic):
    new_dict = {'@Value': keystrokes_dic.get('Value'), '@Time': keystrokes_dic.get('Time'), '@Cursor': keystrokes_dic.get('Cursor'), '@Type': keystrokes_dic.get('Type')}
    return OrderedDict(new_dict)

def addSourceTextPE(sourceText, target_xml):
    target_xml['LogFile']['Project']['Interface']['Standard']['Settings']['SourceText'] = sourceText
    return target_xml

def addSourceTextCharPE(sourceText, target_xml):
    sourceTextChar = []
    target_xml['LogFile']['SourceTextChar']['CharPos'] = []
    for ind, char in enumerate(sourceText):
        sourceTextChar.append(OrderedDict({'@Cursor': str(ind), '@Value': char}))    
    target_xml['LogFile']['SourceTextChar']['CharPos'] = sourceTextChar
    return target_xml

def addTargetTextCharPE(targetText, target_xml):
    targetTextChar = []
    target_xml['LogFile']['FinalTextChar']['CharPos'] = []
    for ind, char in enumerate(targetText):
        targetTextChar.append(OrderedDict({'@Cursor': str(ind), '@Value': char}))    
    target_xml['LogFile']['FinalTextChar']['CharPos'] = targetTextChar
    return target_xml

def addTargetTextPE(targetText, target_xml):
    target_xml['LogFile']['Project']['Interface']['Standard']['Settings']['TargetText'] = targetText
    return target_xml

### Read Source Trados xml

In [724]:
with open('PE_EN-PT_2.xml',encoding='utf-8') as fd:
    doc = xmltodict.parse(fd.read(),encoding='utf-8')

In [725]:
captured_trados_data, started_time, end_time, source_lang, target_lang, project_name = processTradosPETask(doc)

pt en
[WARN] No targetUpdated found for Record Id: 1!!
[INFO] The recovered text matches targetUpdated for Record Id: 2
[INFO] The recovered text matches targetUpdated for Record Id: 3
[INFO] The recovered text matches targetUpdated for Record Id: 4
[INFO] The recovered text matches targetUpdated for Record Id: 6
[INFO] The recovered text matches targetUpdated for Record Id: 7
[INFO] The recovered text matches targetUpdated for Record Id: 8
[INFO] The recovered text matches targetUpdated for Record Id: 9
[INFO] The recovered text matches targetUpdated for Record Id: 10
[INFO] The recovered text matches targetUpdated for Record Id: 11
[INFO] The recovered text matches targetUpdated for Record Id: 12


### Read target Translog template xml

In [726]:
with open('translog_template.xml',encoding='utf-8') as fd:
    target_xml = xmltodict.parse(fd.read(),encoding='utf-8')

In [727]:
updated_xml = generateTranslogXmlPE(captured_trados_data, started_time, end_time, source_lang, target_lang, project_name, target_xml)
#updated_xml = xmltodict.unparse(updated_xml)

In [730]:
#updated_xml = xmltodict.unparse(updated_xml,)
f = open('translog_new_PE1.xml','w', encoding='utf-8')
#f.write(updated_xml)
#f.close()

In [731]:
xmltodict.unparse(updated_xml,output=f,pretty=True, short_empty_elements=True)
f.close()

# End Post Edit

In [640]:
with open('latest_PE.xml',encoding='utf-8') as fd:
    doc_new = xmltodict.parse(fd.read(),encoding='utf-8')

In [None]:
<cf font=Georgia color=121212 size=12>Vieni per l’\xa0</cf><hyperlink value="https://www.theguardian.com/world/2019/may/28/uk-and-territories-are-greatest-enabler-of-tax-avoidance-study-says">elusione fiscale</hyperlink><cf font=Georgia color=121212 size=12>, e finisci per restare per le tartarughe.</cf>
<cf font=Georgia color=121212 se finisci pei rze=1re2>Vieni per l’ </cf><hyperlink value="https://www.theguardian.com/world/2019/may/28/uk-and-territories-are-greatest-enabler-of-tax-avoidance-study-says">elusione fiscale</hyperlink><cf font=Georgia color=121212 size=12>, resta per le tartarughe.</cf>

In [687]:
# throws error - has missing keystrokes - 3, 4,19,21,
# works, no keystrokes - 1,2,11,12,13,14,15,16,17,18,20,
# works with keystrokes - 6,7,9,22,
# doesnt work - need to check - 10
# doesnt work - contains html 5, 8, 
processTradosPETask(doc_new,27)

[INFO] The recovered text matches targetUpdated for Record Id: 27
[INFO] targetUpdated: Con quasi nessun parlamentare senza incarico presente - solo i lobbisti e qualche membro irriducibile si sono presentati - l'obiettivo principale dei ministri era di superare i quattro giorni di conferenza senza metterci piede, altrimenti avrebbero fatto capire che non hanno idea di quale sia il piano Brexit o avrebbero detto la cosa sbagliata.



(OrderedDict([(27,
               Record(source='With almost no backbench MPs present – only lobbyists and the odd diehard member bother to turn up – the main goal of ministers was to get through the four days without putting their foot in it by letting on that they didn’t have a clue what the Brexit plan was or saying the wrong thing.', targetUpdated="Con quasi nessun parlamentare senza incarico presente - solo i lobbisti e qualche membro irriducibile si sono presentati - l'obiettivo principale dei ministri era di superare i quattro giorni di conferenza senza metterci piede, altrimenti avrebbero fatto capire che non hanno idea di quale sia il piano Brexit o avrebbero detto la cosa sbagliata.", captured_keystrokes=[{'Time': '0', 'Cursor': 228, 'Type': 'delete', 'Value': 'facendo '}, {'Time': '0', 'Cursor': 228, 'Type': 'Insert', 'Value': ' '}, {'Time': '149', 'Cursor': '229', 'Type': 'insert', 'Value': 'l'}, {'Time': '297', 'Cursor': '230', 'Type': 'insert', 'Value': 't'}, {'Time': '40

### Translated Text

In [None]:
with open('Translation_PT-EN.xml',encoding='utf-8') as fd:
    translated_doc = xmltodict.parse(fd.read(),encoding='utf-8')

- while inserting, it starts inserting at index = 1
- that means if we enter [how], h=1, o=2, w=3, but it actually gets stored at h=0, w=1, w=2
- while deleting, it deletes at the actual index
- suppose it wants to delete w, it will do so at index=2, rather than 3

In [247]:
def extractTargetText1(xmlDoc,record_number=5):
    records = xmlDoc['QualitivityProfessional']['Client']['Project']['Activity']['Document']['Record']
    #for record in records:
    record = records[record_number]
    source = record['contentText']['source']
    targetOriginal = record['contentText']['targetOriginal']
    targetUpdated = record['contentText']['targetUpdated']
    keystrokes = record['keyStrokes']['ks']
    #print(keystrokes)
    print(targetUpdated,end='\n\n')
    extractedText = constructTargetText1(keystrokes, targetOriginal)
    return extractedText,targetUpdated
        
    
def constructTargetText1(keystrokes, targetOriginal):
    special_keys = ['[Back]','[Space]']
    
    if targetOriginal:
        orig_text = [i for i in targetOriginal]
    else:
        orig_text = []
    
    print(len(orig_text))
   
    for ks in keystrokes:
        text = ks.get('@text')
        key = ks.get('@key')
        position = ks.get('@position')
        pos = int(position)
        created = ks.get('@created')
        selection = ks.get('@selection')
        system = ks.get('@system')
        if orig_text:
            print(f"\n[DEBUG] At {position}  ---> text: '{text}' ----> key pressed: '{key}' ---> selection: '{selection}'")
        if selection:
            temp = handle_selection1(orig_text, selection, pos, text, key)
            orig_text = temp
        elif key == '[BACK]':
            print("[DEBUG] Delete text")
            #pass
        else:
            print("[DEBUG] Insert Text")
            if len(text) > 1:
                for i,c in enumerate(text):
                    orig_text.insert(pos+i,c)
            else:
                orig_text.insert(pos,text)
            updated_text = ''.join(orig_text)
            print(f"'{updated_text}'")
    return orig_text
        
def handle_selection1(temp, selection, position, text, key):
    print("[DEBUG] Select and Modify text")
   # position = int(position)
    #print(''.join(temp))
    if len(selection) == 1:
        #if key == '[Back]':
        start = position - len(selection)+1
        end = position+1
        remove_index = [i for i in range(start,end)]
        print(f"[DEBUG] index to remove----> {remove_index}")
        #delete the characters
        to_delete = ''.join(temp[start:end])
        print(f"[DEBUG] To delete: '{to_delete}'")
        print(f"[DEBUG] Selection: '{selection}'")
        if to_delete != selection:
            print(f"[ERROR] Selection string doesn't match to_delete string!!!")
        del(temp[start:end])
        #else:
            
    elif len(selection) > 1:
        if key == '[Back]':
            start = position - 1 + 1
            end = start+len(selection)+1
        else:
              # creates problem in record 5
              
            start = position - 1
            # +1 deletes an extra
            end = start+len(selection)
        remove_index = [i for i in range(start, end)]
        print(f"[DEBUG] index to remove----> {remove_index}")
        #delete the characters
        to_delete = ''.join(temp[start:end])
        print(f"[DEBUG] To delete multiple: '{to_delete}'")
        print(f"[DEBUG] Multiple Selection: '{selection}'")
        if to_delete != selection:
              print(f"[ERROR] Selection string doesn't match to_delete string!!!")
        del(temp[start:end])
    #start = start+1
    if key == '[Space]':
        temp.insert(start,' ')
    #elif key == '[Back]':
        # Do nothing as its already deleted above
        #pass
    else:
        #print(f"length of text = {len(text)}")
        # if text field is non empty
        #text = text.rstrip()
        if text:
            for i,c in enumerate(text):
                temp.insert(start+i,c)
           
    print(''.join(temp))
    
    return temp


# Capture keystrokes from Qualitifity source xml file

In [320]:
with open('Translation_PT-EN.xml',encoding='utf-8') as fd:
    translated_doc = xmltodict.parse(fd.read(),encoding='utf-8')

In [None]:
def sourceTextPosDict(sourceText):
    pass

def targetTextPosDict(targetText):
    pass

In [346]:
import datetime
from dateutil.parser import parse

def processTradosXml(source_xml, record_number=2, debug=False):
    # To store the keystores for Translog
    captured_keystrokes = []
    # To store the previous timestamp of each keystroke
    first_ts = 0.0
    
    records = source_xml['QualitivityProfessional']['Client']['Project']['Activity']['Document']['Record']
    #for record in records:
    record = records[record_number]
    source = record['contentText']['source']
    targetOriginal = record['contentText']['targetOriginal']
    targetUpdated = record['contentText']['targetUpdated']
    keystrokes = record['keyStrokes']['ks']
    
    # orig_text keeps the track of the reconstructed text so far
    if targetOriginal:
        orig_text = [i for i in targetOriginal]
    else:
        orig_text = []
    
    for count, ks in enumerate(keystrokes):
        text = ks.get('@text')
        key = ks.get('@key')
        position = ks.get('@position')
        pos = int(position)
        created = ks.get('@created')
        selection = ks.get('@selection')
        system = ks.get('@system')
        
        tt = parse(created)
        tt = tt.timestamp()
        if count == 0:
            first_ts = tt
            tt = 0
        else:
            tt = tt - first_ts
        # convert tt into miliseconds
        tt = int(tt*1000)
        
        if selection:
            print("[DEBUG] Handling selection(first delete and then insert) characters")
            orig_text, ks_list = extractSelectionKeystrokes(orig_text, pos, text, key, tt, selection)
            #curr_updated_text = ''.join(orig_text)
            #print(f"[DEBUG] Current Text = '{curr_updated_text}'")
            for ks in ks_list:
                captured_keystrokes.append(ks)
        else:
            opType = ''
            if key == '[Back]':
                del(orig_text[pos])
                curr_updated_text = ''.join(orig_text)
                if debug:
                    print("[DEBUG] Deleting characters")
                    print(f"[DEBUG] Current Text = '{curr_updated_text}'")
                opType = "delete"
            else:
                if pos == 0:
                    pos =1
                for index,char in enumerate(text):
                    orig_text.insert(pos-1+index, char)
                curr_updated_text = ''.join(orig_text)
                if debug:
                    print("[DEBUG] Inserting characters")
                    print(f"[DEBUG] Current Text = '{curr_updated_text}'")
                
                opType = "insert"
        
            target_ks = {'time': str(tt), 'cursor': position, 'type': opType, 'value': text}
            captured_keystrokes.append(target_ks)
    print(f"[INFO] Original Text: {''.join(orig_text)}")
        
    return source, targetUpdated, captured_keystrokes
    
def extractSelectionKeystrokes(orig_text, position, text, key, time, selection):
    curr_updated_text = ''.join(orig_text)
    print(f"[DEBUG] Current Text before updating = '{curr_updated_text}'")
    ks_list = []
    if(len(selection) == 1):
        # Delete at position
        start = position
        print(f"\t[DEBUG]Single: key = {key} : character at position: {start} is '{orig_text[start]}', selection = '{selection}'")
        if selection == orig_text[start]:
            print(f"\t\t[DEBUG] Deleting '{orig_text[start]}'")
            del(orig_text[start])
        elif selection == orig_text[start-1]:
            print(f"\t\t[DEBUG] Deleting '{orig_text[start-1]}'")
            del(orig_text[start-1])
        else:
            print("[ERROR] selection not found")
    else:
        
        #if key == '[Back]':
        # Delete at position
        start = position
        end = start + len(selection) + 1
        #else:
            #start = position - 1
            # +1 deletes an extra
            #end = start + len(selection)
        print(f"\t[DEBUG]Multiple: key = {key} : characters around position {position}({orig_text[position]}): '{''.join(orig_text[position-2:position+1])}'")    
        remove_index = [i for i in range(start, end)]
        print(f"[DEBUG] index to remove: {remove_index}")
        #delete the characters
        to_delete = ''.join(orig_text[start:end])
        print(f"[DEBUG] To delete multiple: '{to_delete}'")
        print(f"[DEBUG] Multiple Selection: '{selection}'")
        if to_delete != selection:
              print(f"[ERROR] Selection string doesn't match to_delete string!!!")
        del(orig_text[start:end])
    
    target_ks = {'time': str(time), 'cursor': start, 'type': "delete", 'value': selection}
    ks_list.append(target_ks)
    
    # Insert the characters in the text field
    if position == 0:
        position = 1
    if key == '[Space]':
        orig_text.insert(position-1,' ')
        target_ks = {'time': str(time), 'cursor': start, 'type': "insert", 'value': ' '}
        ks_list.append(target_ks)
    else:
        
        if text:
            for i,c in enumerate(text):
                orig_text.insert(position-1+i,c)
            target_ks = {'time': str(time), 'cursor': start, 'type': "insert", 'value': text}
            ks_list.append(target_ks)

    return orig_text, ks_list


def addKeystrokes(keystrokes, target_xml, debug=False):
    if not isinstance(target_xml, OrderedDict):
        print("[ERROR] Enter a valid xml file")
        return
    if not keystrokes:
        print("[ERROR] No keystores to add")
        return
    if target_xml.get('LogFile').get('Events'):
        target_xml['LogFile']['Events']['Key'] = []
    else:
        target_xml['LogFile']['Events'] = OrderedDict()
        target_xml['LogFile']['Events']['Key'] = []
        
    keys = target_xml['LogFile']['Events']['Key']
    for ks in keystrokes:
        keys.append(addKsToDict(ks))
    
    target_xml['LogFile']['Events']['Key'] = keys
    
    return target_xml

def addSourceText(sourceText, target_xml):
    target_xml['LogFile']['Project']['Interface']['Standard']['Settings']['SourceText'] = sourceText
    return target_xml

def addTargetText(targetText, target_xml):
    target_xml['LogFile']['Project']['Interface']['Standard']['Settings']['TargetText'] = targetText
    return target_xml
    
from collections import OrderedDict

def addKsToDict(keystrokes_dic):
    new_dict = {'@Time': keystrokes_dic.get('time'), '@Cursor': keystrokes_dic.get('cursor'), '@Type': keystrokes_dic.get('type'), '@Value': keystrokes_dic.get('value')}
    return OrderedDict(new_dict)

In [347]:
extractedText, targetUpdated, captured_keystrokes = processTradosXml(translated_doc,2)

[DEBUG] Handling selection(first delete and then insert) characters
[DEBUG] Current Text before updating = 'The effects of the '
	[DEBUG]Single: key = [Back] : character at position: 3 is ' ', selection = ' '
		[DEBUG] Deleting ' '
[DEBUG] Handling selection(first delete and then insert) characters
[DEBUG] Current Text before updating = 'Theeffects of the '
	[DEBUG]Single: key = [Back] : character at position: 2 is 'e', selection = 'e'
		[DEBUG] Deleting 'e'
[DEBUG] Handling selection(first delete and then insert) characters
[DEBUG] Current Text before updating = 'Theffects of the '
	[DEBUG]Single: key = [Back] : character at position: 1 is 'h', selection = 'h'
		[DEBUG] Deleting 'h'
[DEBUG] Handling selection(first delete and then insert) characters
[DEBUG] Current Text before updating = 'Teffects of the '
	[DEBUG]Single: key = [Back] : character at position: 0 is 'T', selection = 'T'
		[DEBUG] Deleting 'T'
[DEBUG] Handling selection(first delete and then insert) characters
[DEBUG] Cu

In [333]:
targetUpdated

'Effects of the category-2 hurricane have already been felt in the Carolinas, with multiple tornadoes, but no victims have been reported.'

### Open a Translog xml template file

In [268]:
with open('translog_template.xml',encoding='utf-8') as fd:
    target_xml = xmltodict.parse(fd.read(),encoding='utf-8')

In [269]:
updated_xml = addKeystrokes(captured_keystrokes, target_xml)
#updated_xml = addSourceText(extractedText, target_xml)
updated_xml = addTargetText(targetUpdated, target_xml)

In [270]:
new_xml = xmltodict.unparse(updated_xml)

### Save target File

In [271]:
f = open('translog_template_new.xml','w')
f.write(new_xml)
f.close()