# ALL LATIN ANNOTATIONS

In [1]:
from lxml import etree as ET

import os
import sys
path=os.getcwd()

def get_xml(xml_input):
    parser = ET.XMLParser(remove_comments=False)
    xml = ET.parse(xml_input, parser=parser)
    return xml 

Creating a dictionary for all verb tokens

In [2]:
import csv  
from cassis import *  

# Function to load a Common Analysis Structure (CAS) from an XMI file
def load_cas(file_input):
    f = open('typesystem.xml', 'rb')  # Opening the typesystem.xml file in binary read mode
    typesystem = load_typesystem(f)  # Loading the type system from the typesystem.xml file
    fxmi = open(file_input, 'rb')  # Opening the XMI file in binary read mode
    cas = load_cas_from_xmi(fxmi, typesystem=typesystem)  # Loading the CAS from the XMI file using the specified type system
    return cas  # Returning the loaded CAS

# List of all input XMI files
all_files = ['Aennius, Annales.xmi', 'Apuleius, Metamorphoses 1-5.xmi', 'Caesar, De bello Gallico 1-4.xmi', 
             'Cicero, De amicitia.xmi', 'Cicero, In Catilinam 1-3.xmi', 'Horace, Satires 1.xmi', 
             'Livius, Ab Urbe condita 1-2.xmi', 'Plautus, Amphitruo.xmi', 'Plautus, Mostellaria.xmi', 
             'Propertius, Elegiae.xmi', 'Sallust, Bellum Catilinae.xmi', 'Seneca, De Ira.xmi', 
             'Seneca, Medea.xmi', 'Suetonius, Life of August.xmi', 'Tacitus, Historiae 1.xmi', 
             'Vergil, Aeneid.xmi']

# Abbreviated names for the input files
files_abbreviated = ['Aen', 'Ap', 'Caes', 'Cic_Am', 'Cic_Cat', 'Hor', 'Liv', 'Plaut_Am', 'Plaut_Most', 
                     'Prop', 'Sall', 'Sen_Ira', 'Sen_Med', 'Suet', 'Tac', 'Verg']

id2tok = dict()  # Dictionary to store IDs mapped to tokens

count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    print(file_input)  # Printing the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    # Looping through each 'Actionality' annotation in the CAS
    for relation in cas.select('webanno.custom.Actionality'):
        # Looping through each token covered by the 'Actionality' annotation
        for token in cas.select_covered('webanno.custom.Actionality', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            print(count)  # Printing the current count
            count = count + 1  # Incrementing the count
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            print(id, tok)  # Printing the token ID and its text
            id2tok[id] = tok  # Storing the token text in the dictionary with the token ID as the key


Aennius, Annales.xmi
0
7504Aen perire
1
7614Aen Concurrunt
2
7807Aen conveniunt
Apuleius, Metamorphoses 1-5.xmi
3
627Ap praeeunte
4
638Ap aggressus
5
2954Ap interii
6
3306Ap subit
7
4001Ap perveneritis
8
6965Ap obiturus
9
9077Ap proveniret
10
11123Ap aufugiamus
11
11895Ap provenire
12
13304Ap convenire
13
14102Ap transeas
14
14123Ap abeunt
15
16724Ap aggredior
16
20808Ap aufugi
17
21324Ap provenire
18
21919Ap abierunt
19
22995Ap progressus
20
23699Ap invenio
21
27261Ap abirem
22
29687Ap circumibam
23
31740Ap pervenimus
24
32160Ap introeuntibus
25
32491Ap exire
26
34900Ap evolo
27
35126Ap congredere
28
37524Ap occurrit
29
38710Ap occurrens
30
40020Ap Pereo
31
40049Ap perii
32
40257Ap Abi
33
40602Ap advenit
34
43327Ap advenis
35
44239Ap aufugit
36
46122Ap subveni
37
46705Ap subit
38
47736Ap adeunda
39
51135Ap adire
40
51515Ap aufugere
41
53490Ap obeas
42
54378Ap Abi
43
55177Ap abis
44
55297Ap abis
45
55702Ap accurro
46
55779Ap convenerant
47
57275Ap Occurrit
48
61144Ap convenissent
49
61

371
357Hor concurritur
372
1104Hor percurram
373
4871Hor abii
374
6932Hor exiret
375
10453Hor transvolat
376
11639Hor pereant
377
13082Hor evenit
378
16537Hor invenere
379
16723Hor perierunt
380
16843Hor inventa
381
17279Hor subire
382
21095Hor invenias
383
24449Hor occurram
384
25470Hor abit
385
25974Hor subimus
386
26213Hor advenit
387
28997Hor pervenimus
388
31365Hor concurrantque
389
32181Hor abeo
390
34030Hor exirem
391
35701Hor praecurreret
392
35767Hor convenit
393
36279Hor procurrunt
394
38432Hor peritura
395
39396Hor accurrit
396
39950Hor abire
397
40279Hor subiit
398
41095Hor inteream
399
42062Hor occurram
400
42174Hor occurrit
Livius, Ab Urbe condita 1-2.xmi
401
2257Liv perventum
402
2917Liv pereundi
403
3810Liv egressi
404
4149Liv egressi
405
4380Liv concurrunt
406
4758Liv exissent
407
5915Liv confugiunt
408
7964Liv convenerat
409
10166Liv invenerit
410
11990Liv pervenit
411
12751Liv ingressi
412
13183Liv intervenit
413
13824Liv congressi
414
16586Liv evenit
415
16955Liv in

727
0Plaut_Most Exi
728
73Plaut_Most egredere
729
156Plaut_Most exi
730
296Plaut_Most abi
731
305Plaut_Most abi
732
361Plaut_Most Perii
733
423Plaut_Most adveniat
734
2690Plaut_Most abi
735
3422Plaut_Most abibo
736
5513Plaut_Most abeunt
737
5987Plaut_Most advenit
738
7641Plaut_Most evenit
739
8278Plaut_Most periisti
740
9371Plaut_Most involem
741
9894Plaut_Most Perii
742
10788Plaut_Most invenire
743
12044Plaut_Most adveniat
744
14701Plaut_Most abi
745
15380Plaut_Most convenit
746
15835Plaut_Most conveniunt
747
16008Plaut_Most éffugi
748
17789Plaut_Most advenit
749
17806Plaut_Most periit
750
18021Plaut_Most subeunt
751
18135Plaut_Most excucurrerit
752
18453Plaut_Most Periimus
753
19130Plaut_Most advenit
754
19236Plaut_Most periisti
755
19296Plaut_Most advenit
756
19439Plaut_Most adveniens
757
19984Plaut_Most Perii
758
20136Plaut_Most advenientem
759
20387Plaut_Most abeamus
760
20660Plaut_Most abi
761
21509Plaut_Most proveniant
762
21727Plaut_Most egredere
763
22022Plaut_Most abi
764
222

1104
1181Tac adgredior
1105
4144Tac praeventamque
1106
5196Tac perierant
1107
7474Tac conveniebat
1108
8388Tac adissent
1109
11831Tac subisse
1110
12254Tac transgressus
1111
14325Tac effugerit
1112
15416Tac inveniet
1113
21029Tac perire
1114
22066Tac transitur
1115
22614Tac adire
1116
22904Tac progressus
1117
25274Tac convenerat
1118
27098Tac perire
1119
28783Tac evenit
1120
30919Tac adeat
1121
31215Tac occurrendum
1122
31640Tac egresso
1123
32940Tac adeundi
1124
33232Tac praeire
1125
33949Tac perire
1126
34135Tac subit
1127
34312Tac ingressus
1128
36279Tac egressum
1129
36530Tac evenit
1130
36595Tac effugerat
1131
39282Tac occurrens
1132
39588Tac advenere
1133
40485Tac invenit
1134
40690Tac anteire
1135
44085Tac ingressa
1136
47432Tac evenisset
1137
48971Tac ingressus
1138
49007Tac adierat
1139
50006Tac transiturum
1140
50060Tac occurreret
1141
50603Tac transgressum
1142
51655Tac abire
1143
54404Tac ingressus
1144
57248Tac perfugerit
1145
57619Tac degredi
1146
58807Tac praevolavit
114

In [3]:
import pandas as pd

In [4]:
# Creating a DataFrame from the items in id2tok dictionary
tokenid_df = pd.DataFrame([(k, v) for k, v in id2tok.items()], columns=["ID", "VERB TOKEN"]) #where 'tokenid_df includes the token id and the token'

tokenid_df


Unnamed: 0,ID,VERB TOKEN
0,7504Aen,perire
1,7614Aen,Concurrunt
2,7807Aen,conveniunt
3,627Ap,praeeunte
4,638Ap,aggressus
...,...,...
1478,435595Verg,procurrit
1479,436869Verg,succurrere
1480,438653Verg,occurrere
1481,440736Verg,subirent


### Morphological features

In [5]:
list_morphological_features = []  # Initializing an empty list to store morphological features
pred2_morphological_features = dict()  # Initializing an empty dictionary to store predicted morphological features mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'MorphologicalFeatures' annotation in the CAS
    for relation in cas.select('de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures'):
        # Looping through each token covered by the 'MorphologicalFeatures' annotation
        for token in cas.select_covered('de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            morphological_feature = relation.value  # Getting the morphological feature value
            
            # Checking if the ID already exists in pred2mf dictionary
            if id in pred2_morphological_features:
                # If ID exists, append the new morphological feature to the existing list of features
                list_morphological_features = pred2_morphological_features[id] + morphological_feature
                pred2_morphological_features[id] = list_morphological_features
            else:
                # If ID doesn't exist, store the morphological feature directly
                pred2_morphological_features[id] = morphological_feature

print(pred2_morphological_features)


{'7504Aen': 'Tense=Pres|VerbForm=Inf|Voice=Act', '7614Aen': 'Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act', '7807Aen': 'Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act', '627Ap': 'Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|VerbForm=Part|Voice=Act', '638Ap': 'Case=Nom|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass', '2954Ap': 'Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin|Voice=Act', '3306Ap': 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act', '4001Ap': 'Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin|Voice=Act', '6965Ap': 'Case=Nom|Gender=Masc|Number=Sing|Tense=Fut|VerbForm=Part|Voice=Act', '9077Ap': 'Mood=Subj|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin|Voice=Act', '11123Ap': 'Mood=Subj|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin|Voice=Act', '11895Ap': 'Tense=Pres|VerbForm=Inf|Voice=Act', '13304Ap': 'Tense=Pres|VerbForm=Inf|Voice=Act', '14102Ap': 'Mood=Subj|Number=Sing|Person=2|Tense=Pres|VerbFo

In [6]:
morphological_features_df = pd.DataFrame([(k,v) for k,v in pred2_morphological_features.items()], columns=["ID", "MORPHOLOGICAL FEATURES"]) #where 'morphological_features_df' is a dataframe containing token IDs and morphological features
morphological_features_df

Unnamed: 0,ID,MORPHOLOGICAL FEATURES
0,7504Aen,Tense=Pres|VerbForm=Inf|Voice=Act
1,7614Aen,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...
2,7807Aen,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...
3,627Ap,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...
4,638Ap,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...
...,...,...
1478,435595Verg,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...
1479,436869Verg,Tense=Pres|VerbForm=Inf|Voice=Act
1480,438653Verg,Tense=Pres|VerbForm=Inf|Voice=Act
1481,440736Verg,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...


In [7]:
id_morphological_features_df = tokenid_df.merge(morphological_features_df, on='ID', how='left') #where 'id_morphological_features_df' merges 'tokenid_df' and 'morphological_features_df'
id_morphological_features_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...
...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...


### Lemma

In [8]:
list_lemmas = []  # Initializing an empty list to store lemmas
pred2_lemmas = dict()  # Initializing an empty dictionary to store predicted lemmas mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Lemma' annotation in the CAS
    for relation in cas.select('de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma'):
        # Looping through each token covered by the 'Lemma' annotation
        for token in cas.select_covered('de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            lemma = relation.value  # Getting the lemma value
            
            # Checking if the ID already exists in pred2_lemmas dictionary
            if id in pred2_lemmas:
                # If ID exists, append the new lemma to the existing list of lemmas
                list_lemmas = pred2_lemmas[id] + lemma
                pred2_lemmas[id] = list_lemmas
            else:
                # If ID doesn't exist, store the lemma directly
                pred2_lemmas[id] = lemma

print(pred2_lemmas)


{'7489Aen': 'campus', '7504Aen': 'pereo', '7614Aen': 'concurro', '7799Aen': 'undique', '7807Aen': 'convenio', '7831Aen': 'telum', '7836Aen': 'tribunus', '583Ap': 'sermo', '618Ap': 'magister', '627Ap': 'praeeo', '638Ap': 'adgredior', '2954Ap': 'intereo', '3256Ap': 'bacillum', '3278Ap': 'ad', '3281Ap': 'occipitium', '3292Ap': 'per', '3296Ap': 'ingluvies', '3306Ap': 'subeo', '3991Ap': 'civitas', '4001Ap': 'pervenio', '6953Ap': 'spectaculum', '6965Ap': 'obeo', '9077Ap': 'provenio', '11123Ap': 'aufugio', '11134Ap': 'istinc', '11872Ap': 'affectus', '11881Ap': 'in', '11884Ap': 'contrarius', '11895Ap': 'provenio', '13271Ap': 'nomen', '13304Ap': 'convenio', '14090Ap': 'per', '14094Ap': 'fluvius', '14102Ap': 'transeo', '14123Ap': 'abeo', '16724Ap': 'adgredior', '20774Ap': 'per', '20796Ap': 'solitudo', '20808Ap': 'aufugio', '21324Ap': 'provenio', '21888Ap': 'ad', '21891Ap': 'villula', '21919Ap': 'abeo', '22995Ap': 'progredior', '23699Ap': 'invenio', '27261Ap': 'abeo', '29687Ap': 'circumeo', '3172

In [9]:
lemmas_df = pd.DataFrame([(k,v) for k,v in pred2_lemmas.items()], columns=["ID", "LEMMA"]) #where 'lemmas_df' is a dataframe containing token IDs and lemmas
lemmas_df

Unnamed: 0,ID,LEMMA
0,7489Aen,campus
1,7504Aen,pereo
2,7614Aen,concurro
3,7799Aen,undique
4,7807Aen,convenio
...,...,...
3282,440708Verg,ille
3283,440736Verg,subeo
3284,441917Verg,per
3285,441937Verg,transeo


In [10]:
id_lemmas_df = id_morphological_features_df.merge(lemmas_df, on='ID', how='left') #where 'id_lemmas_df' merges 'id_morphological_features_df' and 'lemmas_df'
id_lemmas_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior
...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo


### Sentence

In [11]:
list_sentences = []  # Initializing an empty list to store sentences
pred2_sentences = dict()  # Initializing an empty dictionary to store predicted sentences mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Includes' relation in the CAS
    for relation in cas.select('webanno.custom.Includes'):
        dep = relation.Dependent  # Getting the dependent token of the relation
        tokdep = dep.get_covered_text()  # Getting the text covered by the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the dependent token based on its offset and file abbreviation
        list_sentences.append(str(tokdep))  # Appending the text of the dependent token to the list of sentences
        
        gov = relation.Governor  # Getting the governor token of the relation
        sentence = gov.get_covered_text()  # Getting the text covered by the governor token
        
        # Checking if the ID already exists in pred2_sentences dictionary
        if id in pred2_sentences:
            # If ID exists, append the new token text to the existing list of tokens representing the sentence
            list_sentences = pred2_sentences[id] + sentence
            pred2_sentences[id] = list_sentences
        else:
            # If ID doesn't exist, store the token text directly
            pred2_sentences[id] = sentence

print(pred2_sentences)


{'7504Aen': 'Quae neque Dardaniis campis potuere perire,\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0349\u2028Nec cum capta capi, nec cum combusta cremari', '7614Aen': 'Concurrunt vel uti venti cum spiritus austri\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0430\u2028Imbricitor aquiloque suo cum flamine contra\u2028Indu mari magno fluctus extollere certant', '7807Aen': 'Undique conveniunt vel ut imber tela tribuno', '627Ap': 'Mox in urbe Latia advena studiorum, Quiritium indigenam sermonem aerumnabili labore, nullo magistro praeeunte, aggressus excolui', '638Ap': 'Mox in urbe Latia advena studiorum, Quiritium indigenam sermonem aerumnabili labore, nullo magistro praeeunte, aggressus excolui', '2954Ap': 'Ego denique vespera, dum polentae caseatae modico secus offulam grandiorem in convivas aemulus contruncare gestio, mollitie cibi glutinosi faucibus inhaerentis et meacula spiritus distinentis minimo minus interii', '3306Ap': 'et ecce pone lanceae ferrum, qua

In [12]:
sentences_df = pd.DataFrame([(k,v) for k,v in pred2_sentences.items()], columns=["ID", "SENTENCE"]) #where 'sentences_df' is a dataframe containing token IDs and the whole sentences including the verb tokens
sentences_df

Unnamed: 0,ID,SENTENCE
0,7504Aen,"Quae neque Dardaniis campis potuere perire, ..."
1,7614Aen,Concurrunt vel uti venti cum spiritus austri ...
2,7807Aen,Undique conveniunt vel ut imber tela tribuno
3,627Ap,"Mox in urbe Latia advena studiorum, Quiritium ..."
4,638Ap,"Mox in urbe Latia advena studiorum, Quiritium ..."
...,...,...
1478,435595Verg,"Dum nititur acer et instat, rursus in aurigae ..."
1479,436869Verg,"Iuturnam misero, fateor, succurrere fratri sua..."
1480,438653Verg,Harum unam celerem demisit ab aethere summo Iu...
1481,440736Verg,"Vix illud lecti bis sex cervice subirent, qual..."


In [13]:
id_sentences_df = id_lemmas_df.merge(sentences_df, on='ID', how='left') #where 'id_sentences_df' merges 'id_lemmas_df' and 'sentences_df'
id_sentences_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ..."
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ..."
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ..."
...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ..."
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua..."
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual..."


### Actionality

In [14]:
list_actionality = []  # Initializing an empty list to store actionality values
pred2_actionality = dict()  # Initializing an empty dictionary to store predicted actionality values mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Actionality' annotation in the CAS
    for relation in cas.select('webanno.custom.Actionality'):
        # Looping through each token covered by the 'Actionality' annotation
        for token in cas.select_covered('webanno.custom.Actionality', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            actionality = relation.Actionality  # Getting the actionality value
            
            # Checking if the ID already exists in pred2_actionality dictionary
            if id in pred2_actionality:
                # If ID exists, append the new actionality to the existing list of actionalities
                list_actionality = pred2_actionality[id] + actionality
                pred2_actionality[id] = list_actionality
            else:
                # If ID doesn't exist, store the actionality directly
                pred2_actionality[id] = actionality

print(pred2_actionality)


{'7504Aen': 'Achievement', '7614Aen': 'Achievement', '7807Aen': 'Activity', '627Ap': 'State', '638Ap': 'Accomplishment', '2954Ap': 'Achievement', '3306Ap': 'State', '4001Ap': 'Accomplishment', '6965Ap': 'Accomplishment', '9077Ap': 'Achievement', '11123Ap': 'Achievement', '11895Ap': 'Achievement', '13304Ap': 'State', '14102Ap': 'Activity', '14123Ap': 'Achievement', '16724Ap': 'Achievement', '20808Ap': 'Achievement', '21324Ap': 'Achievement', '21919Ap': 'Achievement', '22995Ap': 'Accomplishment', '23699Ap': 'State', '27261Ap': 'Achievement', '29687Ap': 'Activity', '31740Ap': 'Accomplishment', '32160Ap': 'Achievement', '32491Ap': 'Achievement', '34900Ap': 'Accomplishment', '35126Ap': 'Achievement', '37524Ap': 'State', '38710Ap': 'Achievement', '40020Ap': 'Activity', '40049Ap': 'State', '40257Ap': 'Achievement', '40602Ap': 'Accomplishment', '43327Ap': 'Accomplishment', '44239Ap': 'Achievement', '46122Ap': 'Achievement', '46705Ap': 'Achievement', '47736Ap': 'Accomplishment', '51135Ap': 'Acc

In [15]:
actionality_df = pd.DataFrame([(k,v) for k,v in pred2_actionality.items()], columns=["ID", "ACTIONALITY"]) #where 'actionality_df' is a dataframe containing token IDs and actionality values of the verb tokens
actionality_df

Unnamed: 0,ID,ACTIONALITY
0,7504Aen,Achievement
1,7614Aen,Achievement
2,7807Aen,Activity
3,627Ap,State
4,638Ap,Accomplishment
...,...,...
1478,435595Verg,Achievement
1479,436869Verg,Activity
1480,438653Verg,Accomplishment
1481,440736Verg,Achievement


In [16]:
id_actionality_df = id_sentences_df.merge(actionality_df, on='ID', how='left') #where 'id_actionality_df' merges 'id_sentences_df' and 'actionality_df'
id_actionality_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment
...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement


### Verb class

In [17]:
list_verbclass = []  # Initializing an empty list to store verb classes
pred2_verbclass = dict()  # Initializing an empty dictionary to store predicted verb classes mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Motionclass' annotation in the CAS
    for relation in cas.select('webanno.custom.Motionclass'):
        # Looping through each token covered by the 'Motionclass' annotation
        for token in cas.select_covered('webanno.custom.Motionclass', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            verbclass = relation.Motionclass  # Getting the verb class value
            
            # Checking if the ID already exists in pred2_verbclass dictionary
            if id in pred2_verbclass:
                # If ID exists, append the new verb class to the existing list of verb classes
                list_verbclass = pred2_verbclass[id] + verbclass
                pred2_verbclass[id] = list_verbclass
            else:
                # If ID doesn't exist, store the verb class directly
                pred2_verbclass[id] = verbclass

print(pred2_verbclass)


{'7504Aen': 'DIE-42.4.1', '7614Aen': 'BUMP-18.4-1', '7807Aen': 'HERD-47.5.2', '627Ap': 'ACCOMPANY-51.7', '638Ap': 'BEGIN-55.1', '2954Ap': 'DIE-42.4.1', '3306Ap': 'EXIST-47.1', '4001Ap': 'ESCAPE-51.1', '6965Ap': 'REACH-51.8', '9077Ap': 'OCCUR-48.3', '11123Ap': 'ESCAPE-51.1', '11895Ap': 'APPEAR-48.1.1', '13304Ap': 'HARMONIZE-22.6', '14102Ap': 'ESCAPE-51.1', '14123Ap': 'LEAVE-51.2', '16724Ap': 'BEGIN-55.1', '20808Ap': 'LEAVE-51.2', '21324Ap': 'OCCUR-48.3', '21919Ap': 'ESCAPE-51.1', '22995Ap': 'ESCAPE-51.1', '23699Ap': 'DISCOVER-84', '27261Ap': 'LEAVE-51.2', '29687Ap': 'ESCAPE-51.1', '31740Ap': 'ESCAPE-51.1', '32160Ap': 'ESCAPE-51.1', '32491Ap': 'ESCAPE-51.1', '34900Ap': 'RUN-51.3.2', '35126Ap': 'CONFRONT-98', '37524Ap': 'APPEAR-48.1.1', '38710Ap': 'APPEAR-48.1.1', '40020Ap': 'DIE-42.4.1', '40049Ap': 'DIE-42.4.1', '40257Ap': 'LEAVE-51.2', '40602Ap': 'ESCAPE-51.1', '43327Ap': 'ESCAPE-51.1', '44239Ap': 'LEAVE-51.2', '46122Ap': 'HELP-72.1', '46705Ap': 'APPEAR-48.1.1', '47736Ap': 'REACH-51.8',

In [18]:
verbclass_df = pd.DataFrame([(k,v) for k,v in pred2_verbclass.items()], columns=["ID", "VERB CLASS"]) #where 'verbclass_df' is a dataframe containing token IDs and motion clases of the verb tokens
verbclass_df

Unnamed: 0,ID,VERB CLASS
0,7504Aen,DIE-42.4.1
1,7614Aen,BUMP-18.4-1
2,7807Aen,HERD-47.5.2
3,627Ap,ACCOMPANY-51.7
4,638Ap,BEGIN-55.1
...,...,...
1478,435595Verg,RUN-51.3.2
1479,436869Verg,HELP-72.1
1480,438653Verg,RUN-51.3.2
1481,440736Verg,PUT_DIRECTION-9.4


In [19]:
id_verbclass_df = id_actionality_df.merge(verbclass_df, on='ID', how='left') #where 'id_verbclass_df' merges 'id_actionality_df' and 'verbclass_df'
id_verbclass_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1
...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4


### Literal meaning

In [20]:
list_literalmeanings = []  # Initializing an empty list to store literal meanings
pred2_literalmeaning = dict()  # Initializing an empty dictionary to store predicted literal meanings mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Literalmeaning' annotation in the CAS
    for relation in cas.select('webanno.custom.Literalmeaning'):
        # Looping through each token covered by the 'Literalmeaning' annotation
        for token in cas.select_covered('webanno.custom.Literalmeaning', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            literalmeaning = relation.Literalmeaning  # Getting the literal meaning value
            
            # Checking if the ID already exists in pred2_literalmeaning dictionary
            if id in pred2_literalmeaning:
                # If ID exists, append the new literal meaning to the existing list of literal meanings
                list_literalmeanings = pred2_literalmeaning[id] + literalmeaning
                pred2_literalmeaning[id] = list_literalmeanings
            else:
                # If ID doesn't exist, store the literal meaning directly
                pred2_literalmeaning[id] = literalmeaning

print(pred2_literalmeaning)


{'7504Aen': False, '7614Aen': True, '7807Aen': True, '627Ap': False, '638Ap': False, '2954Ap': False, '3306Ap': False, '4001Ap': True, '6965Ap': True, '9077Ap': False, '11123Ap': True, '11895Ap': False, '13304Ap': False, '14102Ap': True, '14123Ap': True, '16724Ap': False, '20808Ap': True, '21324Ap': False, '21919Ap': True, '22995Ap': True, '23699Ap': False, '27261Ap': True, '29687Ap': True, '31740Ap': True, '32160Ap': True, '32491Ap': False, '34900Ap': False, '35126Ap': False, '37524Ap': False, '38710Ap': False, '40020Ap': False, '40049Ap': False, '40257Ap': True, '40602Ap': True, '43327Ap': True, '44239Ap': True, '46122Ap': True, '46705Ap': True, '47736Ap': True, '51135Ap': True, '51515Ap': True, '53490Ap': False, '54378Ap': True, '55177Ap': True, '55297Ap': True, '55702Ap': True, '55779Ap': False, '57275Ap': True, '61144Ap': False, '61380Ap': False, '62018Ap': False, '63203Ap': False, '65487Ap': True, '66768Ap': True, '67051Ap': True, '69323Ap': False, '69546Ap': False, '69809Ap': Tr

In [21]:
literal_meaning_df = pd.DataFrame([(k,v) for k,v in pred2_literalmeaning.items()], columns=["ID", "LITERAL MEANING"])  #where 'literal_meaning_df' is a dataframe containing token IDs and (Boolean) literal meanings of the verb tokens
literal_meaning_df

Unnamed: 0,ID,LITERAL MEANING
0,7504Aen,False
1,7614Aen,True
2,7807Aen,True
3,627Ap,False
4,638Ap,False
...,...,...
1478,435595Verg,True
1479,436869Verg,False
1480,438653Verg,False
1481,440736Verg,False


In [22]:
id_literal_meaning_df = id_verbclass_df.merge(literal_meaning_df, on='ID', how='left') #where 'id_literal_meaning_df' merges 'id_verbclass_df' and 'literal_meaning_df'
id_literal_meaning_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False
...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False


### Verb stem

In [23]:
list_verbstem = []  # Initializing an empty list to store verb stems
pred2_verbstem = dict()  # Initializing an empty dictionary to store predicted verb stems mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Verbstem' annotation in the CAS
    for relation in cas.select('webanno.custom.Verbstem'):
        # Looping through each token covered by the 'Verbstem' annotation
        for token in cas.select_covered('webanno.custom.Verbstem', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            verbstem = relation.Verbstem  # Getting the verb stem value
            
            # Checking if the ID already exists in pred2_verbstem dictionary
            if id in pred2_verbstem:
                # If ID exists, append the new verb stem to the existing list of verb stems
                list_verbstem = pred2_verbstem[id] + verbstem
                pred2_verbstem[id] = list_verbstem
            else:
                # If ID doesn't exist, store the verb stem directly
                pred2_verbstem[id] = verbstem

print(pred2_verbstem)


{'7489Aen': 'present stem', '7504Aen': 'present stem', '7614Aen': 'present stem', '7807Aen': 'present stem', '627Ap': 'present stem', '638Ap': 'supine stem', '2954Ap': 'perfect stem', '3306Ap': 'present stem', '4001Ap': 'perfect stem', '6965Ap': 'supine stem', '9077Ap': 'present stem', '11123Ap': 'present stem', '11895Ap': 'present stem', '13304Ap': 'present stem', '14102Ap': 'present stem', '14123Ap': 'present stem', '16724Ap': 'present stem', '20808Ap': 'perfect stem', '21324Ap': 'present stem', '21919Ap': 'perfect stem', '22995Ap': 'supine stem', '23699Ap': 'present stem', '27261Ap': 'present stem', '29687Ap': 'present stem', '31740Ap': 'perfect stem', '32160Ap': 'present stem', '32491Ap': 'present stem', '34900Ap': 'present stem', '35126Ap': 'present stem', '37524Ap': 'present stem', '38710Ap': 'present stem', '40020Ap': 'present stem', '40049Ap': 'perfect stem', '40257Ap': 'present stem', '40602Ap': 'perfect stem', '43327Ap': 'present stem', '44239Ap': 'perfect stem', '46122Ap': '

In [24]:
verbstem_df = pd.DataFrame([(k,v) for k,v in pred2_verbstem.items()], columns=["ID", "VERB STEM"]) #where 'verbstem_df' is a dataframe containing token IDs and verb stems of the verb tokens
verbstem_df

Unnamed: 0,ID,VERB STEM
0,7489Aen,present stem
1,7504Aen,present stem
2,7614Aen,present stem
3,7807Aen,present stem
4,627Ap,present stem
...,...,...
1479,435595Verg,present stem
1480,436869Verg,present stem
1481,438653Verg,present stem
1482,440736Verg,present stem


In [25]:
id_verbstem_df = id_literal_meaning_df.merge(verbstem_df, on='ID', how='left') #where 'id_verbstem_df' merges 'id_literal_meaning_df' and 'verbstem_df'
id_verbstem_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem
...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem


### Preverb

In [26]:
list_preverbs = []  # Initializing an empty list to store preverbs
pred2_preverbs = dict()  # Initializing an empty dictionary to store predicted preverbs mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Preverb' annotation in the CAS
    for relation in cas.select('webanno.custom.Preverb'):
        # Looping through each token covered by the 'Preverb' annotation
        for token in cas.select_covered('webanno.custom.Preverb', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            preverb = relation.Prev  # Getting the preverb value
            
            # Checking if the ID already exists in pred2_preverbs dictionary
            if id in pred2_preverbs:
                # If ID exists, append the new preverb to the existing list of preverbs
                list_preverbs = pred2_preverbs[id] + preverb
                pred2_preverbs[id] = list_preverbs
            else:
                # If ID doesn't exist, store the preverb directly
                pred2_preverbs[id] = preverb

print(pred2_preverbs)


{'7504Aen': 'per', '7614Aen': 'cum', '7807Aen': 'cum', '627Ap': 'prae', '638Ap': 'ad', '2954Ap': 'inter', '3306Ap': 'sub', '4001Ap': 'per', '6965Ap': 'ob', '9077Ap': 'pro', '11123Ap': 'ab', '11895Ap': 'pro', '13304Ap': 'cum', '14102Ap': 'trans', '14123Ap': 'ab', '16724Ap': 'ad', '20808Ap': 'ab', '21324Ap': 'pro', '21919Ap': 'ab', '22995Ap': 'pro', '23699Ap': 'in', '27261Ap': 'ab', '29687Ap': 'circum', '31740Ap': 'per', '32160Ap': 'intro', '32491Ap': 'ex', '34900Ap': 'ex', '35126Ap': 'cum', '37524Ap': 'ob', '38710Ap': 'ob', '40020Ap': 'per', '40049Ap': 'per', '40257Ap': 'ab', '40602Ap': 'ad', '43327Ap': 'ad', '44239Ap': 'ab', '46122Ap': 'sub', '46705Ap': 'sub', '47736Ap': 'ad', '51135Ap': 'ad', '51515Ap': 'ab', '53490Ap': 'ob', '54378Ap': 'ab', '55177Ap': 'ab', '55297Ap': 'ab', '55702Ap': 'ad', '55779Ap': 'cum', '57275Ap': 'ob', '61144Ap': 'cum', '61380Ap': 'ad', '62018Ap': 'ad', '63203Ap': 'in', '65487Ap': 'pro', '66768Ap': 'circum', '67051Ap': 'pro', '69323Ap': 'ad', '69546Ap': 'ad', 

In [27]:
preverbs_df = pd.DataFrame([(k,v) for k,v in pred2_preverbs.items()], columns=["ID", "PREVERB"]) #where 'preverbs_df' is a dataframe containing token IDs and preverbs of the verb tokens
preverbs_df

Unnamed: 0,ID,PREVERB
0,7504Aen,per
1,7614Aen,cum
2,7807Aen,cum
3,627Ap,prae
4,638Ap,ad
...,...,...
1478,435595Verg,pro
1479,436869Verg,sub
1480,438653Verg,ob
1481,440736Verg,sub


In [28]:
id_preverbs_df = id_verbstem_df.merge(preverbs_df, on='ID', how='left') #where 'id_preverbs_df' merges 'id_verbstem_df' and 'preverbs_df'
id_preverbs_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad
...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub


### Preverb semantics

In [29]:
list_preverb_semantics = []  # Initializing an empty list to store preverb semantics
pred2_preverb_semantics = dict()  # Initializing an empty dictionary to store predicted preverb semantics mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'SemPrev' annotation in the CAS
    for relation in cas.select('webanno.custom.SemPrev'):
        # Looping through each token covered by the 'SemPrev' annotation
        for token in cas.select_covered('webanno.custom.SemPrev', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            preverb_semantics = relation.Preverbsemantics  # Getting the preverb semantics value
            
            # Checking if the ID already exists in pred2_preverb_semantics dictionary
            if id in pred2_preverb_semantics:
                # If ID exists, append the new preverb semantics to the existing list of preverb semantics
                list_preverb_semantics = pred2_preverb_semantics[id] + preverb_semantics 
                pred2_preverb_semantics[id] = list_preverb_semantics
            else:
                # If ID doesn't exist, store the preverb semantics directly
                pred2_preverb_semantics[id] = preverb_semantics

print(pred2_preverb_semantics)


{'7504Aen': uima_cas_StringArray(xmiID=None, elements=['(idea of destruction/death)', 'across'], type=Type(name=uima.cas.StringArray)), '7614Aen': uima_cas_StringArray(xmiID=None, elements=['together'], type=Type(name=uima.cas.StringArray)), '7807Aen': uima_cas_StringArray(xmiID=None, elements=['together'], type=Type(name=uima.cas.StringArray)), '627Ap': uima_cas_StringArray(xmiID=None, elements=['before'], type=Type(name=uima.cas.StringArray)), '638Ap': uima_cas_StringArray(xmiID=None, elements=['to'], type=Type(name=uima.cas.StringArray)), '2954Ap': uima_cas_StringArray(xmiID=None, elements=['completely'], type=Type(name=uima.cas.StringArray)), '3306Ap': uima_cas_StringArray(xmiID=None, elements=['under'], type=Type(name=uima.cas.StringArray)), '4001Ap': uima_cas_StringArray(xmiID=None, elements=['completely'], type=Type(name=uima.cas.StringArray)), '6965Ap': uima_cas_StringArray(xmiID=None, elements=['to'], type=Type(name=uima.cas.StringArray)), '9077Ap': uima_cas_StringArray(xmiID=

In [30]:
list_preverb_semantics = []  # Initializing an empty list to store preverb semantics

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'SemPrev' annotation in the CAS
    for relation in cas.select('webanno.custom.SemPrev'):
        preverb_semantics = relation.Preverbsemantics  # Getting the preverb semantics value
        # Append the preverb semantics to the list after removing unwanted characters
        preverb_semantics_str = ', '.join(preverb_semantics.elements)  # Joining the elements of StringArray
        list_preverb_semantics.append(preverb_semantics_str)  # Appending the preverb semantics to the list

print(list_preverb_semantics)


['(idea of destruction/death), across', 'together', 'together', 'before', 'to', 'completely', 'under', 'completely', 'to', 'forth', 'away', 'forth', 'together', 'through', 'away', 'to', 'away', 'forth', 'away', 'onwards', 'into', 'away', 'around', 'completely', 'into', 'out', 'away', 'together', 'against', 'against', '(idea of destruction/death), across', '(idea of destruction/death), across', 'away', 'to', 'to', 'away', 'under', 'up, from under', 'to', 'to', 'away', 'over', 'away', 'away', 'away', 'to', 'together', 'against', 'together with', 'to', 'to', 'against', 'forward', 'around', 'forward', 'to', '(malefactive), to', '(malefactive), to', 'downwards', 'under', 'forward', 'into', 'away', 'into', 'distributively', 'away', 'into', 'to', 'away', 'to', 'out', 'into', 'forward', '(malefactive), to', 'under', '(none)', 'before', 'completely', 'over', 'completely', 'to', 'completely', 'under', 'away', 'to', 'under', 'under', 'beyond', 'against', 'into', 'forward', '(idea of destruction/d

In [31]:
list_token_ids = []  # Initializing an empty list to store token IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Actionality' annotation in the CAS
    for relation in cas.select('webanno.custom.Actionality'):
        # Looping through each token covered by the 'Actionality' annotation
        for token in cas.select_covered('webanno.custom.Actionality', relation):
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            list_token_ids.append(id)  # Appending the token ID to the list

print(list_token_ids)


['7504Aen', '7614Aen', '7807Aen', '627Ap', '638Ap', '2954Ap', '3306Ap', '4001Ap', '6965Ap', '9077Ap', '11123Ap', '11895Ap', '13304Ap', '14102Ap', '14123Ap', '16724Ap', '20808Ap', '21324Ap', '21919Ap', '22995Ap', '23699Ap', '27261Ap', '29687Ap', '31740Ap', '32160Ap', '32491Ap', '34900Ap', '35126Ap', '37524Ap', '38710Ap', '40020Ap', '40049Ap', '40257Ap', '40602Ap', '43327Ap', '44239Ap', '46122Ap', '46705Ap', '47736Ap', '51135Ap', '51515Ap', '53490Ap', '54378Ap', '55177Ap', '55297Ap', '55702Ap', '55779Ap', '57275Ap', '61144Ap', '61380Ap', '62018Ap', '63203Ap', '65487Ap', '66768Ap', '67051Ap', '69323Ap', '69546Ap', '69809Ap', '71331Ap', '71779Ap', '72278Ap', '74642Ap', '75734Ap', '76958Ap', '77867Ap', '79760Ap', '80340Ap', '84767Ap', '85975Ap', '87247Ap', '89491Ap', '90315Ap', '92077Ap', '92526Ap', '93160Ap', '93188Ap', '95922Ap', '96697Ap', '98497Ap', '99825Ap', '102331Ap', '105548Ap', '105746Ap', '109735Ap', '110168Ap', '110868Ap', '111432Ap', '113591Ap', '115140Ap', '116038Ap', '117021A

In [32]:
# Create a dictionary where each token ID from idlist is mapped to its corresponding preverb semantics from list_preverb_semantics
pred2_preverb_semantics = {list_token_ids[i]: list_preverb_semantics[i] for i in range(len(list_token_ids))}
pred2_preverb_semantics

{'7504Aen': '(idea of destruction/death), across',
 '7614Aen': 'together',
 '7807Aen': 'together',
 '627Ap': 'before',
 '638Ap': 'to',
 '2954Ap': 'completely',
 '3306Ap': 'under',
 '4001Ap': 'completely',
 '6965Ap': 'to',
 '9077Ap': 'forth',
 '11123Ap': 'away',
 '11895Ap': 'forth',
 '13304Ap': 'together',
 '14102Ap': 'through',
 '14123Ap': 'away',
 '16724Ap': 'to',
 '20808Ap': 'away',
 '21324Ap': 'forth',
 '21919Ap': 'away',
 '22995Ap': 'onwards',
 '23699Ap': 'into',
 '27261Ap': 'away',
 '29687Ap': 'around',
 '31740Ap': 'completely',
 '32160Ap': 'into',
 '32491Ap': 'out',
 '34900Ap': 'away',
 '35126Ap': 'together',
 '37524Ap': 'against',
 '38710Ap': 'against',
 '40020Ap': '(idea of destruction/death), across',
 '40049Ap': '(idea of destruction/death), across',
 '40257Ap': 'away',
 '40602Ap': 'to',
 '43327Ap': 'to',
 '44239Ap': 'away',
 '46122Ap': 'under',
 '46705Ap': 'up, from under',
 '47736Ap': 'to',
 '51135Ap': 'to',
 '51515Ap': 'away',
 '53490Ap': 'over',
 '54378Ap': 'away',
 '5517

In [33]:
preverb_semantics_df = pd.DataFrame([(k,v) for k,v in pred2_preverb_semantics.items()], columns=["ID", "PREVERB SEMANTICS"]) #where 'preverb_semantics_df' is a dataframe containing token IDs and preverb semantics of the verb tokens
preverb_semantics_df

Unnamed: 0,ID,PREVERB SEMANTICS
0,7504Aen,"(idea of destruction/death), across"
1,7614Aen,together
2,7807Aen,together
3,627Ap,before
4,638Ap,to
...,...,...
1478,435595Verg,forward
1479,436869Verg,under
1480,438653Verg,to
1481,440736Verg,under


In [34]:
id_preverb_semantics_df = id_preverbs_df.merge(preverb_semantics_df, on='ID', how='left') #where 'id_preverb_semantics_df' merges 'id_preverbs_df' and 'preverb_semantics_df'
id_preverb_semantics_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across"
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to
...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under


### Verb semantics

In [35]:
list_synsets = []  # Initializing an empty list to store semantic classes
tok2_synsets = dict()  # Initializing an empty dictionary to store predicted semantic classes mapped to IDs
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'SemClass' annotation in the CAS
    for relation in cas.select('webanno.custom.SemClass'):
        # Looping through each token covered by the 'SemClass' annotation
        for token in cas.select_covered('webanno.custom.SemClass', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            synset = relation.Synsets  # Getting the semantic class value
            
            # Checking if the ID already exists in tok2_synsets dictionary
            if id in tok2_synsets:
                # If ID exists, append the new semantic class to the existing list of semantic classes
                list_synsets = tok2_synsets[id] + synset 
                tok2_synsets[id] = list_synsets
            else:
                # If ID doesn't exist, store the semantic class directly
                tok2_synsets[id] = synset

print(tok2_synsets)


{'7489Aen': uima_cas_StringArray(xmiID=None, elements=['n#06727012 extensive tract of level open land'], type=Type(name=uima.cas.StringArray)), '7504Aen': uima_cas_StringArray(xmiID=None, elements=['v#00250254 pass from physical life and lose all all bodily attributes and functions necessary to sustain life'], type=Type(name=uima.cas.StringArray)), '7614Aen': uima_cas_StringArray(xmiID=None, elements=['v#01075789 crash together with violent impact'], type=Type(name=uima.cas.StringArray)), '7799Aen': uima_cas_StringArray(xmiID=None, elements=['r#L2533591 from any or all places'], type=Type(name=uima.cas.StringArray)), '7807Aen': uima_cas_StringArray(xmiID=None, elements=['v#01654097 collect in one place'], type=Type(name=uima.cas.StringArray)), '7831Aen': uima_cas_StringArray(xmiID=None, elements=['n#03176413 a body that is thrown or projected'], type=Type(name=uima.cas.StringArray)), '7836Aen': uima_cas_StringArray(xmiID=None, elements=['n#07168973 an officer in command of a military u

In [36]:
list_synsets = []  # Initializing an empty list to store synsets

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'SemClass' annotation in the CAS
    for relation in cas.select('webanno.custom.SemClass'):
        synset = relation.Synsets  # Getting the synset value
        # Extracting the useful information from the StringArray elements
        synset_values = [element.strip().strip("'") for element in synset.elements]
        # Appending the synset values as a sublist
        list_synsets.append(synset_values)

print(list_synsets)


[['n#06727012 extensive tract of level open land'], ['v#00250254 pass from physical life and lose all all bodily attributes and functions necessary to sustain life'], ['v#01075789 crash together with violent impact'], ['r#L2533591 from any or all places'], ['v#01654097 collect in one place'], ['n#03176413 a body that is thrown or projected'], ['n#07168973 an officer in command of a military unit', 'n#00004123 a human being'], ['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'], ['n#07632177 a person whose occupation is teaching'], ['v#01661609 be a guiding force, as with directions or advice'], ['v#01661230 begin to deal with'], ['v#00250254 pass from physical life and lose all all bodily attributes and functions necessary to sustain life'], ['n#03247107 a long thin implement made of metal or wood'], ['n#04292200 back part of the head or skull'], ['n#04296952 the passage to the stomach and lungs; in the front part of the neck below

In [37]:
list_token_synsets_ids = []  # Initializing an empty list to store IDs of tokens annotated with a synset (i.e., verbs and nouns)
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'SemClass' annotation in the CAS
    for relation in cas.select('webanno.custom.SemClass'):
        # Looping through each token covered by the 'SemClass' annotation
        for token in cas.select_covered('webanno.custom.SemClass', relation):
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            list_token_synsets_ids.append(id)  # Appending the token ID to the list

print(list_token_synsets_ids)


['7489Aen', '7504Aen', '7614Aen', '7799Aen', '7807Aen', '7831Aen', '7836Aen', '583Ap', '618Ap', '627Ap', '638Ap', '2954Ap', '3256Ap', '3281Ap', '3296Ap', '3306Ap', '3991Ap', '4001Ap', '6953Ap', '6965Ap', '9077Ap', '11123Ap', '11134Ap', '11872Ap', '11884Ap', '11895Ap', '13271Ap', '13304Ap', '14094Ap', '14102Ap', '14123Ap', '16724Ap', '20796Ap', '20808Ap', '21324Ap', '21891Ap', '21919Ap', '22995Ap', '23699Ap', '27261Ap', '29687Ap', '31723Ap', '31740Ap', '32160Ap', '32474Ap', '32491Ap', '34879Ap', '34900Ap', '35126Ap', '37451Ap', '37514Ap', '37524Ap', '38703Ap', '38710Ap', '40020Ap', '40049Ap', '40257Ap', '40596Ap', '40602Ap', '43327Ap', '44239Ap', '46122Ap', '46697Ap', '46705Ap', '47736Ap', '51105Ap', '51135Ap', '51501Ap', '51515Ap', '53484Ap', '53490Ap', '54378Ap', '55177Ap', '55198Ap', '55297Ap', '55694Ap', '55702Ap', '55779Ap', '57275Ap', '57348Ap', '61136Ap', '61144Ap', '61380Ap', '61963Ap', '62018Ap', '63194Ap', '63203Ap', '65479Ap', '65487Ap', '66768Ap', '67051Ap', '69323Ap', '6953

In [38]:
# Create a dictionary where each token ID from syn_idlist is mapped to its corresponding synset from list_synsets
pred2_synset = {list_token_synsets_ids[i]: list_synsets[i] for i in range(len(list_token_synsets_ids))}

pred2_synset


{'7489Aen': ['n#06727012 extensive tract of level open land'],
 '7504Aen': ['v#00250254 pass from physical life and lose all all bodily attributes and functions necessary to sustain life'],
 '7614Aen': ['v#01075789 crash together with violent impact'],
 '7799Aen': ['r#L2533591 from any or all places'],
 '7807Aen': ['v#01654097 collect in one place'],
 '7831Aen': ['n#03176413 a body that is thrown or projected'],
 '7836Aen': ['n#07168973 an officer in command of a military unit',
  'n#00004123 a human being'],
 '583Ap': ['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'],
 '618Ap': ['n#07632177 a person whose occupation is teaching'],
 '627Ap': ['v#01661609 be a guiding force, as with directions or advice'],
 '638Ap': ['v#01661230 begin to deal with'],
 '2954Ap': ['v#00250254 pass from physical life and lose all all bodily attributes and functions necessary to sustain life'],
 '3256Ap': ['n#03247107 a long thin implement made of met

In [39]:
synsets_df = pd.DataFrame([(k,v) for k,v in pred2_synset.items()], columns=["ID", "VERB SEMANTICS"]) #where 'synsets_df' is a dataframe containing token IDs and synsets of all the tokens (verbs and nouns) annotated with a synset
synsets_df

Unnamed: 0,ID,VERB SEMANTICS
0,7489Aen,[n#06727012 extensive tract of level open land]
1,7504Aen,[v#00250254 pass from physical life and lose a...
2,7614Aen,[v#01075789 crash together with violent impact]
3,7799Aen,[r#L2533591 from any or all places]
4,7807Aen,[v#01654097 collect in one place]
...,...,...
2863,438653Verg,[v#01410345 run or move very quickly or hastily]
2864,440708Verg,[n#06669293 a lump of hard consolidated minera...
2865,440736Verg,[v#01343923 raise from a lower to a higher pos...
2866,441937Verg,[v#00988556 penetrate or cut through with a sh...


In [40]:
id_synsets_df = id_preverb_semantics_df.merge(synsets_df, on='ID', how='left') #where 'id_synsets_df' merges 'id_preverb_semantics_df' and 'synsets_df', basically adding the synset to the other verb parameters contained in 'id_preverb_semantics_df'
id_synsets_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS,VERB SEMANTICS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across",[v#00250254 pass from physical life and lose a...
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together,[v#01075789 crash together with violent impact]
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together,[v#01654097 collect in one place]
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before,"[v#01661609 be a guiding force, as with direct..."
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to,[v#01661230 begin to deal with]
...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward,[v#01410345 run or move very quickly or hastily]
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under,[v#01737682 help in a difficult situation]
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to,[v#01410345 run or move very quickly or hastily]
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under,[v#01343923 raise from a lower to a higher pos...


### Figure 

In [41]:
list_figure_synsets = []  # Initializing an empty list to store figure synsets
tok2_figure_synsets = dict()  # Initializing an empty dictionary to store predicted figure synsets mapped to IDs
count= 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping fthrough each 'Figuresynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Figuresynset'):
        # Looping through each token covered by the 'Figuresynset' annotation
        for token in cas.select_covered('webanno.custom.Figuresynset', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            figure_synset = relation.FigSyn  # Getting the figure synset value
            
            # Checking if the ID already exists in tok2_figure_synsets dictionary
            if id in tok2_figure_synsets:
                # If ID exists, append the new figure synset to the existing list of figure synsets
                list_figure_synsets = tok2_figure_synsets[id] + figure_synset
                tok2_figure_synsets[id] = list_figure_synsets
            else:
                # If ID doesn't exist, store the figure synset directly
                tok2_figure_synsets[id] = figure_synset 

print(tok2_figure_synsets)


{'7504Aen': uima_cas_StringArray(xmiID=None, elements=['n#00004123 a human being'], type=Type(name=uima.cas.StringArray)), '7614Aen': uima_cas_StringArray(xmiID=None, elements=['n#00004123 a human being'], type=Type(name=uima.cas.StringArray)), '7807Aen': uima_cas_StringArray(xmiID=None, elements=['n#03176413 a body that is thrown or projected'], type=Type(name=uima.cas.StringArray)), '627Ap': uima_cas_StringArray(xmiID=None, elements=['n#07632177 a person whose occupation is teaching'], type=Type(name=uima.cas.StringArray)), '638Ap': uima_cas_StringArray(xmiID=None, elements=['n#00004123 a human being'], type=Type(name=uima.cas.StringArray)), '2954Ap': uima_cas_StringArray(xmiID=None, elements=['n#00004123 a human being'], type=Type(name=uima.cas.StringArray)), '3306Ap': uima_cas_StringArray(xmiID=None, elements=['n#03247107 a long thin implement made of metal or wood'], type=Type(name=uima.cas.StringArray)), '4001Ap': uima_cas_StringArray(xmiID=None, elements=['n#00004123 a human bei

In [42]:
list2_figure_synsets = []  # Initializing an empty list to store figure synsets
count= 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Figuresynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Figuresynset'):
        figure_synset = relation.FigSyn  # Getting the figure synset value
        list2_figure_synsets.append(str(figure_synset))  # Appending the figure synset to the list

# Removing unwanted characters from each element in list2_figure_synsets
list2_figure_synsets = [s.replace('u.c.StringArray(elements=', '') for s in list2_figure_synsets]

print(list2_figure_synsets)


["['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#03176413 a body that is thrown or projected'])", "['n#07632177 a person whose occupation is teaching'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#03247107 a long thin implement made of metal or wood'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['*clause*'])", "['n#00004123 a human being'])", "['n#05560878 any strong feeling'])", "['n#04778525 a language unit by which a person or thing is known'])", "['n#01421416 primitive multicellular marine animal whose porous body is supported by a fibrous skeletal framework; usually occurs in sessile colonies'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#05437110 an event'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#00004123 a human being'])", "['n#00004123 a huma

In [43]:
# Removing '])' characters from each string in fslist2 and storing the modified strings in list_figure_synsets
list_figure_synsets = [l.replace(')', '') for l in list2_figure_synsets]

print(list_figure_synsets)

["['n#00004123 a human being']", "['n#00004123 a human being']", "['n#03176413 a body that is thrown or projected']", "['n#07632177 a person whose occupation is teaching']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#03247107 a long thin implement made of metal or wood']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['*clause*']", "['n#00004123 a human being']", "['n#05560878 any strong feeling']", "['n#04778525 a language unit by which a person or thing is known']", "['n#01421416 primitive multicellular marine animal whose porous body is supported by a fibrous skeletal framework; usually occurs in sessile colonies']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#05437110 an event']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#00004123 a human being']", "['n#000041

In [44]:
list_token_figure_synset_ids = []  # Initializing an empty list to store IDs of tokens annotated with a figure
count= 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Figuresynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Figuresynset'):
        # Looping through each token covered by the 'Figuresynset' annotation
        for token in cas.select_covered('webanno.custom.Figuresynset', relation):
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            list_token_figure_synset_ids.append(id)  # Appending the token ID to the list

print(list_token_figure_synset_ids)


['7504Aen', '7614Aen', '7807Aen', '627Ap', '638Ap', '2954Ap', '3306Ap', '4001Ap', '6965Ap', '9077Ap', '11123Ap', '11895Ap', '13304Ap', '14102Ap', '14123Ap', '16724Ap', '20808Ap', '21324Ap', '21919Ap', '22995Ap', '23699Ap', '27261Ap', '29687Ap', '31740Ap', '32160Ap', '32491Ap', '34900Ap', '35126Ap', '37524Ap', '38710Ap', '40020Ap', '40049Ap', '40257Ap', '40602Ap', '43327Ap', '44239Ap', '46122Ap', '46705Ap', '47736Ap', '51135Ap', '51515Ap', '53490Ap', '54378Ap', '55177Ap', '55297Ap', '55702Ap', '55779Ap', '57275Ap', '61144Ap', '61380Ap', '62018Ap', '63203Ap', '65487Ap', '66768Ap', '67051Ap', '69323Ap', '69546Ap', '69809Ap', '71331Ap', '71779Ap', '72278Ap', '74642Ap', '75734Ap', '76958Ap', '77867Ap', '79760Ap', '80340Ap', '84767Ap', '85975Ap', '87247Ap', '89491Ap', '90315Ap', '92077Ap', '92526Ap', '93160Ap', '93188Ap', '95922Ap', '96697Ap', '98497Ap', '99825Ap', '102331Ap', '105548Ap', '105746Ap', '109735Ap', '110168Ap', '110868Ap', '111432Ap', '113591Ap', '115140Ap', '116038Ap', '117021A

In [45]:
# Create a dictionary where each token ID from list_token_figure_synset_ids is mapped to its corresponding figure synset from list_figure_synsets
tok2_figure_synsets = {list_token_figure_synset_ids[i]: list_figure_synsets[i] for i in range(len(list_token_figure_synset_ids))}

tok2_figure_synsets



{'7504Aen': "['n#00004123 a human being']",
 '7614Aen': "['n#00004123 a human being']",
 '7807Aen': "['n#03176413 a body that is thrown or projected']",
 '627Ap': "['n#07632177 a person whose occupation is teaching']",
 '638Ap': "['n#00004123 a human being']",
 '2954Ap': "['n#00004123 a human being']",
 '3306Ap': "['n#03247107 a long thin implement made of metal or wood']",
 '4001Ap': "['n#00004123 a human being']",
 '6965Ap': "['n#00004123 a human being']",
 '9077Ap': "['*clause*']",
 '11123Ap': "['n#00004123 a human being']",
 '11895Ap': "['n#05560878 any strong feeling']",
 '13304Ap': "['n#04778525 a language unit by which a person or thing is known']",
 '14102Ap': "['n#01421416 primitive multicellular marine animal whose porous body is supported by a fibrous skeletal framework; usually occurs in sessile colonies']",
 '14123Ap': "['n#00004123 a human being']",
 '16724Ap': "['n#00004123 a human being']",
 '20808Ap': "['n#00004123 a human being']",
 '21324Ap': "['n#05437110 an event']

In [46]:
figure_synsets_df = pd.DataFrame([(k,v) for k,v in tok2_figure_synsets.items()], columns=["ID", "FIGURE SEMANTICS"]) #where 'figure_synsets_df' is a dataframe containing token IDs and figure synsets of all the tokens annotated with a figure 
figure_synsets_df

Unnamed: 0,ID,FIGURE SEMANTICS
0,7504Aen,['n#00004123 a human being']
1,7614Aen,['n#00004123 a human being']
2,7807Aen,['n#03176413 a body that is thrown or projected']
3,627Ap,['n#07632177 a person whose occupation is teac...
4,638Ap,['n#00004123 a human being']
...,...,...
1478,435595Verg,['n#06888584 a female deity']
1479,436869Verg,['n#06888584 a female deity']
1480,438653Verg,['n#10169961 a serious (sometimes fatal infect...
1481,440736Verg,['n#00004123 a human being']


In [47]:
id_figure_synsets_df = id_synsets_df.merge(figure_synsets_df, on='ID', how='left') #where 'id_figure_synsets_df' merges 'id_synsets_df' and 'figure_synsets_df', basically adding the figure synset to the other parameters contained in 'id_synsets_df'
id_figure_synsets_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS,VERB SEMANTICS,FIGURE SEMANTICS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across",[v#00250254 pass from physical life and lose a...,['n#00004123 a human being']
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together,[v#01075789 crash together with violent impact],['n#00004123 a human being']
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together,[v#01654097 collect in one place],['n#03176413 a body that is thrown or projected']
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before,"[v#01661609 be a guiding force, as with direct...",['n#07632177 a person whose occupation is teac...
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to,[v#01661230 begin to deal with],['n#00004123 a human being']
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward,[v#01410345 run or move very quickly or hastily],['n#06888584 a female deity']
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under,[v#01737682 help in a difficult situation],['n#06888584 a female deity']
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to,[v#01410345 run or move very quickly or hastily],['n#10169961 a serious (sometimes fatal infect...
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under,[v#01343923 raise from a lower to a higher pos...,['n#00004123 a human being']


### Ground synset

In [48]:
list_ground_synsets = []  # Initializing an empty list to store ground synsets
tok2_ground_synsets = dict()  # Initializing an empty dictionary to store predicted ground synsets mapped to IDs
count= 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Groundsynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Groundsynset'):
        # Looping through each token covered by the 'Groundsynset' annotation
        for token in cas.select_covered('webanno.custom.Groundsynset', relation):
            tok = token.get_covered_text()  # Getting the text covered by the current token
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            ground_synset = relation.GroundSyn  # Getting the ground synset value
            
            # Checking if the ID already exists in tok2_ground_synsets dictionary
            if id in tok2_ground_synsets:
                # If ID exists, append the new ground synset to the existing list of ground synsets
                list_ground_synsets = tok2_ground_synsets[id] + [ground_synset]
                tok2_ground_synsets[id] = list_ground_synsets
            else:
                # If ID doesn't exist, store the ground synset directly
                tok2_ground_synsets[id] = [ground_synset]

print(tok2_ground_synsets)


{'7807Aen': [uima_cas_StringArray(xmiID=None, elements=['n#07168973 an officer in command of a military unit', 'n#00004123 a human being'], type=Type(name=uima.cas.StringArray))], '638Ap': [uima_cas_StringArray(xmiID=None, elements=['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'], type=Type(name=uima.cas.StringArray))], '3306Ap': [uima_cas_StringArray(xmiID=None, elements=['n#04292200 back part of the head or skull'], type=Type(name=uima.cas.StringArray))], '4001Ap': [uima_cas_StringArray(xmiID=None, elements=['n#06382213 an urban area with a fixed boundary that is smaller than a city'], type=Type(name=uima.cas.StringArray))], '6965Ap': [uima_cas_StringArray(xmiID=None, elements=['n#04964487 a public exhibition or entertainment'], type=Type(name=uima.cas.StringArray))], '14102Ap': [uima_cas_StringArray(xmiID=None, elements=['n#06789983 a large natural stream of water (larger than a creek)'], type=Type(name=uima.cas.StringArray))

In [49]:
list_ground_synsets = []  # Initializing an empty list to store ground synsets

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Groundsynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Groundsynset'):
        ground_synset = relation.GroundSyn  # Getting the ground synset value
        # Extracting the useful information from the StringArray elements
        ground_synset_values = [element.strip().strip("'") for element in ground_synset.elements]
        # Appending the ground synset values as a sublist
        list_ground_synsets.append(ground_synset_values)

print(list_ground_synsets)


[['n#07168973 an officer in command of a military unit', 'n#00004123 a human being'], ['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'], ['n#04292200 back part of the head or skull'], ['n#06382213 an urban area with a fixed boundary that is smaller than a city'], ['n#04964487 a public exhibition or entertainment'], ['n#06789983 a large natural stream of water (larger than a creek)'], ['n#06391772 a wild and uninhabited area'], ['n#02666884 a house for the farmer and family'], ['n#02837386 a dwelling that serves as living quarters for one or more families'], ['n#04295972 the part of the skull of a vertebrate that frames the mouth and holds the teeth'], ['n#02837386 a dwelling that serves as living quarters for one or more families'], ['n#04122028 the organ of sight (\\"peeper\\" is an informal term for \\"eye\\")'], ['n#04122028 the organ of sight (\\"peeper\\" is an informal term for \\"eye\\")'], ['n#05477241 one of a series of 

In [51]:
list_token_ground_synset_ids = []  # Initializing an empty list to store IDs of tokens annotated with a ground 
count= 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Groundsynset' annotation in the CAS
    for relation in cas.select('webanno.custom.Groundsynset'):
        # Looping through each token covered by the 'Groundsynset' annotation
        for token in cas.select_covered('webanno.custom.Groundsynset', relation):
            id = token.begin  # Getting the beginning offset of the token
            id = str(id) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            list_token_ground_synset_ids.append(id)  # Appending the token ID to the list

print(list_token_ground_synset_ids)


['7807Aen', '638Ap', '3306Ap', '4001Ap', '6965Ap', '14102Ap', '20808Ap', '21919Ap', '31740Ap', '32491Ap', '34900Ap', '37524Ap', '38710Ap', '46705Ap', '51135Ap', '53490Ap', '55702Ap', '63203Ap', '69546Ap', '71331Ap', '71779Ap', '74642Ap', '79760Ap', '84767Ap', '87247Ap', '89491Ap', '90315Ap', '93188Ap', '95922Ap', '99825Ap', '105746Ap', '109735Ap', '110168Ap', '110868Ap', '111432Ap', '113591Ap', '116038Ap', '117021Ap', '125471Ap', '129207Ap', '133136Ap', '134534Ap', '134636Ap', '140607Ap', '150834Ap', '158305Ap', '161942Ap', '162189Ap', '162465Ap', '1498Caes', '4296Caes', '4578Caes', '4854Caes', '4978Caes', '5732Caes', '5997Caes', '9426Caes', '9649Caes', '10407Caes', '10617Caes', '10862Caes', '10890Caes', '10931Caes', '11152Caes', '11818Caes', '12323Caes', '21567Caes', '24504Caes', '25610Caes', '26036Caes', '26268Caes', '26640Caes', '27386Caes', '27831Caes', '28359Caes', '29918Caes', '30767Caes', '31365Caes', '33967Caes', '34210Caes', '37157Caes', '37517Caes', '46744Caes', '46852Caes', 

In [52]:
# Creating a dictionary where each token ID from gs_idlist is mapped to its corresponding ground synset from gslist
tok2_ground_synset = {list_token_ground_synset_ids[i]: list_ground_synsets[i] for i in range(len(list_token_ground_synset_ids))}

tok2_ground_synset


{'7807Aen': ['n#07168973 an officer in command of a military unit',
  'n#00004123 a human being'],
 '638Ap': ['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'],
 '3306Ap': ['n#04292200 back part of the head or skull'],
 '4001Ap': ['n#06382213 an urban area with a fixed boundary that is smaller than a city'],
 '6965Ap': ['n#04964487 a public exhibition or entertainment'],
 '14102Ap': ['n#06789983 a large natural stream of water (larger than a creek)'],
 '20808Ap': ['n#06391772 a wild and uninhabited area'],
 '21919Ap': ['n#02666884 a house for the farmer and family'],
 '31740Ap': ['n#02837386 a dwelling that serves as living quarters for one or more families'],
 '32491Ap': ['n#04295972 the part of the skull of a vertebrate that frames the mouth and holds the teeth'],
 '34900Ap': ['n#02837386 a dwelling that serves as living quarters for one or more families'],
 '37524Ap': ['n#04122028 the organ of sight (\\"peeper\\" is an informal

In [53]:
ground_synsets_df = pd.DataFrame([(k,v) for k,v in tok2_ground_synset.items()], columns=["ID", "GROUND SEMANTICS"]) #where 'ground_synsets_df' is a dataframe containing token IDs and ground synsets of all the tokens annotated with a ground 
ground_synsets_df

Unnamed: 0,ID,GROUND SEMANTICS
0,7807Aen,[n#07168973 an officer in command of a militar...
1,638Ap,[n#05167497 a human written or spoken language...
2,3306Ap,[n#04292200 back part of the head or skull]
3,4001Ap,[n#06382213 an urban area with a fixed boundar...
4,6965Ap,[n#04964487 a public exhibition or entertainment]
...,...,...
654,428401Verg,[n#00004123 a human being]
655,436869Verg,[n#07127521 a male with the same parents as so...
656,438653Verg,[n#06381267 a point located with respect to su...
657,440736Verg,[n#06669293 a lump of hard consolidated minera...


In [54]:
id_ground_synsets_df = id_figure_synsets_df.merge(ground_synsets_df, on='ID', how='left') #where 'id_ground_synsets_df' merges 'id_figure_synsets_df' and 'ground_synsets_df', basically adding the ground synset to the other parameters contained in 'id_figure_synsets_df'
id_ground_synsets_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS,VERB SEMANTICS,FIGURE SEMANTICS,GROUND SEMANTICS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across",[v#00250254 pass from physical life and lose a...,['n#00004123 a human being'],
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together,[v#01075789 crash together with violent impact],['n#00004123 a human being'],
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together,[v#01654097 collect in one place],['n#03176413 a body that is thrown or projected'],[n#07168973 an officer in command of a militar...
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before,"[v#01661609 be a guiding force, as with direct...",['n#07632177 a person whose occupation is teac...,
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to,[v#01661230 begin to deal with],['n#00004123 a human being'],[n#05167497 a human written or spoken language...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward,[v#01410345 run or move very quickly or hastily],['n#06888584 a female deity'],
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under,[v#01737682 help in a difficult situation],['n#06888584 a female deity'],[n#07127521 a male with the same parents as so...
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to,[v#01410345 run or move very quickly or hastily],['n#10169961 a serious (sometimes fatal infect...,[n#06381267 a point located with respect to su...
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under,[v#01343923 raise from a lower to a higher pos...,['n#00004123 a human being'],[n#06669293 a lump of hard consolidated minera...


In [55]:
#Exporting id_ground_synsets_df to csv
id_ground_synsets_df.to_csv('Latin_without_spatial_relations.csv')

### Obtaining participants (Figure, Ground) lemmas

In [56]:
#Mapping verb token IDs to participants IDs

list_participants = []  # Initializing an empty list to store participants
tok2_participants = dict()  # Initializing an empty dictionary to store tokens mapped to their corresponding participants
count = 0  # Initializing a count variable

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Participants' annotation in the CAS
    for relation in cas.select('webanno.custom.Paticipants'):
        dep = relation.Dependent  # Dependent token of the participant relation
        tokdep = dep.get_covered_text()  # Covered text of the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
        list_participants.append(str(tokdep))  # Appending the covered text of the dependent token to srlist

        gov = relation.Governor  # Governor token of the participant relation
        toksr = gov.get_covered_text()  # Covered text of the governor token
        id2 = str(gov.begin) + file_input_abbr  # Creating a unique identifier by appending the file abbreviation to the offset of the governor token
        
        # Checking if the ID already exists in tok2_participants
        if id in tok2_participants:
            list_participants = tok2_participants[id] + [id2]  # If so, append the participant token ID to the existing list
            tok2_participants[id] = list_participants  # Update the dictionary with the appended list
        else:
            tok2_participants[id] = [id2]  # If not, create a new entry with a list containing the participant token ID

print(tok2_participants)  



{'7807Aen': ['7831Aen', '7836Aen', 'praeeunte', 'aggressus', 'perveneritis', 'obiturus', 'provenire', 'convenire', 'transeas', 'aufugi', 'abierunt', 'pervenimus', 'exire', 'evolo', 'occurrit', 'occurrit'], '627Ap': ['618Ap'], '638Ap': ['583Ap'], '4001Ap': ['3991Ap'], '6965Ap': ['6953Ap'], '11895Ap': ['11872Ap'], '13304Ap': ['13271Ap'], '14102Ap': ['14094Ap'], '20808Ap': ['20796Ap'], '21919Ap': ['21891Ap'], '31740Ap': ['31723Ap'], '32491Ap': ['32474Ap'], '34900Ap': ['34879Ap'], '37524Ap': ['37451Ap', '37514Ap', 'occurrens', 'advenit', 'subit', 'adire', 'aufugere', 'obeas', 'abis', 'accurro', 'Occurrit', 'convenissent', 'advenit', 'involo', 'procurrens', 'aggressus', 'decurrit', 'decurrit'], '38710Ap': ['38703Ap'], '40602Ap': ['40596Ap'], '46705Ap': ['46697Ap'], '51135Ap': ['51105Ap'], '51515Ap': ['51501Ap'], '53490Ap': ['53484Ap'], '55177Ap': ['55198Ap'], '55702Ap': ['55694Ap'], '57275Ap': ['57348Ap'], '61144Ap': ['61136Ap'], '62018Ap': ['61963Ap'], '63203Ap': ['63194Ap'], '65487Ap': ['

In [57]:
participants_df = pd.DataFrame([(k,v) for k,v in tok2_participants.items()], columns=["ID", "PARTICIPANT ID"])  #where 'part_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a participant relation (i.e., Figure/Ground)
participants_df

Unnamed: 0,ID,PARTICIPANT ID
0,7807Aen,"[7831Aen, 7836Aen, praeeunte, aggressus, perve..."
1,627Ap,[618Ap]
2,638Ap,[583Ap]
3,4001Ap,[3991Ap]
4,6965Ap,[6953Ap]
...,...,...
906,435595Verg,[435621Verg]
907,436869Verg,"[436844Verg, 436880Verg, occurrere, subirent, ..."
908,438653Verg,[438644Verg]
909,440736Verg,[440708Verg]


In [58]:
list_participant_lemmas = []  # Initializing an empty list to store participant lemmas
tok2_participant_lemmas = dict()  # Initializing an empty dictionary to map tokens to their participant lemmas
count = 0  # Initializing a count variable

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Lemma' annotation in the CAS
    for relation in cas.select('de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma'):
        # Looping through each token covered by 'Lemma' annotation
        for token in cas.select_covered('de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma', relation):
            tok = token.get_covered_text()  # Covered text of the token
            id = str(token.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            lemma = relation.value  # Value of the lemma relation
            
            # Checking if the ID already exists in tok2_participant_lemmas
            if id in tok2_participant_lemmas:
                list_lemmas = tok2_participant_lemmas[id] + lemma  # If so, append the value to the existing list of lemma fillers
                tok2_participant_lemmas[id] = list_lemmas  # Update the dictionary with the appended list
            else:
                tok2_participant_lemmas[id] = lemma  # If not, create a new entry with the lemma filler value

print(tok2_participant_lemmas)  


{'7489Aen': 'campus', '7504Aen': 'pereo', '7614Aen': 'concurro', '7799Aen': 'undique', '7807Aen': 'convenio', '7831Aen': 'telum', '7836Aen': 'tribunus', '583Ap': 'sermo', '618Ap': 'magister', '627Ap': 'praeeo', '638Ap': 'adgredior', '2954Ap': 'intereo', '3256Ap': 'bacillum', '3278Ap': 'ad', '3281Ap': 'occipitium', '3292Ap': 'per', '3296Ap': 'ingluvies', '3306Ap': 'subeo', '3991Ap': 'civitas', '4001Ap': 'pervenio', '6953Ap': 'spectaculum', '6965Ap': 'obeo', '9077Ap': 'provenio', '11123Ap': 'aufugio', '11134Ap': 'istinc', '11872Ap': 'affectus', '11881Ap': 'in', '11884Ap': 'contrarius', '11895Ap': 'provenio', '13271Ap': 'nomen', '13304Ap': 'convenio', '14090Ap': 'per', '14094Ap': 'fluvius', '14102Ap': 'transeo', '14123Ap': 'abeo', '16724Ap': 'adgredior', '20774Ap': 'per', '20796Ap': 'solitudo', '20808Ap': 'aufugio', '21324Ap': 'provenio', '21888Ap': 'ad', '21891Ap': 'villula', '21919Ap': 'abeo', '22995Ap': 'progredior', '23699Ap': 'invenio', '27261Ap': 'abeo', '29687Ap': 'circumeo', '3172

In [59]:
participant_lemmas_df = pd.DataFrame([(k,v) for k,v in tok2_participant_lemmas.items()], columns=["PARTICIPANT ID", "PARTICIPANT LEMMA"]) #where 'part_lem_df' is a dataframe containing IDs of all the tokens annotated with a participant relation (i.e., Figure/Ground) and their lemma
participant_lemmas_df

Unnamed: 0,PARTICIPANT ID,PARTICIPANT LEMMA
0,7489Aen,campus
1,7504Aen,pereo
2,7614Aen,concurro
3,7799Aen,undique
4,7807Aen,convenio
...,...,...
3282,440708Verg,ille
3283,440736Verg,subeo
3284,441917Verg,per
3285,441937Verg,transeo


In [60]:
# Initialize a new dictionary to store the mapping of IDs to lemmas
pred2_participant_lemma = {}

# Iterate over the keys of pred2_lemmas
for id, lemma in pred2_lemmas.items():
    # Check if the ID exists in tok2_participants
    if id in tok2_participants:
        # If so, find the corresponding participant IDs
        participant_ids = tok2_participants[id]
        # Check if there are participant IDs
        if participant_ids:
            # Initialize a list to store the lemmas corresponding to the participant IDs
            participant_lemmas = []
            for participant_id in participant_ids:
                # Lookup the lemma corresponding to the participant ID
                if participant_id in pred2_lemmas:
                    participant_lemma = pred2_lemmas[participant_id]
                    participant_lemmas.append(participant_lemma)
            # Store the list of lemmas corresponding to the participant IDs
            pred2_participant_lemma[id] = participant_lemmas

print(pred2_participant_lemma)


{'7807Aen': ['telum', 'tribunus'], '627Ap': ['magister'], '638Ap': ['sermo'], '4001Ap': ['civitas'], '6965Ap': ['spectaculum'], '11895Ap': ['affectus'], '13304Ap': ['nomen'], '14102Ap': ['fluvius'], '20808Ap': ['solitudo'], '21919Ap': ['villula'], '31740Ap': ['domus'], '32491Ap': ['fauces'], '34900Ap': ['hospitium'], '37524Ap': ['pars', 'lumen'], '38710Ap': ['oculus'], '40602Ap': ['Liber'], '46705Ap': ['fluctus'], '51135Ap': ['locus'], '51515Ap': ['mortuus'], '53490Ap': ['munus'], '55177Ap': ['bestia'], '55702Ap': ['cadaver'], '57275Ap': ['senex'], '61144Ap': ['reliquus'], '62018Ap': ['dies'], '63203Ap': ['latro'], '65487Ap': ['populus'], '69546Ap': ['ego'], '71331Ap': ['mulier', 'theatrum'], '71779Ap': ['fortuna'], '72278Ap': ['puer'], '74642Ap': ['domus', 'magistratus'], '79760Ap': ['is'], '84767Ap': ['ego', 'Fotis'], '87247Ap': ['iste', 'cogitatio'], '89491Ap': ['asinus'], '90315Ap': ['asinus'], '93188Ap': ['auxilium'], '95922Ap': ['scaevitas', 'conatus'], '96697Ap': ['iuvenis'], '9

In [61]:
id_pred2_token_participant_df = pd.DataFrame([(k,v) for k,v in pred2_participant_lemma.items()], columns=["ID", "PARTICIPANT LEMMA"]) #where 'id_pred2_token_participant_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
id_pred2_token_participant_df

Unnamed: 0,ID,PARTICIPANT LEMMA
0,7807Aen,"[telum, tribunus]"
1,627Ap,[magister]
2,638Ap,[sermo]
3,4001Ap,[civitas]
4,6965Ap,[spectaculum]
...,...,...
906,435595Verg,[dea]
907,436869Verg,"[Iuturna, frater]"
908,438653Verg,[Iuturna]
909,440736Verg,[ille]


### Participant role

In [63]:
list_participant_role = []  # Initializing an empty list to store frames
tok2_participant_role = dict()  # Initializing an empty dictionary to map tokens to their frames
count = 0  # Initializing a count variable

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Participants' annotation in the CAS
    for relation in cas.select('webanno.custom.Paticipants'):
        dep = relation.Dependent  # Dependent token of the participant relation
        tokdep = dep.get_covered_text()  # Covered text of the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
        list_participants.append(str(tokdep))  # Appending the covered text of the dependent token to sptlist

        role = relation.Frame  # Frame associated with the participant relation
        
        # Checking if the ID already exists in tok2_participant_role
        if id in tok2_participant_role:
            list_participant_role = tok2_participant_role[id] + [role]  # If so, append the frame to the existing list of frames
            tok2_participant_role[id] = list_participant_role  # Update the dictionary with the appended list
        else:
            tok2_participant_role[id] = [role]  # If not, create a new entry with a list containing the frame

print(tok2_participant_role)  


{'7807Aen': ['Ground', 'Figure'], '627Ap': ['Figure'], '638Ap': ['Ground'], '4001Ap': ['Ground'], '6965Ap': ['Ground'], '11895Ap': ['Figure'], '13304Ap': ['Figure'], '14102Ap': ['Ground'], '20808Ap': ['Ground'], '21919Ap': ['Ground'], '31740Ap': ['Ground'], '32491Ap': ['Figure'], '34900Ap': ['Ground'], '37524Ap': ['Ground', 'Figure'], '38710Ap': ['Ground'], '40602Ap': ['Figure'], '46705Ap': ['Ground'], '51135Ap': ['Ground'], '51515Ap': ['Figure'], '53490Ap': ['Ground'], '55177Ap': ['Figure'], '55702Ap': ['Ground'], '57275Ap': ['Figure'], '61144Ap': ['Figure'], '62018Ap': ['Figure'], '63203Ap': ['Ground'], '65487Ap': ['Figure'], '69546Ap': ['Ground'], '71331Ap': ['Ground', 'Figure'], '71779Ap': ['Ground'], '72278Ap': ['Figure'], '74642Ap': ['Figure', 'Ground'], '79760Ap': ['Ground'], '84767Ap': ['Ground', 'Figure'], '87247Ap': ['Ground', 'Figure'], '89491Ap': ['Ground'], '90315Ap': ['Ground'], '93188Ap': ['Ground'], '95922Ap': ['Figure', 'Ground'], '96697Ap': ['Figure'], '99825Ap': ['Gr

In [64]:
participant_role_df = pd.DataFrame([(k,v) for k,v in tok2_participant_role.items()], columns=["ID", "PARTICIPANT ROLE"]) #where 'participant_role_df' is a dataframe containing IDs of all the tokens annotated with a participant relation (i.e., Figure/Ground) and their role
participant_role_df

Unnamed: 0,ID,PARTICIPANT ROLE
0,7807Aen,"[Ground, Figure]"
1,627Ap,[Figure]
2,638Ap,[Ground]
3,4001Ap,[Ground]
4,6965Ap,[Ground]
...,...,...
906,435595Verg,[Figure]
907,436869Verg,"[Ground, Figure]"
908,438653Verg,[Ground]
909,440736Verg,[Ground]


In [65]:
id_participant_role_lemma_df = id_pred2_token_participant_df.merge(participant_role_df, on='ID', how='left')
id_participant_role_lemma_df

Unnamed: 0,ID,PARTICIPANT LEMMA,PARTICIPANT ROLE
0,7807Aen,"[telum, tribunus]","[Ground, Figure]"
1,627Ap,[magister],[Figure]
2,638Ap,[sermo],[Ground]
3,4001Ap,[civitas],[Ground]
4,6965Ap,[spectaculum],[Ground]
...,...,...,...
906,435595Verg,[dea],[Figure]
907,436869Verg,"[Iuturna, frater]","[Ground, Figure]"
908,438653Verg,[Iuturna],[Ground]
909,440736Verg,[ille],[Ground]


In [66]:
verb_with_participant_roles_lemmas_df = id_ground_synsets_df.merge(id_participant_role_lemma_df, on='ID', how='left')
verb_with_participant_roles_lemmas_df

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS,VERB SEMANTICS,FIGURE SEMANTICS,GROUND SEMANTICS,PARTICIPANT LEMMA,PARTICIPANT ROLE
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across",[v#00250254 pass from physical life and lose a...,['n#00004123 a human being'],,,
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together,[v#01075789 crash together with violent impact],['n#00004123 a human being'],,,
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together,[v#01654097 collect in one place],['n#03176413 a body that is thrown or projected'],[n#07168973 an officer in command of a militar...,"[telum, tribunus]","[Ground, Figure]"
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before,"[v#01661609 be a guiding force, as with direct...",['n#07632177 a person whose occupation is teac...,,[magister],[Figure]
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to,[v#01661230 begin to deal with],['n#00004123 a human being'],[n#05167497 a human written or spoken language...,[sermo],[Ground]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward,[v#01410345 run or move very quickly or hastily],['n#06888584 a female deity'],,[dea],[Figure]
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under,[v#01737682 help in a difficult situation],['n#06888584 a female deity'],[n#07127521 a male with the same parents as so...,"[Iuturna, frater]","[Ground, Figure]"
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to,[v#01410345 run or move very quickly or hastily],['n#10169961 a serious (sometimes fatal infect...,[n#06381267 a point located with respect to su...,[Iuturna],[Ground]
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under,[v#01343923 raise from a lower to a higher pos...,['n#00004123 a human being'],[n#06669293 a lump of hard consolidated minera...,[ille],[Ground]


In [67]:
verb_with_participant_roles_lemmas_df.to_csv('Latin_with_participants_without_spatial_relations.csv')

### Mapping IDs of verb tokens onto IDs of spatial relations

In [68]:
# Initializing an empty list to store spatial relations covered text
list_spatial_relations = []

# Initializing an empty dictionary to map tokens to their spatial relations with unique identifiers
tok2_spatial_relations = dict()

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Current file name
    file_input_abbr = files_abbreviated[i]  # Abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file

    # Looping through each 'Spatiality' annotation in the CAS
    for relation in cas.select('webanno.custom.Spatiality'):
        dep = relation.Dependent  # Dependent token of the spatial relation
        tokdep = dep.get_covered_text()  # Covered text of the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
        list_spatial_relations.append(str(tokdep))  # Appending the covered text of the dependent token to list_spatial_relations

        gov = relation.Governor  # Governor token of the spatial relation
        tokgov = gov.get_covered_text()  # Covered text of the governor token
        id2 = str(gov.begin) + file_input_abbr # Creating a unique identifier by appending the file abbreviation to the offset of the governor token
        
        if id in tok2_spatial_relations:  # Checking if the ID already exists in tok2_spatial_relations
            tok2_spatial_relations[id].append(id2)  # If so, append the unique identifier to the existing list
        else:
            tok2_spatial_relations[id] = [id2]  # If not, create a new entry with a list containing the unique identifier

print(tok2_spatial_relations)


{'7504Aen': ['7489Aen'], '7807Aen': ['7836Aen', '7799Aen'], '638Ap': ['583Ap'], '3306Ap': ['3296Ap', '3281Ap'], '4001Ap': ['3991Ap'], '6965Ap': ['6953Ap'], '11123Ap': ['11134Ap'], '11895Ap': ['11884Ap'], '14102Ap': ['14094Ap'], '20808Ap': ['20796Ap'], '21919Ap': ['21891Ap'], '31740Ap': ['31723Ap'], '32491Ap': ['32474Ap'], '34900Ap': ['34879Ap'], '37524Ap': ['37514Ap'], '38710Ap': ['38703Ap'], '46705Ap': ['46697Ap'], '51135Ap': ['51105Ap'], '55702Ap': ['55694Ap'], '63203Ap': ['63194Ap'], '69546Ap': ['69535Ap'], '71331Ap': ['71247Ap'], '71779Ap': ['71770Ap'], '74642Ap': ['74628Ap'], '84767Ap': ['84764Ap'], '87247Ap': ['87232Ap'], '93188Ap': ['93172Ap'], '99825Ap': ['99839Ap'], '105746Ap': ['105738Ap'], '110168Ap': ['110161Ap'], '111432Ap': ['111361Ap'], '113591Ap': ['113574Ap'], '116038Ap': ['116024Ap'], '117021Ap': ['117034Ap'], '125471Ap': ['125451Ap'], '133136Ap': ['133128Ap'], '134636Ap': ['134653Ap'], '140607Ap': ['140619Ap'], '149618Ap': ['149592Ap'], '150834Ap': ['150825Ap'], '158

In [69]:
tok2_spatial_relations_id_df = pd.DataFrame([(k,v) for k,v in tok2_spatial_relations.items()], columns=["ID", "SPATIAL RELATION ID"]) #where 'tok2_spatial_relations_id_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
tok2_spatial_relations_id_df

Unnamed: 0,ID,SPATIAL RELATION ID
0,7504Aen,[7489Aen]
1,7807Aen,"[7836Aen, 7799Aen]"
2,638Ap,[583Ap]
3,3306Ap,"[3296Ap, 3281Ap]"
4,4001Ap,[3991Ap]
...,...,...
683,424445Verg,[424413Verg]
684,428401Verg,[428307Verg]
685,436869Verg,[436880Verg]
686,438653Verg,[438644Verg]


In [70]:
pred2_spatial_relation_lemmas = dict()  # Initialize an empty dictionary to store mappings between predicate IDs and their spatial relation lemmas

# Iterate through each predicate ID in the dictionary tok2_spatial_relations
for pred_id in tok2_spatial_relations:
    spatial_relation_ids = tok2_spatial_relations[pred_id]  # Retrieve the list of spatial relation IDs associated with the current predicate ID
    spatial_relation_lemmas = list()  # Initialize an empty list to store lemmas corresponding to spatial relations
    
    # Iterate through each spatial relation ID in the list of IDs
    for spatial_relation in spatial_relation_ids:
        spatial_relation_lemma = pred2_lemmas[spatial_relation]  # Retrieve the lemma associated with the current spatial relation ID from pred2_lemmas dictionary
        spatial_relation_lemmas.append(spatial_relation_lemma)  # Append the retrieved lemma to the list of lemmas for the current predicate ID
        pred2_spatial_relation_lemmas[pred_id] = spatial_relation_lemmas  # Assign the list of lemmas to the current predicate ID in the pred2_spatial_relation_lemmas dictionary

print(pred2_spatial_relation_lemmas)  


{'7504Aen': ['campus'], '7807Aen': ['tribunus', 'undique'], '638Ap': ['sermo'], '3306Ap': ['ingluvies', 'occipitium'], '4001Ap': ['civitas'], '6965Ap': ['spectaculum'], '11123Ap': ['istinc'], '11895Ap': ['contrarius'], '14102Ap': ['fluvius'], '20808Ap': ['solitudo'], '21919Ap': ['villula'], '31740Ap': ['domus'], '32491Ap': ['fauces'], '34900Ap': ['hospitium'], '37524Ap': ['lumen'], '38710Ap': ['oculus'], '46705Ap': ['fluctus'], '51135Ap': ['locus'], '55702Ap': ['cadaver'], '63203Ap': ['latro'], '69546Ap': ['ego'], '71331Ap': ['theatrum'], '71779Ap': ['fortuna'], '74642Ap': ['domus'], '84767Ap': ['ego'], '87247Ap': ['cogitatio'], '93188Ap': ['auxilium'], '99825Ap': ['locus'], '105746Ap': ['humerus'], '110168Ap': ['epulae'], '111432Ap': ['confinium'], '113591Ap': ['porta'], '116038Ap': ['meta'], '117021Ap': ['domus'], '125471Ap': ['voluptas'], '133136Ap': ['nuptiae'], '134636Ap': ['os'], '140607Ap': ['domus'], '149618Ap': ['unde'], '150834Ap': ['scopulum'], '158305Ap': ['tu'], '161942Ap'

In [71]:
spatial_relation_lemma_df = pd.DataFrame([(k,v) for k,v in pred2_spatial_relation_lemmas.items()], columns=["ID", "SPATIAL RELATION LEMMA"]) #where 'spatial_relation_lemma_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
spatial_relation_lemma_df

Unnamed: 0,ID,SPATIAL RELATION LEMMA
0,7504Aen,[campus]
1,7807Aen,"[tribunus, undique]"
2,638Ap,[sermo]
3,3306Ap,"[ingluvies, occipitium]"
4,4001Ap,[civitas]
...,...,...
683,424445Verg,[Hyllus]
684,428401Verg,[hic]
685,436869Verg,[frater]
686,438653Verg,[Iuturna]


In [72]:
list_spatial_relation_roles = []  # Initializing an empty list to store frames
tok2_spatial_relation_roles = dict()  # Initializing an empty dictionary to map tokens to their frames
count = 0  # Initializing a count variable

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Getting the current file name
    file_input_abbr = files_abbreviated[i]  # Getting the abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Participants' annotation in the CAS
    for relation in cas.select('webanno.custom.Spatiality'):
        dep = relation.Dependent  # Dependent token of the participant relation
        tokdep = dep.get_covered_text()  # Covered text of the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
        list_participants.append(str(tokdep))  # Appending the covered text of the dependent token to sptlist

        spatial_relation = relation.Spatials  # Frame associated with the participant relation
        
        # Checking if the ID already exists in tok2_spatial_relation_roles
        if id in tok2_spatial_relation_roles:
            list_spatial_relation_roles = tok2_spatial_relation_roles[id] + [spatial_relation]  # If so, append the frame to the existing list of frames
            tok2_spatial_relation_roles[id] = list_spatial_relation_roles  # Update the dictionary with the appended list
        else:
            tok2_spatial_relation_roles[id] = [spatial_relation]  # If not, create a new entry with a list containing the frame

print(tok2_spatial_relation_roles)  



{'7504Aen': ['LOCATION'], '7807Aen': ['GOAL', 'SOURCE'], '638Ap': ['GOAL'], '3306Ap': ['PATH', 'GOAL'], '4001Ap': ['GOAL'], '6965Ap': ['GOAL'], '11123Ap': ['SOURCE'], '11895Ap': ['GOAL'], '14102Ap': ['PATH'], '20808Ap': ['PATH'], '21919Ap': ['GOAL'], '31740Ap': ['GOAL'], '32491Ap': ['SOURCE'], '34900Ap': ['GOAL'], '37524Ap': ['GOAL'], '38710Ap': ['GOAL'], '46705Ap': ['GOAL'], '51135Ap': ['GOAL'], '55702Ap': ['GOAL'], '63203Ap': ['GOAL'], '69546Ap': ['GOAL'], '71331Ap': ['PATH'], '71779Ap': ['GOAL'], '74642Ap': ['GOAL'], '84767Ap': ['GOAL'], '87247Ap': ['GOAL'], '93188Ap': ['GOAL'], '99825Ap': ['GOAL'], '105746Ap': ['GOAL'], '110168Ap': ['GOAL'], '111432Ap': ['GOAL'], '113591Ap': ['GOAL'], '116038Ap': ['GOAL'], '117021Ap': ['SOURCE'], '125471Ap': ['GOAL'], '133136Ap': ['GOAL'], '134636Ap': ['GOAL'], '140607Ap': ['SOURCE'], '149618Ap': ['SOURCE'], '150834Ap': ['GOAL'], '158305Ap': ['GOAL'], '161942Ap': ['GOAL'], '162189Ap': ['GOAL'], '162465Ap': ['PATH'], '1498Caes': ['SOURCE'], '4296Cae

In [73]:
spatial_relation_role_df = pd.DataFrame([(k,v) for k,v in tok2_spatial_relation_roles.items()], columns=["ID", "SPATIAL RELATION ROLE"]) #where 'spatial_relation_role_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
spatial_relation_role_df

Unnamed: 0,ID,SPATIAL RELATION ROLE
0,7504Aen,[LOCATION]
1,7807Aen,"[GOAL, SOURCE]"
2,638Ap,[GOAL]
3,3306Ap,"[PATH, GOAL]"
4,4001Ap,[GOAL]
...,...,...
683,424445Verg,[GOAL]
684,428401Verg,[GOAL]
685,436869Verg,[GOAL]
686,438653Verg,[GOAL]


In [74]:
id_spatial_relation_roles_lemmas_df = spatial_relation_role_df.merge(spatial_relation_lemma_df, on='ID', how='left') #where 'id_spatial_relation_roles_lemmas_df' merges 'spatial_relation_lemma_df' and 'spatial_relation_role_df'
id_spatial_relation_roles_lemmas_df

Unnamed: 0,ID,SPATIAL RELATION ROLE,SPATIAL RELATION LEMMA
0,7504Aen,[LOCATION],[campus]
1,7807Aen,"[GOAL, SOURCE]","[tribunus, undique]"
2,638Ap,[GOAL],[sermo]
3,3306Ap,"[PATH, GOAL]","[ingluvies, occipitium]"
4,4001Ap,[GOAL],[civitas]
...,...,...,...
683,424445Verg,[GOAL],[Hyllus]
684,428401Verg,[GOAL],[hic]
685,436869Verg,[GOAL],[frater]
686,438653Verg,[GOAL],[Iuturna]


In [75]:
pred2_spatial_relation_synsets = dict()  # Initialize an empty dictionary to store mappings between predicate IDs and their spatial relation lemmas

# Iterate through each predicate ID in the dictionary tok2_spatial_relations
for pred_id in tok2_spatial_relations:
    spatial_relation_ids = tok2_spatial_relations[pred_id]  # Retrieve the list of spatial relation IDs associated with the current predicate ID
    spatial_relation_synsets = list()  # Initialize an empty list to store lemmas corresponding to spatial relations
    
    # Iterate through each spatial relation ID in the list of IDs
    for spatial_relation in spatial_relation_ids:
        spatial_relation_synset = tok2_synsets[spatial_relation]  # Retrieve the lemma associated with the current spatial relation ID from pred2_lemmas dictionary
        # Extract only the lemma string from the object and append it to the list of lemmas for the current predicate ID
        spatial_relation_synsets.append(spatial_relation_synset.elements[0])  
    pred2_spatial_relation_synsets[pred_id] = spatial_relation_synsets  # Assign the list of lemmas to the current predicate ID in the pred2_spatial_relation_synsets dictionary
        
print(pred2_spatial_relation_synsets)  


{'7504Aen': ['n#06727012 extensive tract of level open land'], '7807Aen': ['n#07168973 an officer in command of a military unit', 'r#L2533591 from any or all places'], '638Ap': ['n#05167497 a human written or spoken language used by a community; opposed to e.g. a computer language'], '3306Ap': ['n#04296952 the passage to the stomach and lungs; in the front part of the neck below the chin and above the collarbone', 'n#04292200 back part of the head or skull'], '4001Ap': ['n#06382213 an urban area with a fixed boundary that is smaller than a city'], '6965Ap': ['n#04964487 a public exhibition or entertainment'], '11123Ap': ['r#00041436 (archaic) from this place'], '11895Ap': ['n#09984290 a relation of direct opposition'], '14102Ap': ['n#06789983 a large natural stream of water (larger than a creek)'], '20808Ap': ['n#06391772 a wild and uninhabited area'], '21919Ap': ['n#02666884 a house for the farmer and family'], '31740Ap': ['n#02837386 a dwelling that serves as living quarters for one 

In [76]:
spatial_relation_synset_df = pd.DataFrame([(k,v) for k,v in pred2_spatial_relation_synsets.items()], columns=["ID", "SPATIAL RELATION SEMANTICS"]) #where 'spatial_relation_synset_df' is a dataframe containing verb IDs and semantics of all the tokens annotated with a Spatial relation
spatial_relation_synset_df

Unnamed: 0,ID,SPATIAL RELATION SEMANTICS
0,7504Aen,[n#06727012 extensive tract of level open land]
1,7807Aen,[n#07168973 an officer in command of a militar...
2,638Ap,[n#05167497 a human written or spoken language...
3,3306Ap,[n#04296952 the passage to the stomach and lun...
4,4001Ap,[n#06382213 an urban area with a fixed boundar...
...,...,...
683,424445Verg,[n#00004123 a human being]
684,428401Verg,[n#00004123 a human being]
685,436869Verg,[n#07127521 a male with the same parents as so...
686,438653Verg,[n#06381267 a point located with respect to su...


In [77]:
id_spatial_relation_roles_lemmas_synsets_df = id_spatial_relation_roles_lemmas_df.merge(spatial_relation_synset_df, on='ID', how='left') #where 'id_spatial_relation_roles_lemmas_synsets_df' merges 'id_spatial_relation_roles_lemmas_df' and 'spatial_relation_synset_df'
id_spatial_relation_roles_lemmas_synsets_df

Unnamed: 0,ID,SPATIAL RELATION ROLE,SPATIAL RELATION LEMMA,SPATIAL RELATION SEMANTICS
0,7504Aen,[LOCATION],[campus],[n#06727012 extensive tract of level open land]
1,7807Aen,"[GOAL, SOURCE]","[tribunus, undique]",[n#07168973 an officer in command of a militar...
2,638Ap,[GOAL],[sermo],[n#05167497 a human written or spoken language...
3,3306Ap,"[PATH, GOAL]","[ingluvies, occipitium]",[n#04296952 the passage to the stomach and lun...
4,4001Ap,[GOAL],[civitas],[n#06382213 an urban area with a fixed boundar...
...,...,...,...,...
683,424445Verg,[GOAL],[Hyllus],[n#00004123 a human being]
684,428401Verg,[GOAL],[hic],[n#00004123 a human being]
685,436869Verg,[GOAL],[frater],[n#07127521 a male with the same parents as so...
686,438653Verg,[GOAL],[Iuturna],[n#06381267 a point located with respect to su...


In [78]:
# Initializing an empty list to store spatial relations covered text
list_spatial_relations = []

# Initializing an empty dictionary to map tokens to their spatial relations with unique identifiers
tok2_spatial_relations = dict()

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Current file name
    file_input_abbr = files_abbreviated[i]  # Abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file

    # Looping through each 'Spatiality' annotation in the CAS
    for relation in cas.select('webanno.custom.Spatiality'):
        dep = relation.Dependent  # Dependent token of the spatial relation
        tokdep = dep.get_covered_text()  # Covered text of the dependent token
        id = str(dep.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
        list_spatial_relations.append(str(tokdep))  # Appending the covered text of the dependent token to list_spatial_relations

        gov = relation.Governor  # Governor token of the spatial relation
        tokgov = gov.get_covered_text()  # Covered text of the governor token
        id2 = str(gov.begin) + file_input_abbr # Creating a unique identifier by appending the file abbreviation to the offset of the governor token
        
        if id in tok2_spatial_relations:  # Checking if the ID already exists in tok2_spatial_relations
            tok2_spatial_relations[id].append(id2)  # If so, append the unique identifier to the existing list
        else:
            tok2_spatial_relations[id] = [id2]  # If not, create a new entry with a list containing the unique identifier

print(tok2_spatial_relations)



{'7504Aen': ['7489Aen'], '7807Aen': ['7836Aen', '7799Aen'], '638Ap': ['583Ap'], '3306Ap': ['3296Ap', '3281Ap'], '4001Ap': ['3991Ap'], '6965Ap': ['6953Ap'], '11123Ap': ['11134Ap'], '11895Ap': ['11884Ap'], '14102Ap': ['14094Ap'], '20808Ap': ['20796Ap'], '21919Ap': ['21891Ap'], '31740Ap': ['31723Ap'], '32491Ap': ['32474Ap'], '34900Ap': ['34879Ap'], '37524Ap': ['37514Ap'], '38710Ap': ['38703Ap'], '46705Ap': ['46697Ap'], '51135Ap': ['51105Ap'], '55702Ap': ['55694Ap'], '63203Ap': ['63194Ap'], '69546Ap': ['69535Ap'], '71331Ap': ['71247Ap'], '71779Ap': ['71770Ap'], '74642Ap': ['74628Ap'], '84767Ap': ['84764Ap'], '87247Ap': ['87232Ap'], '93188Ap': ['93172Ap'], '99825Ap': ['99839Ap'], '105746Ap': ['105738Ap'], '110168Ap': ['110161Ap'], '111432Ap': ['111361Ap'], '113591Ap': ['113574Ap'], '116038Ap': ['116024Ap'], '117021Ap': ['117034Ap'], '125471Ap': ['125451Ap'], '133136Ap': ['133128Ap'], '134636Ap': ['134653Ap'], '140607Ap': ['140619Ap'], '149618Ap': ['149592Ap'], '150834Ap': ['150825Ap'], '158

In [79]:
#Spatial relation expression

list_spatial_relation_expressions = [] # Initializing a list to store expressions
tok2_spatial_relation_expression = dict() # Initializing a dictionary to map tokens to their expressions
count = 0 # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Current file name
    file_input_abbr = files_abbreviated[i]  # Abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Expressedby' annotation in the CAS
    for relation in cas.select('webanno.custom.Expressedby'):
        # Looping through each token covered by 'Expressedby' annotation
        for token in cas.select_covered('webanno.custom.Expressedby', relation):
            tok = token.get_covered_text()  # Covered text of the token
            id = str(token.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            spatial_relation_expression = relation.preposition  # Preposition associated with the expression
            
            # Checking if the ID already exists in tok2_spatial_relation_expression
            if id in tok2_spatial_relation_expression:
                tok2_spatial_relation_expression = tok2_spatial_relation_expression[id] + [spatial_relation_expression]
                tok2_spatial_relation_expression[id] = list_spatial_relation_expressions
                #tok2_spatial_relation_expression[id].append(spatial_relation_expression)  # If so, append the preposition to the existing list of expressions
            else:
                tok2_spatial_relation_expression[id] = [spatial_relation_expression]  # If not, create a new entry with the expression list

print(tok2_spatial_relation_expression)



{'7489Aen': ['ABL'], '7799Aen': ['adverb'], '7836Aen': ['DAT'], '583Ap': ['ACC'], '3281Ap': ['ad + ACC'], '3296Ap': ['per + ACC'], '3991Ap': ['ACC'], '6953Ap': ['ACC'], '11134Ap': ['adverb'], '11884Ap': ['in + ACC'], '14094Ap': ['per + ACC'], '20796Ap': ['per + ACC'], '21891Ap': ['ad + ACC'], '31723Ap': ['ad + ACC'], '32474Ap': ['de + ABL'], '34879Ap': ['ad + ACC'], '37514Ap': ['DAT'], '38703Ap': ['DAT'], '46697Ap': ['ACC'], '51105Ap': ['ACC'], '53484Ap': ['ACC'], '55694Ap': ['ACC'], '63194Ap': ['ACC'], '69535Ap': ['ACC'], '71247Ap': ['per + ACC'], '71770Ap': ['DAT'], '74628Ap': ['ACC'], '84764Ap': ['ACC'], '87232Ap': ['DAT'], '93172Ap': ['ad + ACC'], '99839Ap': ['ad + ACC'], '105738Ap': ['ACC'], '109743Ap': ['ACC'], '110161Ap': ['ACC'], '110876Ap': ['ACC'], '111361Ap': ['ad + ACC'], '113574Ap': ['ACC'], '116024Ap': ['ACC'], '117034Ap': ['ex + ABL'], '125451Ap': ['in + ACC'], '133128Ap': ['ACC'], '134492Ap': ['ACC'], '134653Ap': ['ob + ACC'], '140619Ap': ['ex + ABL'], '149592Ap': ['adv

In [80]:
# Initialize a new dictionary to store the mapping of identifiers to expressions
pred2_spatial_relation_expression = {}

# Iterate over the keys and values of tok2_spatial_relations
for identifier, ids in tok2_spatial_relations.items():
    expressions = []  # Initialize an empty list to store expressions for the current identifier
    for id in ids:
        # Check if the id exists in tok2_spatial_relation_expression
        if id in tok2_spatial_relation_expression:
            expressions.extend(tok2_spatial_relation_expression[id])  # Extend the expressions list with expressions for the current id
    
    # Add the list of expressions to the combined dictionary only if it's not empty
    if expressions:
        pred2_spatial_relation_expression[identifier] = expressions

print(pred2_spatial_relation_expression)


{'7504Aen': ['ABL'], '7807Aen': ['DAT', 'adverb'], '638Ap': ['ACC'], '3306Ap': ['per + ACC', 'ad + ACC'], '4001Ap': ['ACC'], '6965Ap': ['ACC'], '11123Ap': ['adverb'], '11895Ap': ['in + ACC'], '14102Ap': ['per + ACC'], '20808Ap': ['per + ACC'], '21919Ap': ['ad + ACC'], '31740Ap': ['ad + ACC'], '32491Ap': ['de + ABL'], '34900Ap': ['ad + ACC'], '37524Ap': ['DAT'], '38710Ap': ['DAT'], '46705Ap': ['ACC'], '51135Ap': ['ACC'], '55702Ap': ['ACC'], '63203Ap': ['ACC'], '69546Ap': ['ACC'], '71331Ap': ['per + ACC'], '71779Ap': ['DAT'], '74642Ap': ['ACC'], '84767Ap': ['ACC'], '87247Ap': ['DAT'], '93188Ap': ['ad + ACC'], '99825Ap': ['ad + ACC'], '105746Ap': ['ACC'], '110168Ap': ['ACC'], '111432Ap': ['ad + ACC'], '113591Ap': ['ACC'], '116038Ap': ['ACC'], '117021Ap': ['ex + ABL'], '125471Ap': ['in + ACC'], '133136Ap': ['ACC'], '134636Ap': ['ob + ACC'], '140607Ap': ['ex + ABL'], '149618Ap': ['adverb'], '150834Ap': ['ACC'], '158305Ap': ['DAT'], '161942Ap': ['ad + ACC'], '162189Ap': ['ad + ACC'], '162465

In [81]:
# Find the missing key-value pair (if needed)
missing_key = None
for key in tok2_spatial_relations:
    if key not in pred2_spatial_relation_expression:
        missing_key = key
        break

print("Missing key:", missing_key)


Missing key: None


In [82]:
spatial_relation_expression_df = pd.DataFrame([(k,v) for k,v in pred2_spatial_relation_expression.items()], columns=["ID", "SPATIAL RELATION EXPRESSION"]) #where 'tok2sr_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
spatial_relation_expression_df

Unnamed: 0,ID,SPATIAL RELATION EXPRESSION
0,7504Aen,[ABL]
1,7807Aen,"[DAT, adverb]"
2,638Ap,[ACC]
3,3306Ap,"[per + ACC, ad + ACC]"
4,4001Ap,[ACC]
...,...,...
683,424445Verg,[DAT]
684,428401Verg,[DAT]
685,436869Verg,[DAT]
686,438653Verg,[DAT]


In [83]:
id_spatial_relation_expression_df = spatial_relation_expression_df.merge(id_spatial_relation_roles_lemmas_synsets_df, on='ID', how='left')
id_spatial_relation_expression_df

Unnamed: 0,ID,SPATIAL RELATION EXPRESSION,SPATIAL RELATION ROLE,SPATIAL RELATION LEMMA,SPATIAL RELATION SEMANTICS
0,7504Aen,[ABL],[LOCATION],[campus],[n#06727012 extensive tract of level open land]
1,7807Aen,"[DAT, adverb]","[GOAL, SOURCE]","[tribunus, undique]",[n#07168973 an officer in command of a militar...
2,638Ap,[ACC],[GOAL],[sermo],[n#05167497 a human written or spoken language...
3,3306Ap,"[per + ACC, ad + ACC]","[PATH, GOAL]","[ingluvies, occipitium]",[n#04296952 the passage to the stomach and lun...
4,4001Ap,[ACC],[GOAL],[civitas],[n#06382213 an urban area with a fixed boundar...
...,...,...,...,...,...
683,424445Verg,[DAT],[GOAL],[Hyllus],[n#00004123 a human being]
684,428401Verg,[DAT],[GOAL],[hic],[n#00004123 a human being]
685,436869Verg,[DAT],[GOAL],[frater],[n#07127521 a male with the same parents as so...
686,438653Verg,[DAT],[GOAL],[Iuturna],[n#06381267 a point located with respect to su...


In [84]:
verbs_with_participants_and_spatial_relations = verb_with_participant_roles_lemmas_df.merge(id_spatial_relation_expression_df, on='ID', how='left')
verbs_with_participants_and_spatial_relations

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,PREVERB SEMANTICS,VERB SEMANTICS,FIGURE SEMANTICS,GROUND SEMANTICS,PARTICIPANT LEMMA,PARTICIPANT ROLE,SPATIAL RELATION EXPRESSION,SPATIAL RELATION ROLE,SPATIAL RELATION LEMMA,SPATIAL RELATION SEMANTICS
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,"(idea of destruction/death), across",[v#00250254 pass from physical life and lose a...,['n#00004123 a human being'],,,,[ABL],[LOCATION],[campus],[n#06727012 extensive tract of level open land]
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,together,[v#01075789 crash together with violent impact],['n#00004123 a human being'],,,,,,,
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,together,[v#01654097 collect in one place],['n#03176413 a body that is thrown or projected'],[n#07168973 an officer in command of a militar...,"[telum, tribunus]","[Ground, Figure]","[DAT, adverb]","[GOAL, SOURCE]","[tribunus, undique]",[n#07168973 an officer in command of a militar...
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,before,"[v#01661609 be a guiding force, as with direct...",['n#07632177 a person whose occupation is teac...,,[magister],[Figure],,,,
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,to,[v#01661230 begin to deal with],['n#00004123 a human being'],[n#05167497 a human written or spoken language...,[sermo],[Ground],[ACC],[GOAL],[sermo],[n#05167497 a human written or spoken language...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,forward,[v#01410345 run or move very quickly or hastily],['n#06888584 a female deity'],,[dea],[Figure],,,,
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,under,[v#01737682 help in a difficult situation],['n#06888584 a female deity'],[n#07127521 a male with the same parents as so...,"[Iuturna, frater]","[Ground, Figure]",[DAT],[GOAL],[frater],[n#07127521 a male with the same parents as so...
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,to,[v#01410345 run or move very quickly or hastily],['n#10169961 a serious (sometimes fatal infect...,[n#06381267 a point located with respect to su...,[Iuturna],[Ground],[DAT],[GOAL],[Iuturna],[n#06381267 a point located with respect to su...
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,under,[v#01343923 raise from a lower to a higher pos...,['n#00004123 a human being'],[n#06669293 a lump of hard consolidated minera...,[ille],[Ground],,,,


In [85]:
verbs_with_participants_and_spatial_relations.to_csv('Latin_with_participants_and_spatial_relations.csv')

### Places (WHG)

In [86]:
list_places = []  # Initializing an empty list to store places
pred2_place = dict()  # Initializing an empty dictionary to map tokens to their places
count = 0  # Initializing a count variable to keep track of iterations

# Looping through each file in the list of all_files
for i in range(len(all_files)):
    file_input = all_files[i]  # Current file name
    file_input_abbr = files_abbreviated[i]  # Abbreviated name for the current file
    xml = get_xml(file_input)  # Retrieving XML content from the current file
    cas = load_cas(file_input)  # Loading CAS from the current file
    
    # Looping through each 'Place' annotation in the CAS
    for relation in cas.select('webanno.custom.Place'):
        # Looping through each token covered by 'Place' annotation
        for token in cas.select_covered('webanno.custom.Place', relation):
            tok = token.get_covered_text()  # Covered text of the token
            id = str(token.begin) + file_input_abbr  # Creating a unique ID for the token based on its offset and file abbreviation
            place = relation.Places  # Places associated with the token
            
            # Checking if the ID already exists in pred2_place
            if id in pred2_place:
                list_place = pred2_place[id] + place  # If so, append the places to the existing list of places
                pred2_place[id] = list_place  # Update the dictionary with the appended list
            else:
                pred2_place[id] = place  # If not, create a new entry with a list containing the places

print(pred2_place)  



{'4840Caes': 'https://whgazetteer.org/places/14153618/portal', '5323Caes': 'https://whgazetteer.org/places/12347418/portal', '5989Caes': 'https://whgazetteer.org/places/84296/detail', '29911Caes': 'https://whgazetteer.org/places/12347419/portal', '31358Caes': 'https://whgazetteer.org/places/12347419/portal', '33960Caes': 'https://whgazetteer.org/places/12347419/portal', '37510Caes': 'https://whgazetteer.org/places/12347419/portal', '46737Caes': 'https://whgazetteer.org/places/12347419/portal', '46862Caes': 'https://whgazetteer.org/places/12347419/portal', '56119Caes': 'https://whgazetteer.org/places/12347419/portal', '70231Caes': 'https://whgazetteer.org/places/12636696/portal', '106021Caes': 'https://whgazetteer.org/places/6465620/detail', '111957Caes': 'https://whgazetteer.org/places/81460/detail', '111967Caes': 'https://whgazetteer.org/places/82552/detail', '114147Caes': 'https://whgazetteer.org/places/12347419/portal', '117025Caes': 'https://whgazetteer.org/places/12347419/portal',

In [87]:
places_df = pd.DataFrame([(k,v) for k,v in pred2_place.items()], columns=["PLACE ID", "WHG URL"]) #where 'tok2sr_df' is a dataframe containing token IDs and IDs of all the tokens annotated with a Spatial relation
places_df

Unnamed: 0,PLACE ID,WHG URL
0,4840Caes,https://whgazetteer.org/places/14153618/portal
1,5323Caes,https://whgazetteer.org/places/12347418/portal
2,5989Caes,https://whgazetteer.org/places/84296/detail
3,29911Caes,https://whgazetteer.org/places/12347419/portal
4,31358Caes,https://whgazetteer.org/places/12347419/portal
5,33960Caes,https://whgazetteer.org/places/12347419/portal
6,37510Caes,https://whgazetteer.org/places/12347419/portal
7,46737Caes,https://whgazetteer.org/places/12347419/portal
8,46862Caes,https://whgazetteer.org/places/12347419/portal
9,56119Caes,https://whgazetteer.org/places/12347419/portal


In [88]:
pred2_url_relations = dict()  # Initialize an empty dictionary to store mappings between predicate IDs and their associated web URLs

# Iterate through each predicate ID in the dictionary tok2_spatial_relations
for pred_id in tok2_spatial_relations:
    spatial_relation_ids = tok2_spatial_relations[pred_id]  # Retrieve the list of spatial relation IDs associated with the current predicate ID
    url_relations = list()  # Initialize an empty list to store web URLs corresponding to spatial relations
    
    # Iterate through each spatial relation ID in the list of IDs
    for spatial_relation_id in spatial_relation_ids:
        # Retrieve the web URL associated with the current spatial relation ID from pred2_place dictionary
        url_relation = pred2_place.get(spatial_relation_id, None)
        if url_relation is not None:
            url_relations.append(url_relation)  # Append the web URL to the list of URLs for the current predicate ID
            
    pred2_url_relations[pred_id] = url_relations  # Assign the list of URLs to the current predicate ID in the pred2_url_relations dictionary
        
print(pred2_url_relations)  


{'7504Aen': [], '7807Aen': [], '638Ap': [], '3306Ap': [], '4001Ap': [], '6965Ap': [], '11123Ap': [], '11895Ap': [], '14102Ap': [], '20808Ap': [], '21919Ap': [], '31740Ap': [], '32491Ap': [], '34900Ap': [], '37524Ap': [], '38710Ap': [], '46705Ap': [], '51135Ap': [], '55702Ap': [], '63203Ap': [], '69546Ap': [], '71331Ap': [], '71779Ap': [], '74642Ap': [], '84767Ap': [], '87247Ap': [], '93188Ap': [], '99825Ap': [], '105746Ap': [], '110168Ap': [], '111432Ap': [], '113591Ap': [], '116038Ap': [], '117021Ap': [], '125471Ap': [], '133136Ap': [], '134636Ap': [], '140607Ap': [], '149618Ap': [], '150834Ap': [], '158305Ap': [], '161942Ap': [], '162189Ap': [], '162465Ap': [], '1498Caes': [], '4296Caes': [], '4578Caes': [], '4854Caes': ['https://whgazetteer.org/places/14153618/portal'], '4978Caes': [], '5732Caes': [], '5997Caes': ['https://whgazetteer.org/places/84296/detail'], '7012Caes': [], '9426Caes': [], '9649Caes': [], '10407Caes': [], '10617Caes': [], '10862Caes': [], '10890Caes': [], '10931C

In [89]:
pred2_url_relations = dict()  # Initialize an empty dictionary to store mappings between predicate IDs and their associated web URLs

# Iterate through each predicate ID in the dictionary tok2_spatial_relations
for pred_id in tok2_spatial_relations:
    spatial_relation_ids = tok2_spatial_relations[pred_id]  # Retrieve the list of spatial relation IDs associated with the current predicate ID
    url_relation = None  # Initialize url_relation to None
    
    # Iterate through each spatial relation ID in the list of IDs
    for spatial_relation_id in spatial_relation_ids:
        # Retrieve the web URL associated with the current spatial relation ID from pred2_place dictionary
        url_relation = pred2_place.get(spatial_relation_id, None)
        if url_relation is not None:
            # If a URL is found, break the loop
            break
    
    pred2_url_relations[pred_id] = url_relation  # Assign the URL to the current predicate ID in the pred2_url_relations dictionary
        
print(pred2_url_relations)  


{'7504Aen': None, '7807Aen': None, '638Ap': None, '3306Ap': None, '4001Ap': None, '6965Ap': None, '11123Ap': None, '11895Ap': None, '14102Ap': None, '20808Ap': None, '21919Ap': None, '31740Ap': None, '32491Ap': None, '34900Ap': None, '37524Ap': None, '38710Ap': None, '46705Ap': None, '51135Ap': None, '55702Ap': None, '63203Ap': None, '69546Ap': None, '71331Ap': None, '71779Ap': None, '74642Ap': None, '84767Ap': None, '87247Ap': None, '93188Ap': None, '99825Ap': None, '105746Ap': None, '110168Ap': None, '111432Ap': None, '113591Ap': None, '116038Ap': None, '117021Ap': None, '125471Ap': None, '133136Ap': None, '134636Ap': None, '140607Ap': None, '149618Ap': None, '150834Ap': None, '158305Ap': None, '161942Ap': None, '162189Ap': None, '162465Ap': None, '1498Caes': None, '4296Caes': None, '4578Caes': None, '4854Caes': 'https://whgazetteer.org/places/14153618/portal', '4978Caes': None, '5732Caes': None, '5997Caes': 'https://whgazetteer.org/places/84296/detail', '7012Caes': None, '9426Caes':

In [90]:
id_places_df = pd.DataFrame([(k,v) for k,v in pred2_url_relations.items()], columns=["ID", "WHG URL"]) #where 'id_places_df' is a dataframe containing verb IDs and WHG URLs
id_places_df

Unnamed: 0,ID,WHG URL
0,7504Aen,
1,7807Aen,
2,638Ap,
3,3306Ap,
4,4001Ap,
...,...,...
683,424445Verg,
684,428401Verg,
685,436869Verg,
686,438653Verg,


### Getting the final CSV for Latin

In [91]:
Latin_full = verbs_with_participants_and_spatial_relations.merge(id_places_df, on='ID', how='left')
Latin_full

Unnamed: 0,ID,VERB TOKEN,MORPHOLOGICAL FEATURES,LEMMA,SENTENCE,ACTIONALITY,VERB CLASS,LITERAL MEANING,VERB STEM,PREVERB,...,VERB SEMANTICS,FIGURE SEMANTICS,GROUND SEMANTICS,PARTICIPANT LEMMA,PARTICIPANT ROLE,SPATIAL RELATION EXPRESSION,SPATIAL RELATION ROLE,SPATIAL RELATION LEMMA,SPATIAL RELATION SEMANTICS,WHG URL
0,7504Aen,perire,Tense=Pres|VerbForm=Inf|Voice=Act,pereo,"Quae neque Dardaniis campis potuere perire, ...",Achievement,DIE-42.4.1,False,present stem,per,...,[v#00250254 pass from physical life and lose a...,['n#00004123 a human being'],,,,[ABL],[LOCATION],[campus],[n#06727012 extensive tract of level open land],
1,7614Aen,Concurrunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,concurro,Concurrunt vel uti venti cum spiritus austri ...,Achievement,BUMP-18.4-1,True,present stem,cum,...,[v#01075789 crash together with violent impact],['n#00004123 a human being'],,,,,,,,
2,7807Aen,conveniunt,Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbF...,convenio,Undique conveniunt vel ut imber tela tribuno,Activity,HERD-47.5.2,True,present stem,cum,...,[v#01654097 collect in one place],['n#03176413 a body that is thrown or projected'],[n#07168973 an officer in command of a militar...,"[telum, tribunus]","[Ground, Figure]","[DAT, adverb]","[GOAL, SOURCE]","[tribunus, undique]",[n#07168973 an officer in command of a militar...,
3,627Ap,praeeunte,Case=Abl|Gender=Masc|Number=Sing|Tense=Pres|Ve...,praeeo,"Mox in urbe Latia advena studiorum, Quiritium ...",State,ACCOMPANY-51.7,False,present stem,prae,...,"[v#01661609 be a guiding force, as with direct...",['n#07632177 a person whose occupation is teac...,,[magister],[Figure],,,,,
4,638Ap,aggressus,Case=Nom|Gender=Masc|Number=Sing|Tense=Past|Ve...,adgredior,"Mox in urbe Latia advena studiorum, Quiritium ...",Accomplishment,BEGIN-55.1,False,supine stem,ad,...,[v#01661230 begin to deal with],['n#00004123 a human being'],[n#05167497 a human written or spoken language...,[sermo],[Ground],[ACC],[GOAL],[sermo],[n#05167497 a human written or spoken language...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,435595Verg,procurrit,Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbF...,procurro,"Dum nititur acer et instat, rursus in aurigae ...",Achievement,RUN-51.3.2,True,present stem,pro,...,[v#01410345 run or move very quickly or hastily],['n#06888584 a female deity'],,[dea],[Figure],,,,,
1479,436869Verg,succurrere,Tense=Pres|VerbForm=Inf|Voice=Act,succurro,"Iuturnam misero, fateor, succurrere fratri sua...",Activity,HELP-72.1,False,present stem,sub,...,[v#01737682 help in a difficult situation],['n#06888584 a female deity'],[n#07127521 a male with the same parents as so...,"[Iuturna, frater]","[Ground, Figure]",[DAT],[GOAL],[frater],[n#07127521 a male with the same parents as so...,
1480,438653Verg,occurrere,Tense=Pres|VerbForm=Inf|Voice=Act,occurro,Harum unam celerem demisit ab aethere summo Iu...,Accomplishment,RUN-51.3.2,False,present stem,ob,...,[v#01410345 run or move very quickly or hastily],['n#10169961 a serious (sometimes fatal infect...,[n#06381267 a point located with respect to su...,[Iuturna],[Ground],[DAT],[GOAL],[Iuturna],[n#06381267 a point located with respect to su...,
1481,440736Verg,subirent,Mood=Subj|Number=Plur|Person=3|Tense=Imp|VerbF...,subeo,"Vix illud lecti bis sex cervice subirent, qual...",Achievement,PUT_DIRECTION-9.4,False,present stem,sub,...,[v#01343923 raise from a lower to a higher pos...,['n#00004123 a human being'],[n#06669293 a lump of hard consolidated minera...,[ille],[Ground],,,,,


In [92]:
Latin_full.to_csv('Latin_FULL.csv')