In [1]:
# Import macula lowfat xml
import os

import lxml.etree as ET

# Directory that holds the macula-greek SBLGNT "lowfat" XML files.
# Set the MACULA_LOWFAT_DIR environment variable to point at another checkout;
# the fallback is the location this notebook was originally written against,
# so existing runs behave exactly as before.
xml_root = os.environ.get(
    "MACULA_LOWFAT_DIR",
    "/home/jcuenod/Programming/symphony-stuff/symphony-backend-atlas-internal/data/Clear-Bible/macula-greek/SBLGNT/lowfat",
)
file_name = "01-matthew.xml"
tree = ET.parse(os.path.join(xml_root, file_name))
root = tree.getroot()

# Get all the words (every <w> element anywhere below the root)
words = root.findall(".//w")
print(f"Number of words: {len(words)}")

# print the first 5 words
for word in words[:5]:
    print(word.text)

Number of words: 18329
Βίβλος
γενέσεως
Ἰησοῦ
χριστοῦ
υἱοῦ


In [2]:
from util import find_main_clause_verbs

# Ask the project helper for the main-clause verb candidates in the parsed tree.
main_clause_candidates = find_main_clause_verbs(root)
print(f"Number of main clauses: {len(main_clause_candidates)}")

# Preview the text of the first five candidates.
for candidate_verb in main_clause_candidates[:5]:
    print(candidate_verb.text)

Number of main clauses: 2744
ἐγέννησεν
ἐγέννησεν
ἐγέννησεν
ἐγέννησεν
ἐγέννησεν


In [3]:
# Pair every main-clause verb with each sibling that
#  (1) precedes the verb and carries role="adv", and
#  (2) yields a non-empty result from util.find_non_substantive_participles
#      (i.e. the helper reports at least one participle that is not substantive).
# Every qualifying sibling is recorded, so one verb can contribute several pairs.

from util import find_non_substantive_participles

adjunct_participle_candidates = [
    (main_verb, preceding)
    for main_verb in main_clause_candidates
    # walk the siblings that come before the main verb
    for preceding in main_verb.itersiblings(preceding=True)
    if preceding.attrib.get("role") == "adv"
    and len(find_non_substantive_participles(preceding)) > 0
]

print(f"Number of adjunct participle candidates: {len(adjunct_participle_candidates)}")

Number of adjunct participle candidates: 358


In [4]:
# now we're going to create a list of macula ids for each word in the adjunct and pair it with a verse ref
def get_id(node):
    """Return the value of ``node``'s ``xml:id`` attribute, or ``None`` if it has none."""
    # Namespaced attributes are keyed in Clark notation: "{namespace-uri}local-name".
    xml_id_key = "{http://www.w3.org/XML/1998/namespace}id"
    return node.attrib.get(xml_id_key)

def get_id_list(node):
    """Return the ``xml:id`` of every ``w`` element below ``node``.

    The ids come back in the order ``findall`` yields the elements. ``node``
    itself is never included, even when it is a ``w`` element, because the
    ``.//w`` path only matches descendants.
    """
    return [get_id(descendant) for descendant in node.findall(".//w")]

def get_ref(node):
    """Return the part of ``node``'s ``ref`` attribute that precedes the first ``!``.

    If the value contains no ``!`` it is returned unchanged. Raises
    ``AttributeError`` when the node has no ``ref`` attribute.
    """
    leading_part, _, _ = node.attrib.get("ref").partition("!")
    return leading_part

# Build one (ref, main-verb id, adjunct word ids) triple per candidate pair.
# The ref is taken from the first <w> below the adjunct that has a `ref`
# attribute.
# NOTE(review): the original comment here only said "Imperfect" - presumably
# because the words of an adjunct need not all share that one ref; confirm.
# NOTE(review): `find` returns None when the adjunct has no descendant <w>
# with a `ref`, in which case get_ref raises AttributeError.
id_list_by_ref = [
    (
        get_ref(adjunct.find(".//w[@ref]")),
        get_id(verb),
        get_id_list(adjunct),
    )
    for verb, adjunct in adjunct_participle_candidates
]

In [5]:
# export to csv
import csv

# One row per candidate: ref, main-verb id, adjunct word ids.
# The third field is a Python list, which csv.writer stringifies with str(),
# so it lands in the file as the list's repr.
with open('backgrounded-phrases.csv', 'w', newline='') as out_file:
    csv.writer(out_file).writerows(id_list_by_ref)

In [6]:
# Spot-check the first (ref, main-verb id, adjunct word ids) triple.
first_entry = id_list_by_ref[0]
print(first_entry)

('MAT 1:19', 'n40001019013', ['n40001019006', 'n40001019007', 'n40001019008', 'n40001019009', 'n40001019010', 'n40001019011', 'n40001019012'])


In [11]:
from util import macula_tsv_by_ref
from IPython.display import display, HTML


def _highlight(fragment, colour):
    """Wrap ``fragment`` in a <span> with the given background colour."""
    return f"<span style='background-color: {colour}'>{fragment}</span>"


# Render one table row per candidate: the ref in the first column, then the
# words that macula_tsv_by_ref lists for that ref, with the main verb
# highlighted in blue and the adjunct's words in orange.
table_rows = []
for ref, verb, id_list in id_list_by_ref:
    rendered_words = []
    # the TSV rows for this ref supply the text and trailing characters
    for word in macula_tsv_by_ref[ref]:
        rendered = word["text"]
        word_id = word["xml:id"]
        if word_id == verb:
            rendered = _highlight(rendered, "#93C5FD")
        if word_id in id_list:
            rendered = _highlight(rendered, "#FED7AA")
        # "after" holds whatever follows the word (appended outside the highlight)
        rendered_words.append(rendered + word["after"])
    table_rows.append(
        f"<tr><td width='100px'><b>{ref}</b></td>"
        "<td style='text-align: left !important;'>"
        + "".join(rendered_words)
        + "</td></tr>"
    )

html_string = "<table>" + "".join(table_rows) + "</table>"

display(HTML(html_string))

0,1
MAT 1:19,"Ἰωσὴφ δὲ ὁ ἀνὴρ αὐτῆς,δίκαιος ὢν καὶ μὴ θέλων αὐτὴν δειγματίσαι,ἐβουλήθη λάθρᾳ ἀπολῦσαι αὐτήν."
MAT 1:24,ἐγερθεὶς δὲ ὁ Ἰωσὴφ ἀπὸ τοῦ ὕπνου ἐποίησεν ὡς προσέταξεν αὐτῷ ὁ ἄγγελος κυρίου καὶ παρέλαβεν τὴν γυναῖκα αὐτοῦ·
MAT 2:3,"ἀκούσας δὲ ὁ βασιλεὺς Ἡρῴδης ἐταράχθη καὶ πᾶσα Ἱεροσόλυμα μετ’ αὐτοῦ,"
MAT 2:4,καὶ συναγαγὼν πάντας τοὺς ἀρχιερεῖς καὶ γραμματεῖς τοῦ λαοῦ ἐπυνθάνετο παρ’ αὐτῶν ποῦ ὁ χριστὸς γεννᾶται.
MAT 2:7,"Τότε Ἡρῴδης λάθρᾳ καλέσας τοὺς μάγους ἠκρίβωσεν παρ’ αὐτῶν τὸν χρόνον τοῦ φαινομένου ἀστέρος,"
MAT 2:8,"καὶ πέμψας αὐτοὺς εἰς Βηθλέεμ εἶπεν·Πορευθέντες ἐξετάσατε ἀκριβῶς περὶ τοῦ παιδίου·ἐπὰν δὲ εὕρητε,ἀπαγγείλατέ μοι,ὅπως κἀγὼ ἐλθὼν προσκυνήσω αὐτῷ."
MAT 2:8,"καὶ πέμψας αὐτοὺς εἰς Βηθλέεμ εἶπεν·Πορευθέντες ἐξετάσατε ἀκριβῶς περὶ τοῦ παιδίου·ἐπὰν δὲ εὕρητε,ἀπαγγείλατέ μοι,ὅπως κἀγὼ ἐλθὼν προσκυνήσω αὐτῷ."
MAT 2:8,"καὶ πέμψας αὐτοὺς εἰς Βηθλέεμ εἶπεν·Πορευθέντες ἐξετάσατε ἀκριβῶς περὶ τοῦ παιδίου·ἐπὰν δὲ εὕρητε,ἀπαγγείλατέ μοι,ὅπως κἀγὼ ἐλθὼν προσκυνήσω αὐτῷ."
MAT 2:9,"οἱ δὲ ἀκούσαντες τοῦ βασιλέως ἐπορεύθησαν,καὶ ἰδοὺ ὁ ἀστὴρ ὃν εἶδον ἐν τῇ ἀνατολῇ προῆγεν αὐτούς,ἕως ἐλθὼν ἐστάθη ἐπάνω οὗ ἦν τὸ παιδίον."
MAT 2:9,"οἱ δὲ ἀκούσαντες τοῦ βασιλέως ἐπορεύθησαν,καὶ ἰδοὺ ὁ ἀστὴρ ὃν εἶδον ἐν τῇ ἀνατολῇ προῆγεν αὐτούς,ἕως ἐλθὼν ἐστάθη ἐπάνω οὗ ἦν τὸ παιδίον."
