# Prepare verse corpus for the responsion analysis

## 1. Extraction

In [1]:
from src.extract import transform_tei

# Paths handed to transform_tei: the file under data/source is passed first,
# the target under data/raw second.
input_file, output_file = (
    "data/source/01_olympia.xml",
    "data/raw/01_olympia.xml",
)

transform_tei(input_file, output_file)



## 2. Macronization

In [2]:
from src.macronize import macronize_xml
#from src.tidy import tidy_xml

# Macronization step: the raw ode file is passed first, the path for the
# macronized copy second.
input_file, output_file = (
    "data/raw/ol01.xml",
    "data/macronized/ol01_macronized.xml",
)

macronize_xml(input_file, output_file)

100%|██████████| 120/120 [00:04<00:00, 25.80it/s]


## 2a. Rule-based scansion

In [2]:
from src.scan import rule_scansion

# Sample verses for a quick check of rule_scansion. The `_` and `^` marks
# after vowels are presumably the long/short annotations produced by the
# macronization step -- TODO confirm against src.macronize.
sample_verses = [
    "ἁνίκ' ἄγκυ_ρα^ν ποτὶ^ χαλκόγενυ^ν",
    "νυκτὶ^ κοινά_σαντες ὁδόν, Κρονί^δᾳ δὲ τˈρά^φεν Χίρωνι^ δῶκα^ν.",
    "'Οὔ τί^ που οὗτος Ἀπόλλων, οὐδὲ μὰ_ν χαλκάρμα^τός ἐστι^ πόσι^ς",
    "εἰ μετάλλατόν τι^· καὶ ὡς τά^χος ὀτˈρύνει με τεύχειν να_ῒ^ πομπάν.",
]

# The loop variable is called `verse` rather than `input`, so this cell no
# longer shadows the builtin `input()` for the rest of the notebook.
for position, verse in enumerate(sample_verses):
    scanned = rule_scansion(verse)
    # Every result after the first is preceded by a blank line, which keeps
    # the printed output identical to the unrolled version of this cell.
    print(scanned if position == 0 else f"\n{scanned}")


{ἁ}{νί}[κ' ἄγ][κυ_][ρα^ν ]{πο}{τὶ^ }[χαλ]{κό}{γε}[νυ^ν]

[νυκ]{τὶ^ }[κοι][νά_][σαν]{τε}{ς ὁ}[δόν, ]{Κρο}{νί^}[δᾳ ][δὲ τˈ]{ρά^}[φεν ]{Χί}[ρω]{νι^ }[δῶ][κα^ν.]

['Οὔ ]{τί^ }{που }[οὗ]{το}[ς Ἀ][πόλ][λω][ν, οὐ]{δὲ }[μὰ_ν ][χαλ][κάρ]{μα^}{τό}[ς ἐσ]{τι^ }{πό}[σι^ς]

[εἰ ]{με}[τάλ]{λα}[τόν ]{τι^· }{καὶ }[ὡς ]{τά^}{χο}[ς ὀτˈ]{ρύ}[νει ]{με }[τεύ][χειν ][να_]{ῒ^ }[πομ][πάν.]


In [3]:
from src.scan import scan_xml

# Run scan_xml from the file under data/raw to data/scan, with debug output
# switched off.
# NOTE(review): this reads the raw file, not the
# data/macronized/ol01_macronized.xml written in step 2 -- confirm that is
# intended.
input_file, output_file = "data/raw/ol01.xml", "data/scan/ol01.xml"
scan_xml(input_file, output_file, debug=False)

120it [00:00, 671.42it/s]


## 2b. Extract scansion from Hypotactic

In [2]:
from scan_ht import extract_strophic_syllables_from_html, create_tei_xml

# One-off re-extraction of a single page (ht/is03.html). The result goes to a
# separate "_FIX_IS3" file, so the per-collection output written by the
# all-collections run is not overwritten.
collection, prefix, title = "isthmians", "is", "Isthmian Odes"

# The path is fixed, so a plain string is used instead of an f-string without
# placeholders.
html_file = "ht/is03.html"

# debug=True makes the extractor log every div/span it inspects.
poems_dict = extract_strophic_syllables_from_html(html_file, debug=True)

tei_xml = create_tei_xml(
    poems_dict,
    title=title,
    prefix=prefix,
    output_file=f"data/scan/ht_{collection}_FIX_IS3.xml",
)

Processing file: ht/is03.html
Found 1 poem divs

Processing poem 1
Found 3 strophe divs in poem 1
  Strophe 1: type='Strophe', num='1'
  Classes: ['strophe']
    Processing as key: strophe_1
    Found 7 total child divs
      Checking div with classes: ['stropheheader']
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked', 'fifth']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
    Found 6 child line divs
      Child line 1: classes ['line', 'checked']
      Found 8 word spans
        Word 1: 1 sylls
          Span: classes=['syll', 'long'], content='εἴ'
        Word 2: 1 sylls
          Sp

In [2]:
from scan_ht import extract_strophic_syllables_from_html, create_tei_xml

# One entry per collection:
# (name used in the HTML and output file names, prefix, title).
ode_books = (
    ("olympians", "ol", "Olympian Odes"),
    ("pythians", "py", "Pythian Odes"),
    ("nemeans", "ne", "Nemean Odes"),
    ("isthmians", "is", "Isthmian Odes"),
)

for name, prefix, title in ode_books:
    html_file = f"ht/{name}.html"

    # debug=True makes the extractor log every div/span it inspects.
    poems_dict = extract_strophic_syllables_from_html(html_file, debug=True)

    tei_xml = create_tei_xml(
        poems_dict,
        title=title,
        prefix=prefix,
        output_file=f"data/scan/ht_{name}.xml",
    )

Processing file: ht/olympians.html
Found 14 poem divs

Processing poem 1
Found 12 strophe divs in poem 1
  Strophe 1: type='Strophe', num='1'
  Classes: ['strophe']
    Processing as key: strophe_1
    Found 12 total child divs
      Checking div with classes: ['stropheheader']
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked', 'fifth']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line', 'checked']
        -> This is a line div!
      Checking div with classes: ['line'

## 3. Compilation

See `compiler.py`.