# Convert TEI in 010_manannot to NoSkE verticals with XSL and Saxon

In [1]:
from os import scandir, path
import logging
from pathlib import Path
from urllib.parse import urlsplit
import saxonche
# Configure the root logger once for the whole notebook: emit INFO and above,
# with every message prefixed by its timestamp.
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

In [2]:
# Root of the git repository, given relative to the current working directory.
shawidataHomeDir = "../.."

# Directory holding the project-specific stylesheets, and the stylesheet
# that turns a manually annotated TEI document into a vertical file.
pathToShawiStylesheetsDir = f"{shawidataHomeDir}/082_scripts_xsl"
pathToMannannotToVertXSL = f"{pathToShawiStylesheetsDir}/mananot-to-vert.xsl"

# Input: directory with the manually annotated files.
pathToManannot = f"{shawidataHomeDir}/010_manannot"

# Output: directory receiving the NoSkE verticals.
noSkEVertDir = f"{shawidataHomeDir}/130_vert_plain"

In [3]:
def transform(s, xsl, o, parameters=None):
    """Apply an XSLT stylesheet to a source document and write the result to a file.

    Args:
        s: Path of the source XML document.
        xsl: Path of the XSLT stylesheet.
        o: Path of the output file to write.
        parameters: Optional mapping (or iterable of ``(name, value)`` pairs)
            of stylesheet parameter names to string values. ``None`` or an
            empty container means no parameters.

    Returns:
        ``o`` when the transformation ran without a reported error and the
        output file exists afterwards; ``None`` otherwise. Failures are
        logged rather than raised.
    """
    source_path = path.abspath(s)
    stylesheet_path = path.abspath(xsl)
    output_path = path.abspath(o)
    failure_message = source_path + " - " + stylesheet_path + " -> " + output_path + " failed"
    succeeded = False

    # processor keeps files open on Windows and in doing so prevents moving or copying them,
    # so it is created per call and released by the context manager.
    with saxonche.PySaxonProcessor(license=False) as proc:
        # NOTE(review): "xi" presumably switches on XInclude processing - confirm against the Saxon docs.
        proc.set_configuration_property("xi", "on")
        saxon = proc.new_xslt30_processor()
        for name, value in dict(parameters or {}).items():
            saxon.set_parameter(name=name, value=proc.make_string_value(value))
        try:
            executable = saxon.compile_stylesheet(stylesheet_file=stylesheet_path)
            # From the docs saxonc.html#PyXsltExecutable-set_initial_match_selection
            # This method does not set the global context item for the transformation;
            # if that is required, it can be done separately using the set_global_context_item method.
            executable.set_global_context_item(file_name=source_path)
            executable.apply_templates_returning_file(source_file=source_path, output_file=output_path)
            succeeded = True
        except saxonche.PySaxonApiError as e:
            logging.error(str(e))
            logging.error(failure_message)
        if proc.exception_occurred:
            succeeded = False
            logging.error(proc.get_error_message())
            logging.error(failure_message)

    # An output file left over from an earlier run must not turn a failed
    # transformation into a success, so the file check alone is not enough.
    if succeeded and path.exists(output_path):
        return o
    logging.error("there was an error transforming " + str(s) + " with stylesheet " + str(xsl))
    return None

In [4]:
# Basenames (file name without extension) that are skipped when collecting documents.
excludedBasenames = {'fLib', 'fLib_old', 'shawi_standoff', 'standoff_comp'}

# One record per XML file found directly in pathToManannot:
#   "filepath" - absolute path, "filename" - name with extension, "basename" - name without extension.
manannotDocs = []

# scandir() yields entries in arbitrary order; sorting by name makes every run
# process (and log) the documents in the same sequence. The context manager
# closes the directory handle as soon as the listing has been read.
with scandir(pathToManannot) as entries:
    for entry in sorted(entries, key=lambda e: e.name):
        # Only regular files qualify; a directory named "*.xml" cannot be transformed.
        if not entry.is_file() or not entry.name.endswith(".xml"):
            continue
        basename = Path(entry.name).stem
        if basename in excludedBasenames:
            continue
        manannotDocs.append({
            "filepath": path.abspath(entry.path),
            "filename": entry.name,
            "basename": basename,
        })

In [5]:
# Run the TEI-to-vertical stylesheet over every collected document; each result
# is written to noSkEVertDir as "<basename>.txt". The stylesheet, source and
# target are logged before each transformation starts.
for doc in manannotDocs:
    outFile = f'{noSkEVertDir}/{doc["basename"]}.txt'
    logging.info('%s: %s->%s', pathToMannannotToVertXSL, doc["filepath"], outFile)
    transform(s=doc['filepath'], xsl=pathToMannannotToVertXSL, o=outFile)

2025-10-28 17:39:26,469 - ../../082_scripts_xsl/mananot-to-vert.xsl: Q:\basexshawi\shawi-data\010_manannot\Urfa-000_Namrud-Harran-2001.xml->../../130_vert_plain/Urfa-000_Namrud-Harran-2001.txt
2025-10-28 17:39:27,164 - ../../082_scripts_xsl/mananot-to-vert.xsl: Q:\basexshawi\shawi-data\010_manannot\Urfa-002a_Joke_about_a_tribe.xml->../../130_vert_plain/Urfa-002a_Joke_about_a_tribe.txt
2025-10-28 17:39:27,787 - ../../082_scripts_xsl/mananot-to-vert.xsl: Q:\basexshawi\shawi-data\010_manannot\Urfa-002b_Blood_feud_in_the_past.xml->../../130_vert_plain/Urfa-002b_Blood_feud_in_the_past.txt
2025-10-28 17:39:28,476 - ../../082_scripts_xsl/mananot-to-vert.xsl: Q:\basexshawi\shawi-data\010_manannot\Urfa-002c_The_incomplete_meal.xml->../../130_vert_plain/Urfa-002c_The_incomplete_meal.txt
2025-10-28 17:39:29,092 - ../../082_scripts_xsl/mananot-to-vert.xsl: Q:\basexshawi\shawi-data\010_manannot\Urfa-002d_The_bull_in_the_jar.xml->../../130_vert_plain/Urfa-002d_The_bull_in_the_jar.txt
2025-10-28 17:3