In [1]:
import re
from lxml import etree
import html
import os
import glob # Import glob to find files

# --- Configuration ---
# Folder that holds the source TEI XML files ( ADJUST AS NEEDED ).
# '.' means the current directory; prefix with '../' to climb one level.
SOURCE_DIR = '../GRC_misc/'

# Glob pattern selecting the XML files to convert ( ADJUST AS NEEDED ).
# Examples:
#   'aesch.*.headlam_eng2.xml'   -> Headlam's Aeschylus translations only
#   '*.some_author.*.xml'        -> every work by one author
#   '*.*grc*.xml'                -> all Greek files
#   '*.*eng*.xml'                -> all English files
FILE_PATTERN = 'aesch.*.headlam_eng2.xml' # <-- CHANGE THIS PATTERN AS NEEDED
# Alternative patterns, kept here for quick switching:
#FILE_PATTERN = 'aesch.*.swanwick.xml'
#FILE_PATTERN = 'xenophon01.watson_1854.xml'
#FILE_PATTERN = 'aesch.*.buckley1849-eng2.xml'
#FILE_PATTERN = 'lucian.williams1888.xml'

# Root folder under which every HTML output sub-folder is created.
BASE_OUTPUT_DIR = './html_output/'

# --- Dynamic Output Directory ---
# The sub-folder name is the THIRD dot-separated piece of the pattern,
# e.g. 'headlam_eng2' from 'aesch.*.headlam_eng2.xml'.
# Patterns with fewer than four dot-separated pieces (such as
# 'lucian.williams1888.xml') fall back to 'default_output'.
# Customize this logic, or hard-code a name, if that does not suit you.
pattern_parts = FILE_PATTERN.split('.')
if len(pattern_parts) > 3:
    output_subfolder = pattern_parts[2]
else:
    output_subfolder = 'default_output'
OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR, output_subfolder)

# Create the output folder up front; harmless if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Collect the XML files matching the pattern inside the source folder.
XML_FILES = glob.glob(os.path.join(SOURCE_DIR, FILE_PATTERN))

# Namespace prefixes used in find()/xpath() calls ('xml' covers xml:id / xml:lang).
ns = dict(
    tei='http://www.tei-c.org/ns/1.0',
    xml='http://www.w3.org/XML/1998/namespace',
)

if XML_FILES:
    print(f"Found {len(XML_FILES)} XML files in '{SOURCE_DIR}' matching '{FILE_PATTERN}':")
    # Sort in place so the listing (and later processing) order is stable.
    XML_FILES.sort()
    for f in XML_FILES:
        print(f"  - {os.path.basename(f)}")
else:
    print(f"Warning: No XML files found in '{SOURCE_DIR}' matching '{FILE_PATTERN}'")
print(f"HTML output will be saved in '{OUTPUT_DIR}'.")

Found 7 XML files in '../GRC_misc/' matching 'aesch.*.headlam_eng2.xml':
  - aesch.ag.headlam_eng2.xml
  - aesch.eum.headlam_eng2.xml
  - aesch.lib.headlam_eng2.xml
  - aesch.pb.headlam_eng2.xml
  - aesch.pers.headlam_eng2.xml
  - aesch.seven.headlam_eng2.xml
  - aesch.supp.headlam_eng2.xml
HTML output will be saved in './html_output/headlam_eng2'.


In [2]:
from collections import defaultdict
import html
from lxml import etree

# Namespace prefix -> URI map, repeated in this cell so the helpers below
# can be read (and run) without scrolling back to the configuration cell.
ns = dict(
    tei='http://www.tei-c.org/ns/1.0',
    xml='http://www.w3.org/XML/1998/namespace',
)

def first_ancestor_n(el, subtype):
    """Return the @n of the closest enclosing textpart <div> with the given
    @subtype, or None when `el` has no such ancestor."""
    # On the (reverse) ancestor axis, position [1] is the nearest match.
    query = "ancestor::tei:div[@type='textpart' and @subtype='{}'][1]".format(subtype)
    matches = el.xpath(query, namespaces=ns)
    if not matches:
        return None
    return matches[0].get('n')

def preceding_section_milestone_n(p):
    """Return the @n of a <milestone unit="section"> that sits directly
    before `p` as its previous sibling; None in every other case."""
    sibling = p.getprevious()
    # Comments / processing instructions have a non-string .tag; ignore them.
    if sibling is None or not isinstance(sibling.tag, str):
        return None
    is_section_milestone = (
        etree.QName(sibling.tag).localname == 'milestone'
        and sibling.get('unit') == 'section'
    )
    return sibling.get('n') if is_section_milestone else None

# Fallback section counter, keyed by (book, chapter).
# render_div consults it only for a <p> that has no @n, no directly preceding
# section milestone and no enclosing section div: it is set to 1 when the
# paragraph's previous sibling is not a <p>, and incremented otherwise.
# Missing keys read as 0 (defaultdict(int)). Being module-level, the counts
# persist across render_div calls until a run of paragraphs resets them.
section_counter = defaultdict(int)

def get_preceding_pb_n(el):
    """Return the @n of the closest tei:pb that precedes `el` in document
    order, or None when no page break comes before it."""
    # The 'tei:' prefix is required: pb lives in the TEI namespace.
    for page_break in el.xpath('preceding::tei:pb[1]', namespaces=ns):
        return page_break.get('n')
    return None

# Element names that are rendered as a plain HTML wrapper around their
# (recursively rendered) content, mapped to the HTML tag that is emitted.
_INLINE_TAG_MAP = {
    # Inline HTML tags (pre-processed or naturally occurring) pass through.
    "i": "i", "b": "b", "em": "em", "strong": "strong", "span": "span",
    "a": "a", "sub": "sub", "sup": "sup", "q": "q",
    # TEI elements with a fixed HTML counterpart.
    "title": "i",
    "emph": "em",
    "quote": "blockquote",
    "blockquote": "blockquote",
    "bibl": "strong",
    "cit": "em",
}


def _html_attrs(element, exclude=()):
    """Serialize the un-namespaced attributes of `element` as `k="v"` pairs
    (values HTML-escaped), skipping any attribute named in `exclude`."""
    return " ".join(
        f'{k}="{html.escape(v)}"'
        for k, v in element.attrib.items()
        # lxml spells namespaced attribute names as '{uri}local'; drop those.
        if '{' not in k and k not in exclude
    )


def _wrap_html(tag, attrs, inner):
    """Return `<tag attrs>inner</tag>`, omitting the space when attrs is empty."""
    if attrs:
        return f'<{tag} {attrs}>{inner}</{tag}>'
    return f'<{tag}>{inner}</{tag}>'


def _unknown_tag_debug_context(child, tagname):
    """Build the (page_info, snippet) strings printed for an unhandled tag."""
    page_n = get_preceding_pb_n(child)
    page_info = f" (near page {page_n})" if page_n else " (page N/A)"
    try:
        # Text just before the tag: previous sibling's tail, else parent's text.
        prev_el = child.getprevious()
        text_before = (prev_el.tail if prev_el is not None else child.getparent().text) or ""
        text_after = child.tail or ""
        text_inside = "".join(child.itertext()).strip()
        snippet = f"...{text_before.strip()[-30:]} <{tagname}> {text_inside[:30]}... </{tagname}> {text_after.strip()[:30]}..."
        snippet_escaped = html.escape(snippet).replace('\n', ' ')
    except Exception as e:
        # The snippet is diagnostics only; never let it abort the conversion.
        snippet_escaped = f"[Snippet error: {e}]"
    return page_info, snippet_escaped


def _render_inline_child(child):
    """Render one element child (without its tail text) as an HTML string."""
    tagname = etree.QName(child).localname

    # Void elements: their content, if any, is not rendered.
    if tagname == "br":
        return "<br>"
    if tagname == "hr":
        attrs = _html_attrs(child)
        return f'<hr {attrs}>' if attrs else '<hr>'

    html_tag = _INLINE_TAG_MAP.get(tagname)
    if html_tag is not None:
        return _wrap_html(html_tag, _html_attrs(child), build_inner_html(child))

    if tagname == "l":
        # Verse line inside running text: forced line break + marker span.
        return f'<br><span class="verse-line-inline">{build_inner_html(child)}</span>'

    if tagname == "hi":
        # @rend containing 'bold' -> <b>; anything else (incl. 'italic') -> <i>.
        # @rend itself is consumed here and not copied to the output.
        html_tag = 'b' if 'bold' in child.get('rend', '').lower() else 'i'
        inner = build_inner_html(child)
        return _wrap_html(html_tag, _html_attrs(child, exclude=('rend',)), inner)

    if tagname == "del":
        inner = build_inner_html(child)
        attrs_dict = {k: v for k, v in child.attrib.items() if '{' not in k}
        # Append our marker class to whatever class the element already has.
        attrs_dict['class'] = f"{attrs_dict.get('class', '')} tei-del".strip()
        attrs = " ".join(f'{k}="{html.escape(v)}"' for k, v in attrs_dict.items())
        # The square brackets go *inside* the <del> tag.
        return f'<del {attrs}>[{inner}]</del>'

    if tagname == "p":
        # A <p> met in inline context: keep it, flagged with class "nested-p".
        inner = build_inner_html(child)
        existing_class = child.get('class')
        css_class = f"nested-p {existing_class}" if existing_class else "nested-p"
        # 'class' is excluded from the copied attributes so the tag carries a
        # single, merged class attribute rather than two conflicting ones.
        other_attrs = _html_attrs(child, exclude=('class',))
        class_attr = f'class="{html.escape(css_class)}"'
        attrs = f'{other_attrs} {class_attr}' if other_attrs else class_attr
        return _wrap_html('p', attrs, inner)

    # Fallback for unknown / unprocessed TEI tags (e.g. milestone): keep the
    # content in a marker span and report where the tag was found.
    page_info, snippet_escaped = _unknown_tag_debug_context(child, tagname)
    unknown_inner = build_inner_html(child)
    attrs_str = " ".join(
        f'data-tei-{k}="{html.escape(v)}"'
        for k, v in child.attrib.items() if '{' not in k
    )
    print(f"    - Debug: build_inner_html fallback for TEI tag: {tagname}{page_info}. Context: {snippet_escaped}")
    return f'<span class="tei-unknown-{tagname}" {attrs_str}>{unknown_inner}</span>'


def build_inner_html(parent_element):
    """
    Recursively serialize a TEI element's text and children into inline HTML.

    Text and tail text are HTML-escaped. Comments and processing instructions
    are skipped, but their tail text is kept. Known inline HTML tags and a set
    of TEI tags (title, emph, hi, del, quote, bibl, cit, l, p, br, hr) are
    mapped to HTML; any other element is wrapped in a
    <span class="tei-unknown-TAG"> and a debug line is printed for it.

    Args:
        parent_element: lxml element whose content (not the element's own
            tag) is rendered.

    Returns:
        The HTML string with leading/trailing whitespace stripped. Because
        the function recurses, that strip also applies to the content of
        every nested element.
    """
    parts = []
    if parent_element.text:
        parts.append(html.escape(parent_element.text))

    for child in parent_element.iterchildren():
        # Non-element nodes (comments, PIs) have a non-string .tag: only
        # their tail text contributes to the output.
        if isinstance(child.tag, str):
            parts.append(_render_inline_child(child))
        if child.tail:
            parts.append(html.escape(child.tail))

    return "".join(parts).strip()


# *** FUNCTION TO RENDER TABLES ***
def render_table(table_element):
    """Convert a TEI <table> into an HTML <table> string.

    The optional tei:head becomes the <caption>, the first tei:row becomes
    the <thead> (cells as <th>) and the remaining rows form the <tbody>.
    A table without any row is rendered as a bare "<table></table>".
    """
    def cells_as(row, cell_tag):
        # One '<th>…</th>' / '<td>…</td>' string per tei:cell of the row.
        return [
            f'<{cell_tag}>{build_inner_html(cell)}</{cell_tag}>'
            for cell in row.findall('tei:cell', ns)
        ]

    caption_el = table_element.find('tei:head', ns)
    caption = build_inner_html(caption_el) if caption_el is not None else ''

    all_rows = table_element.findall('tei:row', ns)
    if not all_rows:
        return "<table></table>"

    header_row, *body_rows = all_rows

    html_lines = ['<table>']
    if caption:
        html_lines.append(f'<caption>{caption}</caption>')

    # Header: the first row is always treated as the column headings.
    html_lines.append('<thead><tr>')
    html_lines.extend(cells_as(header_row, 'th'))
    html_lines.append('</tr></thead>')

    # Body: every remaining row.
    html_lines.append('<tbody>')
    for row in body_rows:
        html_lines.append('<tr>')
        html_lines.extend(cells_as(row, 'td'))
        html_lines.append('</tr>')
    html_lines.append('</tbody>')

    html_lines.append('</table>')
    return "\n".join(html_lines)
# *** END FUNCTION ***


# *** render_div DEFINITION ***
def _previous_element_sibling(el):
    """Return the nearest preceding sibling that is a real element, skipping
    comments and processing instructions; None if there is none."""
    prev = el.getprevious()
    while prev is not None and not isinstance(prev.tag, str):
        prev = prev.getprevious()
    return prev


def _render_div_header(el, book_n, chap_n):
    """Return the HTML parts for a div's <head> (or its bare TOC anchor)."""
    toc_id = el.get("data-toc-id")
    head = el.find('tei:head', ns)

    if head is None:
        # Head-less div: still give the TOC something to link to.
        return [f'<a id="{toc_id}"></a>'] if toc_id else []

    head_html = build_inner_html(head).strip()
    if not head_html:
        return []

    id_attr = f' id="{toc_id}"' if toc_id else ''
    if book_n and chap_n:
        chap_label = f"{book_n}.{chap_n}"
        chap_id = chap_label.replace('.', '-')
        return [f'<h3{id_attr}><a class="loc" id="loc-{chap_id}">{chap_label}</a> {head_html}</h3>']
    if head.find("tei:title[@type='main']", ns) is not None:
        return [f"<h2 class='index-main'{id_attr}>{head_html}</h2>"]
    return [f"<h3{id_attr}>{head_html}</h3>"]


def _render_paragraph(element, book_n, chap_n):
    """Return the HTML for a <p>, prefixed with a book.chapter.section
    locator anchor whenever all three numbers can be determined."""
    p_html = build_inner_html(element)
    # Attribute values are escaped so quotes/ampersands cannot break the tag.
    attrs = " ".join(
        f'{k}="{html.escape(v)}"'
        for k, v in element.attrib.items() if '{' not in k
    )
    b = first_ancestor_n(element, 'book') or book_n
    c = first_ancestor_n(element, 'chapter') or chap_n
    s = (element.get('n')
         or preceding_section_milestone_n(element)
         or first_ancestor_n(element, 'section'))

    if not s and (b and c):
        # No explicit section number: count paragraphs within the run.
        # Comments / PIs between paragraphs are skipped (calling QName on
        # their non-string .tag would raise).
        prev_el = _previous_element_sibling(element)
        if prev_el is None or etree.QName(prev_el.tag).localname != 'p':
            section_counter[(b, c)] = 1
        else:
            section_counter[(b, c)] += 1
        s = str(section_counter[(b, c)])

    open_tag = f'<p {attrs}>' if attrs else '<p>'
    if b and c and s:
        label = f"{b}.{c}.{s}"
        pid = label.replace('.', '-')
        return f'{open_tag}<a class="loc" id="loc-{pid}">{label}</a> {p_html}</p>'
    return f'{open_tag}{p_html}</p>'


def _render_list(element):
    """Return the HTML parts for a TEI <list>: optional <h4> head plus a <ul>
    whose items show the item's leading text and a link built from its <ref>."""
    parts = []
    list_head = element.find('tei:head', ns)
    if list_head is not None:
        head_html = build_inner_html(list_head).strip()
        if head_html:
            parts.append(f"<h4>{head_html}</h4>")

    parts.append('<ul>')
    for item in element.findall('tei:item', ns):
        # Only the text before the first child is used as the description.
        item_description = html.escape(item.text).strip() if item.text else ""

        link_html = ""
        ref_tag = item.find('tei:ref', ns)
        if ref_tag is not None:
            ref_n = ref_tag.get('n', '').replace('pg.', '')  # e.g. '1', 'v'
            ref_text = "".join(ref_tag.itertext()).strip()
            if ref_n and ref_text:
                # Escape both: they are raw XML text going into HTML markup.
                link_html = (f' <a href="#pb-{html.escape(ref_n)}" class="toc-page-link">'
                             f'{html.escape(ref_text)}</a>')

        parts.append(f'<li>{item_description}{link_html}</li>')

        if item.tail and item.tail.strip():
            parts.append(html.escape(item.tail))
    parts.append('</ul>')
    return parts


def _render_speech(element):
    """Return the HTML parts for an <sp>: speaker label followed by the rest
    of the speech. NOTE: the <speaker> child is removed from the tree."""
    pieces = []
    speaker_tag = element.find('tei:speaker', ns)
    if speaker_tag is not None:
        speaker_text = "".join(speaker_tag.itertext()).strip()
        start_line = speaker_tag.get('start-line')
        end_line = speaker_tag.get('end-line')
        line_range_text = ""
        if start_line and end_line:
            line_range_text = f" [{html.escape(start_line)}–{html.escape(end_line)}]"
        elif start_line:
            line_range_text = f" [Line {html.escape(start_line)}]"
        if speaker_text:
            pieces.append(f'<span class="speaker">{html.escape(speaker_text)}{line_range_text}</span>')

        # Drop the speaker so build_inner_html does not render it a second
        # time. lxml's remove() discards the element's tail text with it, so
        # hand the tail over to the preceding node first.
        if speaker_tag.tail:
            prev = speaker_tag.getprevious()
            if prev is not None:
                prev.tail = (prev.tail or '') + speaker_tag.tail
            else:
                element.text = (element.text or '') + speaker_tag.tail
        element.remove(speaker_tag)

    pieces.append(build_inner_html(element))
    final_sp_html = "".join(pieces).strip()
    return [f'<div class="speech">{final_sp_html}</div>'] if final_sp_html else []


def _render_cast_list(element):
    """Return the HTML parts for a <castList>: optional <h4> plus a <ul>."""
    parts = []
    cl_head = element.find('tei:head', ns)
    if cl_head is not None:
        parts.append(f"<h4>{html.escape(''.join(cl_head.itertext()).strip())}</h4>")

    parts.append('<ul class="cast-list">')
    for item in element.findall('tei:castItem', ns):
        role_tag = item.find('tei:role', ns)
        role_text = ""
        desc_text = ""
        if role_tag is not None:
            role_text = "".join(role_tag.itertext()).strip()
            if role_tag.tail:
                # Description follows the role, usually after a comma.
                desc_text = role_tag.tail.strip().lstrip(',').strip()
        else:
            # No <role>: the whole item text is the description.
            desc_text = "".join(item.itertext()).strip()

        if role_text:
            parts.append(f'<li><span class="role">{html.escape(role_text)}</span> <span class="role-desc">{html.escape(desc_text)}</span></li>')
        elif desc_text:
            parts.append(f'<li><span class="role-desc">{html.escape(desc_text)}</span></li>')
    parts.append("</ul>")
    return parts


def render_div(el):
    """Render a TEI <div> and all of its children as an HTML string.

    The div's <head> becomes a heading (with the id taken from the div's
    'data-toc-id' attribute when present); each child is then rendered by
    tag: p, list, sp, castList, l, hr/span, table, ab, nested div, and a
    generic fallback that prints an info line.

    Side effects: updates the module-level `section_counter`, and removes
    the <speaker> child of every <sp> it renders from the tree.

    Args:
        el: lxml element for the div.

    Returns:
        The rendered parts joined with newlines.
    """
    # Book / chapter context: nearest ancestor of that subtype, or the div's
    # own @n when the div itself is the book / chapter.
    own_subtype = el.get('subtype')
    book_n = first_ancestor_n(el, 'book')
    if own_subtype == 'book':
        book_n = book_n or el.get('n')
    chap_n = first_ancestor_n(el, 'chapter')
    if own_subtype == 'chapter':
        chap_n = chap_n or el.get('n')

    # --- 1. Header ---
    parts = _render_div_header(el, book_n, chap_n)

    # --- 2. Children ---
    for element in el.iterchildren():
        if not isinstance(element.tag, str):  # comments, PIs
            continue

        tag = etree.QName(element.tag).localname

        if tag == 'head':
            continue  # rendered by the header step

        elif tag == 'p':
            parts.append(_render_paragraph(element, book_n, chap_n))

        elif tag == 'list':
            parts.extend(_render_list(element))

        elif tag == 'sp':
            parts.extend(_render_speech(element))

        elif tag == 'castList':
            parts.extend(_render_cast_list(element))

        elif tag == 'l':
            l_inner_html = build_inner_html(element)
            if l_inner_html:
                parts.append(f'<p class="verse-line">{l_inner_html}</p>')

        elif tag == 'hr' or tag == 'span':
            # Already-HTML elements (pre-processed pb/unclear/add): emit as is.
            parts.append(etree.tostring(element, encoding='unicode', method='html', with_tail=False).strip())
            if element.tail:
                parts.append(html.escape(element.tail))

        elif tag == 'table':
            parts.append(render_table(element))

        elif tag == 'ab':
            # <ab> (often used in indexes) is treated as a paragraph.
            parts.append(f"<p>{build_inner_html(element)}</p>")

        elif tag == 'div':
            parts.append(render_div(element))

        else:
            # Unknown block-level tag: keep its content and report it.
            element_html = build_inner_html(element)
            if element_html:
                parts.append(f'<div class="tei-unknown-block-{tag}">{element_html}</div>')
                print(f"    - Info: render_div fallback for TEI block tag: {tag}")
            if element.tail:
                parts.append(html.escape(element.tail))

    return "\n".join(parts)


# *** render_index_div DEFINITION ***
def render_index_div(el):
    """Render an index div.

    Thin wrapper: index divs get no special treatment here, so this simply
    returns whatever render_div(el) returns.
    """
    return render_div(el) # Pass el directly

In [3]:
import re
from lxml import etree
import html
import os
import glob # Import glob to find files

# Namespace prefix -> URI map for lxml lookups (the documents are assumed
# to be TEI); 'xml' is included so xml:id / xml:lang can be addressed.
ns = dict(
    tei='http://www.tei-c.org/ns/1.0',
    xml='http://www.w3.org/XML/1998/namespace',
)

def process_play(xml_filepath, html_output_filepath):
    """
    Parses a TEI XML file, extracts the English text
    and footnotes, and generates a two-column HTML file.
    Handles different TEI structures and nested tags within notes.
    Implements bidirectional linking between text refs and footnotes.
    Correctly processes pb, lb, and s tags anywhere in the main text.
    """
    print(f"\nProcessing '{os.path.basename(xml_filepath)}'...")
    try:
        # --- Initialize sets to track keys ---
        found_note_keys = set()
        linked_ref_keys = set()
        
        # --- 1. Parsing ---
        parser = etree.XMLParser(remove_blank_text=True, recover=True)
        root = etree.parse(xml_filepath, parser)
        print("   - XML parsed.")

        # --- 2. Data Extraction ---
        footnote_col = {} # Dictionary to hold notes, keyed by a unique identifier

        # *** PRE-PROCESSING STEP FOR NOTES ***
        # This block pre-formats content *inside* <note> tags
        all_notes = root.findall('.//tei:note', ns)
        for note in all_notes:
            for elem in list(note.xpath('.//*')): # Use list() for safe iteration
                parent = elem.getparent()
                if parent is None: continue

                tag = etree.QName(elem.tag).localname

                if tag == 'l':
                    br = etree.Element("br")
                    br.tail = elem.tail
                    line_text = "".join(elem.itertext()).strip()
                    previous_sibling = elem.getprevious()
                    if previous_sibling is not None:
                        previous_sibling.tail = (previous_sibling.tail or '') + line_text
                    else:
                        parent.text = (parent.text or '') + line_text
                    parent.replace(elem, br)
                elif tag == 'quote':
                    bq = etree.Element("blockquote")
                    bq.text = elem.text; bq.tail = elem.tail
                    for k, v in elem.attrib.items():
                        if k == '{http://www.w3.org/XML/1998/namespace}lang': bq.set('lang', v)
                        elif '{' not in k: bq.set(k, v)
                    for child in list(elem): bq.append(child)
                    parent.replace(elem, bq)
                elif tag == 'foreign':
                    i = etree.Element("i")
                    i.text = elem.text; i.tail = elem.tail
                    for k, v in elem.attrib.items():
                        if k == '{http://www.w3.org/XML/1998/namespace}lang': i.set('lang', v)
                        elif '{' not in k: i.set(k, v)
                    for child in list(elem): i.append(child)
                    parent.replace(elem, i)
                elif tag == 'bibl':
                    strong = etree.Element("strong")
                    strong.text = elem.text; strong.tail = elem.tail
                    for k, v in elem.attrib.items():
                         if '{' not in k: strong.set(k, v)
                    for child in list(elem): strong.append(child)
                    parent.replace(elem, strong)
                elif tag == 'title':
                    i_title = etree.Element("i")
                    i_title.text = elem.text; i_title.tail = elem.tail
                    for k, v in elem.attrib.items():
                         if '{' not in k: i_title.set(k, v)
                    for child in list(elem): i_title.append(child)
                    parent.replace(elem, i_title)
                elif tag == 'gloss':
                    span_gloss = etree.Element("span")
                    span_gloss.set("class", "gloss")
                    gloss_title_attr = elem.get('target', elem.get('rend', ''))
                    if gloss_title_attr:
                        span_gloss.set('title', html.escape(gloss_title_attr)) 
                    span_gloss.text = elem.text
                    for child in list(elem):
                        span_gloss.append(child)
                    span_gloss.tail = elem.tail
                    parent.replace(elem, span_gloss)
                elif tag == 'emph' or tag == 'hi': 
                    new_tag_name = 'em' if tag == 'emph' else 'i' 
                    fmt_tag = etree.Element(new_tag_name)
                    fmt_tag.text = elem.text; fmt_tag.tail = elem.tail
                    for child in list(elem): fmt_tag.append(child)
                    parent.replace(elem, fmt_tag)
                elif tag == 'q': 
                    q_tag = etree.Element("q") 
                    q_tag.text = elem.text; q_tag.tail = elem.tail
                    for child in list(elem): q_tag.append(child)
                    parent.replace(elem, q_tag)
                
        # --- A. Extract Footnotes (Robustly) ---
        all_notes = root.findall('.//tei:note', ns) # Find all notes again
        notes_processed_count = 0
        for note in all_notes:
            key = None
            n_attr = note.get('n')
            xml_id = note.get('{http://www.w3.org/XML/1998/namespace}id')

            if xml_id and xml_id.startswith('note-'):
                key = xml_id.replace('note-', '').replace('-', '.') 
            elif note.get('type') == 'footnote' and n_attr: # Simplified key detection
                key = n_attr
            elif n_attr: 
                 ref_exists = root.xpath(f'.//tei:ref[@n=\"{n_attr}\"]', namespaces=ns)
                 if ref_exists or not xml_id: # Prioritize n if ref exists or no xml_id
                     key = n_attr
                 elif xml_id: 
                     key = xml_id
            elif xml_id: 
                 key = xml_id

            if not key:
                print(f"   - Warning: Skipping note without a usable key: {etree.tostring(note, encoding='unicode')[:80]}...")
                continue
            
            found_note_keys.add(key)
            
            note_parts = [html.escape(note.text or '')]
            for child in note:
                note_parts.append(etree.tostring(child, encoding='unicode', method='html'))
            note_html_content = "".join(note_parts).strip()
            # Ensure key is a string for f-string
            key_str = str(key) 
            html_content = f'<div class="line" id="note-{key_str}"><a class="note-ref" data-note="{key_str}">{key_str}</a> <span class="text">{note_html_content}</span></div>'
            
            if key in footnote_col:
                print(f"   - WARNING: Duplicate note key detected: '{key_str}'. Overwriting previous entry.")
            
            footnote_col[key] = html_content
            notes_processed_count += 1
            
        # Remove the <note> tags from the main tree AFTER processing them
        all_notes_again = root.findall('.//tei:note', ns)
        for note in all_notes_again:
            parent = note.getparent()
            if parent is not None:
                if note.tail:
                    prev = note.getprevious()
                    if prev is not None:
                        prev.tail = (prev.tail or '') + note.tail
                    else:
                        parent.text = (parent.text or '') + note.tail
                parent.remove(note)


        
        # Sort footnotes for output
        def sort_key(n_str):
            parts = re.split(r'[.-]', str(n_str)) # Add str() for safety
            if not parts: # Handle empty or invalid keys
                return (2, []) # Give lowest priority

            first_part = parts[0]
            # Priority: 0 for keys starting with a letter (like Roman numerals), 1 otherwise
            priority = 0 if first_part and first_part[0].isalpha() else 1

            processed_parts = [p.zfill(5) if p.isdigit() else p.lower() for p in parts]
            # Return a tuple: (priority, processed_parts)
            return (priority, processed_parts)

        # Order the collected footnotes with sort_key and join their HTML
        # fragments, one per line, for the notes column.
        sorted_note_keys = sorted(footnote_col.keys(), key=sort_key)
        footnote_html = "\n".join([footnote_col.get(key, '') for key in sorted_note_keys])
        
        # --- B. Transform English Text XML for HTML ---
        
        # Everything rendered into the main text column comes from <body>.
        main_text_body = root.find('.//tei:body', ns)

        if main_text_body is None:
            # Without a <body> there is nothing to render; abandon this file.
            print(f"   - Error: Could not find <body> tag. Skipping file.")
            return
        else:
             print(f"   - Debug: main_text_body set to <body>.")
        
        # --- Pre-process main text elements ---
        
        # 1. Replace <ref n="..."> tags with <a> links (searching from root)
        # Each <ref> whose 'n' attribute resolves to a key in footnote_col is swapped
        # for an <a class="fn-link"> pointing at "#note-<key>". Refs without 'n' are
        # skipped silently; refs with an unresolvable 'n' are left in place and reported.
        all_refs_in_doc = root.findall('.//tei:ref', ns)
        print(f"   - Debug: Found {len(all_refs_in_doc)} refs using root.findall. Processing them...")
        # NOTE(review): refs_found is decremented again for every ref that is skipped
        # or unmatched, so after the loop it always equals refs_linked - confirm that
        # this is the intended meaning of the "Refs found" statistic printed later.
        refs_found = 0 
        refs_linked = 0
        for ref in list(all_refs_in_doc): 
            refs_found += 1 
            n_attr = ref.get('n')
            if not n_attr:
                refs_found -= 1 
                continue 

            # Candidate note keys, tried in order: the raw value, the value with
            # '-' turned into '.', and the value with a 'note-' prefix stripped
            # and '-' turned into '.'.
            actual_key_found = None
            potential_key1 = n_attr
            potential_key2 = n_attr.replace('-', '.') if '-' in n_attr else None
            potential_key3 = n_attr.replace('note-', '').replace('-', '.') # For keys like 'note-iv-1'

            if potential_key1 in footnote_col:
                actual_key_found = potential_key1
            elif potential_key2 in footnote_col:
                actual_key_found = potential_key2
            elif potential_key3 in footnote_col:
                actual_key_found = potential_key3

            if actual_key_found:
                refs_linked += 1
                # Remember which notes are actually referenced from the text.
                linked_ref_keys.add(actual_key_found)
                
                key_str = str(actual_key_found) # Ensure key is string
                a_tag = etree.Element("a")
                a_tag.set("href", f"#note-{key_str}")
                a_tag.set("class", "fn-link")
                a_tag.set("data-note", key_str)
                # The link shows the note key; the original <ref>'s own content is not copied.
                a_tag.text = key_str 
                # Carry over the text that followed the <ref> so it survives the swap.
                a_tag.tail = ref.tail 

                parent = ref.getparent()
                if parent is not None:
                    try:
                        # Put the new <a> at the exact position the <ref> occupied.
                        index = parent.index(ref)
                        parent.remove(ref)
                        parent.insert(index, a_tag)
                    except ValueError:
                         print(f"   - Warning: Could not find index for ref '{n_attr}' to replace it.")
                else:
                    print(f"   - Warning: Ref '{n_attr}' has no parent, cannot replace.")
            else:
                print(f"   - Warning: Ref '{n_attr}' found but no matching note key ({potential_key1}, {potential_key2}, or {potential_key3}) in footnote_col.")
                refs_found -= 1

        # 2. Replace <lb n="..."> tags with <br> tags
        # The 'n' attribute is dropped; only the trailing text is carried over.
        for lb in list(main_text_body.findall('.//tei:lb', ns)):
            br_tag = etree.Element("br")
            br_tag.tail = lb.tail
            parent = lb.getparent()
            if parent is not None:
                parent.replace(lb, br_tag)

        # 3. Replace <pb n="..."> tags
        # Each page break becomes an <hr class="pb-hr"> followed by a
        # <span class="pb"> label; the span inherits the text that followed <pb>.
        for pb in list(main_text_body.findall('.//tei:pb', ns)):
            parent = pb.getparent()
            if parent is None: continue

            pb_n = pb.get('n', '')
            hr_tag = etree.Element('hr')
            hr_tag.set('class', 'pb-hr')
            span_tag = etree.Element('span')
            span_tag.set('class', 'pb')
            # --- START MODIFICATION ---
            if pb_n: # Only add an ID if there's a page number
                # Create a simple ID based on the page number
                span_tag.set('id', f'pb-{pb_n}')
            # --- END MODIFICATION ---            
            span_tag.text = f"[Page {pb_n}]" if pb_n else "[Page Break]"
            span_tag.tail = pb.tail

            try:
                # Insert <hr> then <span> in front of the <pb>, then drop the <pb>.
                pb_index = parent.index(pb)
                parent.insert(pb_index, hr_tag)
                parent.insert(pb_index + 1, span_tag)
                parent.remove(pb)
            except ValueError:
                # NOTE(review): if index() could not locate pb in parent, the
                # remove(pb) below would presumably fail the same way - confirm
                # whether this fallback can actually run to completion.
                print(f"   - Warning: Could not find index for <pb> tag. Appending to parent.")
                parent.append(hr_tag)
                parent.append(span_tag)
                parent.remove(pb)

        # Steps 4 and 6-8 all perform the same operation (swap a TEI element for a
        # plain HTML element, keeping its text, tail and children), so they share
        # one local helper instead of four copy-pasted loops.
        def _retag_elements(tei_name, html_name, css_class=None, copy_lang=False):
            """Replace every <tei:tei_name> under main_text_body with <html_name>.

            Args:
                tei_name: local name of the TEI element to look for.
                html_name: tag name of the replacement (un-namespaced) element.
                css_class: optional value for the replacement's 'class' attribute.
                copy_lang: when True, a non-empty xml:lang on the TEI element is
                    copied to a plain 'lang' attribute on the replacement.

            The replacement takes over the original's text, tail and child
            elements; any other attributes of the original are not copied.
            """
            for old_element in list(main_text_body.findall(f'.//tei:{tei_name}', ns)):
                parent = old_element.getparent()
                if parent is None:
                    continue

                new_element = etree.Element(html_name)
                if css_class:
                    new_element.set("class", css_class)
                new_element.text = old_element.text
                new_element.tail = old_element.tail
                if copy_lang:
                    lang = old_element.get('{http://www.w3.org/XML/1998/namespace}lang')
                    if lang:
                        new_element.set('lang', lang)

                # Appending moves each child out of the old element into the new one.
                for child in list(old_element):
                    new_element.append(child)
                parent.replace(old_element, new_element)

        # 4. Replace ALL <stage> tags with <span class="stage">
        _retag_elements('stage', 'span', css_class='stage')

        # *** UPDATED PRE-PROCESSING BLOCK ***

        # 5. "Unwrap" all <s> tags (remove tag, keep text)
        s_tags_found_count = len(main_text_body.findall('.//tei:s', ns))
        print(f"   - Debug: Found {s_tags_found_count} <s> tags to remove.")
        if s_tags_found_count > 0:
            try:
                etree.strip_tags(main_text_body, '{http://www.tei-c.org/ns/1.0}s')
                print(f"   - Debug: Successfully stripped <s> tags.")
            except Exception as e:
                # Best effort: a failure here is reported and processing continues.
                print(f"   - Warning: Could not strip <s> tags: {e}")

        # 6. Convert all <foreign> tags to <i>, carrying xml:lang over as lang
        _retag_elements('foreign', 'i', copy_lang=True)

        # 7. Convert all <unclear> tags to <span class="unclear">
        _retag_elements('unclear', 'span', css_class='unclear')

        # 8. Convert <add> tags to <span class="tei-add">
        _retag_elements('add', 'span', css_class='tei-add')
            
        # *** END OF PRE-PROCESSING BLOCK ***
        
        # (End of Section B processing)
        print(f"   - Data extracted: {notes_processed_count} footnotes processed.")
        print(f"   - Refs found: {refs_found}, Refs linked: {refs_linked}")

        print(f"   - Debug C: Starting serialization...")
        # --- C. Serialize Transformed XML to HTML String ---

        # *** NEW RECURSIVE TOC FUNCTIONS (defined inside process_play) ***
        
        def assign_toc_ids(div_elements, prefix):
            """Tag each div, and its nested divs, with a hierarchical 'data-toc-id'.

            The n-th element of div_elements (counting from 1) receives
            "<prefix>-<n>"; its direct child divs are then numbered using that
            value as their prefix, producing ids such as "toc-2-1".
            """
            for position, section in enumerate(div_elements, start=1):
                section_id = f"{prefix}-{position}"
                section.set("data-toc-id", section_id)

                # Descend only into direct <div> children of this section.
                nested_sections = section.xpath('./tei:div', namespaces=ns)
                if nested_sections:
                    assign_toc_ids(nested_sections, section_id)

        # *** THIS IS THE MODIFIED FUNCTION (FIX #2) ***
        def build_toc_html(div_elements):
            """Recursively builds nested <ul> HTML from divs with 'data-toc-id'.

            Args:
                div_elements: sequence of div elements, each expected to carry a
                    'data-toc-id' attribute.

            Returns:
                An HTML string: one <ul> holding an <li><a href="#id">label</a>
                per div, with a nested <ul> for divs that have direct child divs.
                Returns "" when div_elements is empty.

            The label is the div's <head> rendered through build_inner_html. When
            the head is missing or renders empty, the label falls back to
            "<Subtype> <n>" (subtype defaults to "Section"), or to
            "Untitled Section" when the div has no 'n' attribute.
            """
            if not div_elements:
                return ""

            parts = ['<ul>']
            for div in div_elements:
                toc_id = div.get("data-toc-id")
                head = div.find('tei:head', ns)
                head_text = None  # Stays None until a usable label is found

                if head is not None:
                    processed_head_text = build_inner_html(head).strip()
                    if processed_head_text:
                        head_text = processed_head_text

                if not head_text:  # Head was missing or rendered empty
                    n_attr = div.get('n')
                    # Use subtype if present (e.g., 'book', 'chapter'), otherwise 'Section'
                    subtype_attr = div.get('subtype', 'Section').capitalize()
                    if n_attr:
                        # These are raw attribute values, not markup, so escape
                        # them before they are placed inside the <a> element.
                        head_text = html.escape(f"{subtype_attr} {n_attr}")
                    else:
                        # Final fallback
                        head_text = "Untitled Section"

                parts.append(f'<li><a href="#{toc_id}">{head_text}</a>')

                # Recurse for child divs; the nested <ul> sits inside this <li>.
                child_divs = div.xpath('./tei:div', namespaces=ns)
                if child_divs:
                    parts.append(build_toc_html(child_divs))

                parts.append('</li>')

            parts.append('</ul>')
            return "\n".join(parts)
        # *** END OF MODIFIED FUNCTION ***


        # A table of contents is produced only when <body> has more than one
        # direct <div> child; otherwise toc_html stays empty and has_toc False.
        toc_html = ""
        has_toc = False
        top_level_divs = main_text_body.xpath('./tei:div', namespaces=ns)
    
        if len(top_level_divs) > 1:
            has_toc = True
            print(f"   - Debug: Found {len(top_level_divs)} top-level divs. Generating TOC...")
            
            # 1. Assign IDs to the XML tree
            assign_toc_ids(top_level_divs, "toc")
            
            # 2. Build the HTML string from the tree
            toc_html_render = build_toc_html(top_level_divs)
            
            # 3. Add the close button
            # Use '✕' (multiplication sign) for the close button
            toc_html = f'<button id="toc-toggle" title="Hide Table of Contents">✕</button>\n{toc_html_render}'
        else:
            print(f"   - Debug: Found {len(top_level_divs)} top-level div(s). No TOC needed.")
        
        # Render each direct child of <body> to an HTML fragment; the fragments
        # are joined with newlines into english_html at the end.
        html_builder = []
        
        iterator = main_text_body.iterchildren()

        for element in iterator:
            # Nodes whose tag is not a string have no local name to dispatch on.
            if not isinstance(element.tag, str): # Skip comments
                continue 
            
            # Dispatch on the local name, ignoring the namespace.
            tag = etree.QName(element.tag).localname

            try:
                # *** SIMPLIFIED RENDER LOOP ***
                if tag == 'div':
                    # Index divs have their own renderer; all other divs use render_div.
                    div_subtype = element.get('subtype')
                    if div_subtype == 'index':
                        html_builder.append(render_index_div(element))
                    else:
                        html_builder.append(render_div(element))
                else:
                    # This catches tags directly under <body> (like a stray <pb>)
                    if tag == 'hr' or tag == 'span': # Handle pre-processed pb/unclear/add tags
                         # Serialise without the tail, then emit the tail escaped.
                         element_html = etree.tostring(element, encoding='unicode', method='html', with_tail=False).strip()
                         html_builder.append(element_html)
                         if element.tail:
                              html_builder.append(html.escape(element.tail))
                    elif tag == 'p' or tag == 'ab': # Handle loose paragraphs
                         # This calls the Cell 2 function
                         # Note: the element's tail text is not emitted in this branch.
                         p_html = build_inner_html(element)
                         html_builder.append(f'<p>{p_html}</p>')
                    elif tag == 'head': 
                         # This calls the Cell 2 function
                         # Heads that render to an empty string are skipped.
                         head_html = build_inner_html(element).strip()
                         if head_html:
                             html_builder.append(f"<h3>{head_html}</h3>") 
                    elif tag == 'castList': 
                         # Optional <head> becomes an <h4>; each <castItem> becomes an <li>.
                         cl_head = element.find('tei:head', ns)
                         if cl_head is not None:
                             html_builder.append(f"<h4>{html.escape(''.join(cl_head.itertext()).strip())}</h4>")
                         html_builder.append('<ul class="cast-list">')
                         for item in element.findall('tei:castItem', ns):
                             role_tag = item.find('tei:role', ns)
                             role_text = ""
                             desc_text = ""
                             if role_tag is not None:
                                 role_text = "".join(role_tag.itertext()).strip()
                                 # The description is the text right after <role>,
                                 # with a leading comma stripped.
                                 if role_tag.tail:
                                     desc_text = role_tag.tail.strip().lstrip(',').strip()
                             else:
                                 # No <role>: the whole item text is the description.
                                 desc_text = "".join(item.itertext()).strip()
                             if role_text:
                                 html_builder.append(f'<li><span class="role">{html.escape(role_text)}</span> <span class="role-desc">{html.escape(desc_text)}</span></li>')
                             elif desc_text:
                                 html_builder.append(f'<li><span class="role-desc">{html.escape(desc_text)}</span></li>')
                         html_builder.append("</ul>")
                    else:
                        # Fallback for unexpected tags directly under body
                        # The element is serialised as-is and its tail emitted escaped.
                        print(f"    - Info: Main loop fallback for tag: {tag}")
                        element_html = etree.tostring(element, encoding='unicode', method='html', with_tail=False).strip()
                        if element_html:
                           html_builder.append(element_html)
                           if element.tail:
                                html_builder.append(html.escape(element.tail))
            except Exception as e:
                # Best effort: a failing element is reported and skipped so the
                # rest of the body is still rendered.
                print(f"    - Warning: Could not serialize element {tag} (Main Loop): {e}")    
        
        english_html = "\n".join(html_builder) # Join the final HTML parts
        
        # --- 3. HTML Generation (Dynamic Metadata) ---
        # Title, author, translator(s), publisher and year are read from the
        # <teiHeader>. Every value gets a default first so that a document
        # without a header still renders instead of failing on an unset name.
        header = root.find('.//tei:teiHeader', ns)
        main_title = "Untitled Document"
        author_text = "Unknown Author"
        translator_text = ""
        publisher_name = ""
        year_text = ""
        publication_date_text = ""  # Final " (Publisher YYYY)" suffix, or ""

        if header is not None:
            # --- Publisher and date ---

            # 1. Find Publisher
            publisher_path = './/tei:fileDesc/tei:sourceDesc//tei:imprint/tei:publisher'
            publisher_tag = header.find(publisher_path, ns)
            if publisher_tag is not None:
                publisher_name = "".join(publisher_tag.itertext()).strip()

            # 2. Find Date: prefer a date under <sourceDesc>, and fall back to
            #    one under <publicationStmt>.
            source_date_path = './/tei:fileDesc/tei:sourceDesc//tei:date'
            date_tag = header.find(source_date_path, ns)
            if date_tag is None:
                digital_date_path = './/tei:fileDesc/tei:publicationStmt//tei:date'
                date_tag = header.find(digital_date_path, ns)

            print(f'\n\nPublisher tag found: {publisher_tag is not None}, Date tag found: {date_tag is not None}')

            if date_tag is not None:
                # Prefer the 'when' attribute; fall back to the element text when
                # the attribute is missing or empty.
                date_val = date_tag.get('when') or "".join(date_tag.itertext()).strip()
                year_match = re.search(r'\d{4}', date_val)
                if year_match:
                    year_text = year_match.group(0)  # Just get "1849"
                elif date_val:
                    year_text = date_val.strip()  # Fallback: use the raw value

            # 3. Combine them into the (Publisher YYYY) format
            if publisher_name and year_text:
                publication_date_text = f" ({publisher_name} {year_text})"  # e.g., (Bohn 1849)
            elif publisher_name:
                publication_date_text = f" ({publisher_name})"
            elif year_text:
                publication_date_text = f" ({year_text})"

            # --- Title, author, translators ---
            title_tag = header.find('.//tei:fileDesc/tei:titleStmt/tei:title', ns)
            if title_tag is not None:
                main_title = "".join(title_tag.itertext()).strip()
            author_tag = header.find('.//tei:fileDesc/tei:titleStmt/tei:author', ns)
            if author_tag is not None:
                author_text = "".join(author_tag.itertext()).strip()
            translators = header.findall('.//tei:fileDesc/tei:titleStmt/tei:editor[@role="translator"]', ns)
            if translators:
                translator_names = ["".join(t.itertext()).strip() for t in translators]
                translator_names = [name for name in translator_names if name]
                if translator_names:
                    translator_text = f" ({', '.join(translator_names)}, trans.)"

        author_display = f"{author_text}{translator_text}{publication_date_text}"

        # Document language: xml:lang on <text>, defaulting to English.
        text_lang_tag = root.find('.//tei:text', ns)
        text_lang = 'en'
        if text_lang_tag is not None:
            text_lang = text_lang_tag.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        
        # --- C-bis. Build TOC Wrapper HTML ---
        toc_wrapper_html = "" # Default to empty
        if has_toc:
            print("   - Debug: Building TOC wrapper...")
            # We build this string here to avoid nested f-strings in the main template
            toc_wrapper_html = f"""
                <div class="toc-wrapper">
                    <button id="toc-open-button" title="Show Table of Contents">☰</button>
                    <div class="column toc-column" id="toc-column">
                        <h3>Table of Contents</h3>
                        <div class="content" id="toc-content">{toc_html}</div>
                    </div>
                </div>
                """
        else:
            print("   - Debug: No TOC wrapper needed.")


        
        # --- HTML Template (NOW INCLUDES TOC LOGIC) ---
        print("main_title =", main_title)
        print("author_display =", author_display)
        print("text_lang =", text_lang)
        
        # --- HTML Template (Broken into pieces for debugging) ---
        print("   - Debug: Assembling HTML template...")
        
        # Helper to safely substitute only our intended placeholders in non-f-strings
        def _fmt(s):
            """Fill the {text_lang} and {main_title} placeholders in a template piece.

            Uses plain str.replace rather than str.format, so every other brace in
            the template is left exactly as written. The substituted values are
            HTML-escaped (quotes included) because they come from the source
            document and land inside an attribute value and element content.

            NOTE(review): doubled braces ('{{' / '}}') in the template text are not
            collapsed here - confirm they are handled elsewhere if single braces
            are expected in the output.
            """
            safe_lang = html.escape(text_lang, quote=True)
            safe_title = html.escape(main_title, quote=True)
            return s.replace("{text_lang}", safe_lang).replace("{main_title}", safe_title)

        try:
            parts = []
            
            # --- Part 1: HTML Head and CSS ---
            print("     - Assembling: Head and CSS")
            parts.append(_fmt("""<!DOCTYPE html>
<html lang="{text_lang}">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{main_title}</title>
    <style>
    body {{ font-family: 'Georgia', serif; display: flex; flex-direction: column; height: 100vh; margin: 0; background-color: #fdfdfd; }}
        header {{ padding: 10px 20px; border-bottom: 2px solid #ddd; background-color: #fff; text-align: center; z-index: 10; }}
        h1 {{ margin: 0; font-size: 1.8em; color: #333; }}
        h2 {{ margin: 5px 0 0; font-size: 1.2em; color: #666; font-style: italic; font-weight: normal;}}
        .container {{
            display: grid;
            /* Default: 2 columns */
            grid-template-columns: 2fr 1fr;
            gap: 15px;
            flex-grow: 1;
            padding: 10px;
            overflow: hidden;
        }}
                        /* --- Fix TOC scroll behavior --- */
        .container {{
            min-height: 0; /* allow flex/grid children to shrink and scroll */
        }}

        .toc-wrapper {{
            min-height: 0; /* same fix for nested flex/grid container */
        }}

        /* *** 3-column layout when TOC is present *** */
        .container.has-toc {{
            grid-template-columns: auto 2fr 1fr; /* TOC wrapper, Text, Notes */
        }}
        .column {{
            background-color: #ffffff;
            border: 1px solid #e0e0e0;
            border-radius: 4px;
            display: flex;
            flex-direction: column;
            overflow: hidden;  /* This is fine for .column */
        }}

        /* *** Wrapper for TOC column + Open Button *** */
        .toc-wrapper {{
            position: relative; /* Context for the close button */
            width: 250px; /* Initial width */
            transition: width 0.3s ease, min-width 0.3s ease;
            min-width: 250px;
            /* The wrapper itself is a column in the grid, so it will be flex */
            display: flex;
            flex-direction: column;
            /* Use transparent background for the wrapper */
            background-color: transparent;
            border: none;
        }}

        /* *** TOC Column Styles (THE FIX IS HERE) *** */
        .toc-column {{
            width: 100%; /* Fill the wrapper */
            flex-grow: 1;
            border: 1px solid #e0e0e0; /* Border now on the column */
            border-radius: 4px; /* Rounded corners */
            background-color: #f8f9fa;
            display: flex;
            flex-direction: column;
            overflow-y: auto;   /* <-- FIX 1: This column scrolls */
            min-height: 0;      /* <-- FIX 2: This column is allowed to shrink */
        }}
        .toc-column h3 {{ /* TOC Title */
            background-color: #e9ecef; 
            position: sticky; top: 0; z-index: 6;
            font-size: 0.9em !important; /* Make TOC header smaller */
            padding: 12px 12px 12px 35px; /* Add padding for close button */
            text-align: center; margin: 0;
            border-bottom: 1px solid #e0e0e0; 
            color: #444; text-transform: uppercase; letter-spacing: 0.5px;
            border-top-left-radius: 4px; /* Match parent */
            border-top-right-radius: 4px; /* Match parent */
        }}
        #toc-content ul {{
            list-style: none;
            padding: 0;
            margin: 0;
        }}
        /* *** Styling for nested TOC lists *** */
        #toc-content > ul {{
            padding: 10px 15px; /* Padding for the list */
        }}
        #toc-content ul ul {{
            padding-left: 20px; /* Indent nested lists */
            margin: 0;
        }}
        #toc-content li a {{
            display: block;
            padding: 6px 10px;
            text-decoration: none;
            color: #0056b3;
            border-bottom: 1px solid #eee;
            font-size: 0.9em;
            /* overflow: hidden;  Hide overflow */
            white-space: normal;
        }}
        #toc-content li a:hover {{
            background-color: #e9ecef;
        }}
        /* *** Styling for nested TOC items *** */
        #toc-content ul ul li a {{
            font-size: 0.85em; /* Make sub-items slightly smaller */
            color: #333; /* Darken them slightly */
            padding-top: 4px;
            padding-bottom: 4px;
            border-bottom: none; /* No border for sub-items */
        }}
        #toc-content ul ul li a:hover {{
            background-color: #dde; /* Different hover for sub-items */
        }}
/* --- New Collapsible TOC Styles --- */

    /* Hide nested lists by default */
    #toc-content li ul {{
        display: none;
        /* Keep your existing padding-left for indentation */
    }}

    /* Style for parent items that can be clicked */
    #toc-content li.has-children > a {{
        cursor: pointer; /* Show it's clickable */
        /* Add a small indicator */
    }}

    /* Add a clickable triangle/plus sign */
    #toc-content li.has-children > a::before {{
        content: '► '; /* Collapsed state indicator */
        font-size: 0.8em;
        margin-right: 3px;
        color: #555;
    }}

    /* When the .open class is added, show the list */
    #toc-content li.has-children.open > ul {{
        display: block;
    }}

    /* Change the triangle to its expanded state */
    #toc-content li.has-children.open > a::before {{
        content: '▼ '; /* Expanded state indicator */
    }}               
        /* CLOSE Button (inside TOC) */
        #toc-toggle {{
            position: absolute; 
            top: 8px; 
            left: 8px; 
            z-index: 7; /* Above TOC header */
            background: #f8f9fa;
            border: 1px solid #ccc;
            padding: 3px 8px;
            cursor: pointer;
            font-size: 1.1em;
            line-height: 1;
            border-radius: 3px;
        }}

        /* OPEN Button (sits inside wrapper) */
        #toc-open-button {{
             display: none; /* Hidden by default */
             padding: 8px;
             font-size: 1.2em;
             cursor: pointer;
             border: 1px solid #ccc;
             background: #f8f9fa;
             border-radius: 3px;
             margin: 0 0 0 5px; /* Position it nicely in the top-left */
             width: 38px; /* Fixed width */
             height: 38px; /* Fixed height */
             box-sizing: border-box; /* Include padding in size */
        }}

        /* Styles for collapsed TOC */
        body.toc-collapsed .toc-wrapper {{
            width: 45px; /* Width of the open button area */
            min-width: 45px;
        }}
        body.toc-collapsed .toc-column {{
             display: none; /* Hide whole column */
        }}
        body.toc-collapsed #toc-open-button {{
            display: block; /* Show open button */
        }}
        
        /* Adjust main grid when collapsed (NO change needed) */
        body.toc-collapsed .container.has-toc {{
            grid-template-columns: auto 2fr 1fr; 
        }}


/* --- Style for <add> tags --- */
        .tei-add {{
            color: #006400; /* Dark green text */
        }}

        .tei-del {{
            text-decoration: none; 
            font-size: 0.9em; 
            color: #777; 
        }}
        .tei-add::before {{
            content: '<';
            font-weight: bold;
            color: #006400; /* Match text color */
            margin-right: 0.1em; /* Small space after bracket */
        }}
        .tei-add::after {{
            content: '>';
            font-weight: bold;
            color: #006400; /* Match text color */
            margin-left: 0.1em; /* Small space before bracket */
        }}
        /* --- Style for <gloss> tags --- */
        .gloss {{
            border-bottom: 1px dotted #555; /* Subtle dotted underline */
            cursor: help; /* Changes cursor to indicate potential tooltip */
        }}
        /* --- Style for <unclear> tags --- */
        .unclear {{
            opacity: 0.7;
            font-style: italic;
            color: #555;
        }}
        .gloss[title] {{ 
        }}
        .loc {{ color: #666; text-decoration: none; font-family: monospace; margin-right: 0.5em; }}
        .column > h3 {{
            text-align: center; margin: 0; padding: 12px;
            border-bottom: 1px solid #e0e0e0; background-color: #f9f9ff;
            color: #444; font-size: 1em; text-transform: uppercase; letter-spacing: 0.5px;
            position: sticky; top: 0; z-index: 5;
            border-top-left-radius: 4px; /* Match parent */
            border-top-right-radius: 4px; /* Match parent */
        }}
        .content {{
            padding: 15px;
            overflow-y: auto;
            flex-grow: 1;
            line-height: 1.6;
            scroll-behavior: smooth;
            min-height: 0;      /* <-- FIX 3: Allows main/note columns to scroll */
        }}

        /* Override for TOC content to allow parent to scroll */
        #toc-content {{
            overflow-y: visible; /* <-- FIX 4: Let the parent (.toc-column) handle scrolling */
            flex-grow: 0;       /* <-- FIX 5: Stop this from growing to fit its content */
            padding: 0;         /* Padding is moved to the <ul> */
            min-height: auto;   /* Override min-height from .content */
        }}

        /* English Text Column Styles */
        #english-content h2, #english-content h3 {{ scroll-margin-top: 10px; }} /* Add margin for scrolling */
        #english-content h2 {{ font-size: 1.4em; color: #000; font-style: normal; text-align: center; margin-bottom: 1em;}}
        /* Styling for div/head (book/chapter) */
        #english-content h3 {{ 
            font-size: 1.2em; 
            color: #333; 
            font-style: normal; 
            text-align: left; 
            margin-top: 1.5em; 
            margin-bottom: 0.5em; 
            border-bottom: 1px solid #ccc; 
            padding-bottom: 5px;
        }}
        /* More specific for index main titles */
        #english-content h2.index-main {{
            font-size: 1.6em;
            font-style: normal;
            text-align: center;
            margin: 1em 0;
        }}
        #english-content h4 {{ font-size: 1.1em; text-align: center; margin: 1em 0 0.5em; text-transform: uppercase;}}
        #english-content ul {{ list-style-type: none; padding-left: 0; text-align: center; margin-bottom: 1em;}}
        #english-content li {{ margin-bottom: 0.3em; }}

/* New Cast List Styles */
        #english-content .cast-list {{
            list-style-type: none;
            text-align: left; /* Align text to the left */
            margin: 1.5em 3em; /* Give it some horizontal margin */
            padding: 1.5em; /* Add padding inside the box */
            border: 1px solid #eee;
            background: #fdfdfd;
            border-radius: 4px;
        }}
        #english-content .cast-list li {{
            margin-bottom: 0.6em; /* Space out the list items */
        }}
        #english-content .cast-list .role {{
            font-weight: bold;
            color: #800000; /* Match the speaker color */
        }}
        #english-content .cast-list .role-desc {{
            color: #333;
        }}
        
        #english-content .speech {{ margin-bottom: 1em; }}
        #english-content .speaker {{ font-weight: bold; margin-right: 8px; color: #800000; display: block; margin-bottom: 0.2em; }}
        #english-content .pb-hr {{ border: none; border-top: 1px dashed #ccc; margin: 1em 0; }} /* Style hr for pb */
        #english-content .pb {{ display: block; text-align: center; color: #888; font-size: 0.9em; margin-bottom: 1em; }}
        
        #english-content .stage {{
            display: inline-block; 
            font-style: italic; 
            color: #555;
            background-color: #fafafa; 
            padding: 5px 8px;      
            margin: 5px;           
            border: 1px solid #eee;  
            border-radius: 3px;
            line-height: 1.4;      
        }}   

        /* Table Styles */
        #english-content table {{
            border-collapse: collapse;
            margin: 1.5em 0;
            width: 90%;
            margin-left: auto;
            margin-right: auto;
            border: 1px solid #ccc;
        }}
        #english-content caption {{
            font-weight: bold;
            font-size: 1.1em;
            caption-side: top;
            margin-bottom: 0.5em;
        }}
        #english-content th, #english-content td {{
            border: 1px solid #ddd;
            padding: 8px 10px;
            text-align: left;
            font-size: 0.9em;
        }}
        #english-content thead tr {{
            background-color: #f2f2f2;
        }}
        #english-content tbody tr:nth-child(even) {{
            background-color: #f9f9f9;
        }}

        #english-content .fn-link {{
            vertical-align: super; font-size: 0.8em; text-decoration: none;
            color: #007bff; font-weight: bold; padding: 0 2px;
            line-height: 1;
            border-radius: 2px;
            cursor: pointer; /* Add cursor pointer */
        }}
        #english-content .fn-link:hover, #english-content .fn-link.highlight {{
             text-decoration: underline; background-color: #fff9c4;
             outline: 1px solid #fdd835;
             outline-offset: 1px;
        }}
        #english-content p {{ margin: 0.5em 0; }}
        #english-content .verse-line {
          display: block; /* Add this line */
          margin: 0.1em 0 0.1em 2em;
          text-indent: -1em;
        }

        /* Footnote Column Styles */
        .line {{ display: flex; align-items: baseline; padding: 5px; border-radius: 3px; border-bottom: 1px solid #f0f0f0; }}
        .line:last-child {{ border-bottom: none; }}
        .line.highlight {{
            background-color: #fff9c4;
            outline: 1px solid #fdd835;
            outline-offset: -1px; /* Inset outline slightly */
        }}
        .note-ref {{
            flex-shrink: 0; width: 60px; font-size: 0.8em; color: #555;
            cursor: pointer; text-align: right; margin-right: 10px; font-family: monospace;
            padding-top: 0.1em;
        }}
        .note-ref:hover {{ color: #007bff; text-decoration: underline; }}
        .text {{ line-height: 1.5; font-size: 0.9em; flex-grow: 1; }}

        /* Style the converted TEI tags within notes */
        .text blockquote {{
            font-style: italic; color: #333;
            border-left: 3px solid #ccc;
            padding-left: 10px; margin: 0.5em 0 0.5em 5px;
            display: block;
        }}
         .text blockquote br {{
            display: block; content: ""; margin-top: 0.2em;
         }}
        .text i {{ font-style: italic; }}
        .text strong {{ font-weight: bold; }}
        .text br {{
             display: block; content: ""; margin-top: 0.2em;
        }}
    </style>
</head>
"""))
            
            # --- Part 2: Body Header ---
            print("     - Assembling: Body Header")
            parts.append(f"""<body>
    <header>
        <h1>{main_title}</h1>
        <h2>{author_display}</h2>
    </header>
""")

            # --- Part 3: Container Start ---
            print("     - Assembling: Container Start")
            parts.append(f"""
    <div class="container{' has-toc' if has_toc else ''}">
""")

            # --- Part 4: TOC Wrapper (This is a pre-built string, not an f-string) ---
            print("     - Assembling: TOC Wrapper")
            parts.append(toc_wrapper_html)

            # --- Part 5: English Column ---
            print("     - Assembling: English Column")
            parts.append(f"""
        <div class="column">
            <h3>English Translation</h3>
            <div class="content" id="english-content">{english_html}</div>
        </div>
""")

            # --- Part 6: Footnote Column ---
            print("     - Assembling: Footnote Column")
            parts.append(f"""
        <div class="column">
            <h3>Footnotes</h3>
            <div class="content" id="footnote-content">{footnote_html}</div>
        </div>
    </div>
""") # <-- Close the .container div

            # --- Part 7: JavaScript (*** THIS IS THE FINAL FIXED PART ***) ---
            print("     - Assembling: Script")
            parts.append(f"""
    <script>
        // Use {{ and }} for literal braces
        document.addEventListener('DOMContentLoaded', function() {{
            
            // --- Collapsible TOC Code ---
            const tocContent = document.getElementById('toc-content');
            if (tocContent) {{
                // 1. Mark items with children
                tocContent.querySelectorAll('li > ul').forEach(ul => {{
                    ul.parentElement.classList.add('has-children');
                }});

                // 2. Add click listener for toggling
                tocContent.addEventListener('click', function(event) {{
                    const link = event.target.closest('a');
                    if (link && link.parentElement.classList.contains('has-children')) {{
                        // Make sure the click wasn't on the main 'close' button
                        if (event.target.id !== 'toc-toggle') {{
                             event.preventDefault(); // Stop the link from jumping
                             link.parentElement.classList.toggle('open'); // Toggle open/closed
                        }}
                    }}
                }});
            }}
            // --- End of Collapsible TOC Code ---


            // --- Note-Linking Code ---
            const container = document.querySelector('.container');
            const englishContent = document.getElementById('english-content');
            const footnoteContent = document.getElementById('footnote-content');
            let lastHighlightedNoteElement = null;
            let lastHighlightedLinkElements = []; // Can be multiple links

            // Function to remove all highlights
            function removeHighlights() {{
                if (lastHighlightedNoteElement) {{
                    lastHighlightedNoteElement.classList.remove('highlight');
                }}
                lastHighlightedLinkElements.forEach(link => {{
                    link.classList.remove('highlight');
                }});
                lastHighlightedNoteElement = null;
                lastHighlightedLinkElements = [];
            }}

            // Function to find, scroll, and highlight elements
# ... existing code in cell 7 ...
            # Function to find, scroll, and highlight elements\n",
            function findScrollHighlight(targetNoteId, scrollTargetColumn) {{ // scrollTargetColumn is 'note' or 'link'\n", # Escaped JS brace
                removeHighlights(); // Clear previous state\n",

                const noteElement = document.getElementById('note-' + targetNoteId);\n",
                // *** MODIFICATION HERE: Removed concatenation from within the querySelectorAll string ***
                // We let the JavaScript handle the concatenation directly.
                const linkSelector = '.fn-link[data-note=\"' + targetNoteId + '\"]';
                const linkElements = englishContent.querySelectorAll(linkSelector);\n",


                // Highlight Note\n",
                if (noteElement) {{\n", # Escaped JS brace
                    noteElement.classList.add('highlight');\n",
                    lastHighlightedNoteElement = noteElement; // Store for removal\n",
                    // Scroll the note column if the link was clicked\n",
                    if (scrollTargetColumn === 'note') {{\n", # Escaped JS brace
                        // ***** FIX HERE: Use {{...}} for JS object literal *****\n",
                        noteElement.scrollIntoView({{ behavior: 'smooth', block: 'center' }});\n",
                    }}\n", # Escaped JS brace
                }}\n", # Escaped JS brace

                // Highlight ALL corresponding links and scroll the FIRST one\n",
                if (linkElements.length > 0) {{\n", # Escaped JS brace
                    linkElements.forEach(link => {{\n", # Escaped JS brace
                        link.classList.add('highlight');\n",
                    }});\n", # Escaped JS brace
                    lastHighlightedLinkElements = Array.from(linkElements); // Store all for removal\n",
                    // Scroll the first link if the note was clicked\n",
                    if (scrollTargetColumn === 'link') {{\n", # Escaped JS brace
                         // ***** FIX HERE: Use {{...}} for JS object literal *****\n",
                        linkElements[0].scrollIntoView({{ behavior: 'smooth', block: 'center' }});\n",
                    }}\n", # Escaped JS brace
                }}\n", # Escaped JS brace
            }}\n", # Escaped JS brace

            // Add click listener using event delegation on the container\n",
            container.addEventListener('click', function(event) {{\n", # Escaped JS brace
                const linkTarget = event.target.closest('.fn-link');\n",
                const noteRefTarget = event.target.closest('.note-ref');\n",

                if (linkTarget && linkTarget.dataset.note) {{\n", # Escaped JS brace
                    event.preventDefault(); // Prevent default anchor jump behavior\n",
                    const noteId = linkTarget.dataset.note;\n",
                    // *** MODIFICATION HERE: Removed concatenation from within the console.log string ***
                    console.log('Clicked text link for note: ' + noteId); // Debug\n",
                    findScrollHighlight(noteId, 'note'); // Scroll the note column\n",

                }} else if (noteRefTarget && noteRefTarget.dataset.note) {{\n", # Escaped JS brace
                    event.preventDefault(); // Prevent default anchor jump behavior\n",
                    const noteId = noteRefTarget.dataset.note;\n",
                    // *** MODIFICATION HERE: Removed concatenation from within the console.log string ***
                    console.log('Clicked footnote ref for note: ' + noteId); // Debug\n",
                    findScrollHighlight(noteId, 'link'); // Scroll the text column\n",
                }}\n", # Escaped JS brace
            }});\n", # Escaped JS brace

            // --- TOC Toggle (Show/Hide) Script ---\n",
# ... rest of existing code ...
            const tocToggle = document.getElementById('toc-toggle'); // Close button
            const tocOpenButton = document.getElementById('toc-open-button'); // Open button
            const body = document.body;

            if (tocToggle && tocOpenButton) {{ // Only run if TOC elements exist
                
                // CLOSE Button (inside TOC)
                tocToggle.addEventListener('click', function() {{
                    body.classList.add('toc-collapsed');
                }});
                
                // OPEN Button (outside TOC)
                tocOpenButton.addEventListener('click', function() {{
                    body.classList.remove('toc-collapsed');
                }});
            }}
            // --- END TOC Toggle Script ---
            
        }});
    </script>
</body>
</html>
""")
            
            # --- Part 8: Final Join ---
            print("     - Assembling: Final Join")
            html_template = "".join(parts)
            print("   - Debug: HTML template assembled successfully.")

        except Exception as e:
            print(f"\n   - FATAL ERROR: Assembly failed at one of the 'Assembling:' steps above.\n")
            raise e # Re-raise the exception to show where it happened

        # --- 4. Write to file ---
        with open(html_output_filepath, 'w', encoding='utf-8') as f:
            f.write(html_template)

        # --- Calculate and report unlinked notes ---
        unlinked_note_keys = found_note_keys - linked_ref_keys
        if unlinked_note_keys:
            print(f"   - Warning: {len(unlinked_note_keys)} Notes found but NOT linked from text:")
            sorted_unlinked = sorted(list(unlinked_note_keys), key=sort_key)
            for unlinked_key in sorted_unlinked:
                print(f"     - Note key: {unlinked_key}")

        print(f"   - Successfully created '{os.path.basename(html_output_filepath)}'.")

    except IOError:
        print(f"   - Error: File not found '{xml_filepath}'.")
    except etree.XMLSyntaxError as e:
        print(f"   - Error: XML syntax error in '{xml_filepatch}'. {e}")
    except Exception as e:
        print(f"   - Error: An unexpected error occurred processing '{xml_filepath}'.")
        import traceback
        traceback.print_exc() # Print full error for debugging

In [4]:
# Driver cell: convert every XML file discovered in the configuration cell to HTML.
#
# Relies on names created by earlier cells:
#   XML_FILES    - list of XML paths found by the glob in the first cell
#   OUTPUT_DIR   - directory the HTML files are written to
#   process_play - the conversion function defined in an earlier cell
#
# process_play prints and swallows most of its own errors, so an exception
# rarely reaches this loop. To avoid reporting success for files that were
# never written, each conversion is verified by checking that the output file
# exists and that its modification time changed during the call.
if 'XML_FILES' not in locals() or not XML_FILES:
    print("Error: XML_FILES list not found or is empty.")
    print("Please make sure you have run the first cell successfully.")
else:
    print(f"\n--- Starting conversion of {len(XML_FILES)} files ---")

    failed_files = []  # source paths whose conversion did not produce fresh output

    # Loop through each file path found in Cell 1
    for xml_file in XML_FILES:
        try:
            # Create the output filename
            # e.g., 'aesch.ag.headlam_eng2.xml' -> 'aesch.ag.headlam_eng2.html'
            base_name = os.path.basename(xml_file)
            html_name = os.path.splitext(base_name)[0] + '.html'

            # Create the full output path using the OUTPUT_DIR from Cell 1
            html_output_file = os.path.join(OUTPUT_DIR, html_name)

            # Remember the state of any stale output left by a previous run so
            # it is not mistaken for a successful conversion.
            if os.path.exists(html_output_file):
                mtime_before = os.path.getmtime(html_output_file)
            else:
                mtime_before = None

            # Call the main conversion function defined in an earlier cell
            process_play(xml_file, html_output_file)

            # NOTE(review): relies on the filesystem's mtime resolution being
            # finer than the gap between two runs - fine for normal notebook use.
            output_written = (
                os.path.exists(html_output_file)
                and os.path.getmtime(html_output_file) != mtime_before
            )
            if not output_written:
                failed_files.append(xml_file)
                print(f"   - FAILED to process {xml_file}: no HTML output was written")

        except Exception as e:
            failed_files.append(xml_file)
            print(f"   - FAILED to process {xml_file}: {e}")

    print("\n--- Conversion complete ---")
    if failed_files:
        converted_count = len(XML_FILES) - len(failed_files)
        print(f"{converted_count} of {len(XML_FILES)} files converted; "
              f"{len(failed_files)} failed:")
        for failed_file in failed_files:
            print(f"  - {os.path.basename(failed_file)}")
        print(f"HTML files that were written are in the '{OUTPUT_DIR}' directory.")
    else:
        print(f"All HTML files have been saved to the '{OUTPUT_DIR}' directory.")


--- Starting conversion of 7 files ---

Processing 'aesch.ag.headlam_eng2.xml'...
   - XML parsed.
   - Debug: main_text_body set to <body>.
   - Debug: Found 187 refs using root.findall. Processing them...
   - Debug: Found 0 <s> tags to remove.
   - Data extracted: 186 footnotes processed.
   - Refs found: 186, Refs linked: 186
   - Debug C: Starting serialization...
   - Debug: Found 1 top-level div(s). No TOC needed.
    - Debug: build_inner_html fallback for TEI tag: milestone (page N/A). Context: ... weapon wielded in the hand of &lt;milestone&gt; ... &lt;/milestone&gt; treachery!...


Publisher tag found: True, Date tag found: True
   - Debug: No TOC wrapper needed.
main_title = The Agamemnon
author_display = Aeschylus (Walter George Headlam, Clinton Edward Sowerby Headlam, trans.) (George Bell & Sons 1909)
text_lang = eng
   - Debug: Assembling HTML template...
     - Assembling: Head and CSS

   - FATAL ERROR: Assembly failed at one of the 'Assembling:' steps above.

   - Err

Traceback (most recent call last):
  File "/var/folders/4h/kqvsl1vn2h99q3t0_kbcbz5xpbm7x_/T/ipykernel_13692/3286248941.py", line 1177, in process_play
    raise e # Re-raise the exception to show where it happened
  File "/var/folders/4h/kqvsl1vn2h99q3t0_kbcbz5xpbm7x_/T/ipykernel_13692/3286248941.py", line 597, in process_play
    parts.append(f"""<!DOCTYPE html>
TypeError: unsupported format string passed to function.__format__
Traceback (most recent call last):
  File "/var/folders/4h/kqvsl1vn2h99q3t0_kbcbz5xpbm7x_/T/ipykernel_13692/3286248941.py", line 1177, in process_play
    raise e # Re-raise the exception to show where it happened
  File "/var/folders/4h/kqvsl1vn2h99q3t0_kbcbz5xpbm7x_/T/ipykernel_13692/3286248941.py", line 597, in process_play
    parts.append(f"""<!DOCTYPE html>
TypeError: unsupported format string passed to function.__format__
Traceback (most recent call last):
  File "/var/folders/4h/kqvsl1vn2h99q3t0_kbcbz5xpbm7x_/T/ipykernel_13692/3286248941.py", line 1177,