In [None]:
import os
import xml.etree.ElementTree as ET
from typing import List, Union

In [None]:

def generate_journal_html(
    images: List[str], 
    uncleaned_xml: str, 
    cleaned_xml: str, 
    final_xml: str, 
    output_dir: str,
    css: Union[str, None] = None,
    css_filename: Union[str, None] = None
) -> None:
    """
    Generates one HTML file per journal page, showing the scanned image, the final
    translation, and collapsible sections for the uncleaned and cleaned text.

    Pages are matched by position: the i-th image (1-based) is paired with the
    ``<page page="i">`` element of each XML document. When a document has no
    element for that page number, a short placeholder message is shown instead.
    The serialized ``<page>`` element (tags included) is inserted into the HTML
    as-is, so any markup inside the XML is preserved in the output.

    Parameters:
        images (List[str]): List of paths to JPEG images, each corresponding to a journal page.
            Each path is HTML-escaped and used verbatim as the ``src`` of an ``<img>`` tag.
        uncleaned_xml (str): Path to the XML document containing uncleaned text.
        cleaned_xml (str): Path to the XML document containing cleaned text.
        final_xml (str): Path to the XML document containing the final translated and tagged text.
        output_dir (str): Directory to save the generated HTML files (created if missing).
            Files are named ``page_<n>.html`` and written as UTF-8.
        css (str, optional): CSS styling as a string. If None or empty, css_filename will be used.
        css_filename (str, optional): Path to an external CSS file (read as UTF-8).
            Ignored if `css` is provided.

    Returns:
        None

    Raises:
        OSError: If an XML or CSS file cannot be read, or an output file cannot be written.
        xml.etree.ElementTree.ParseError: If one of the XML documents is malformed.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load and parse XML files
    def parse_xml(file_path):
        """Map each <page> element's "page" attribute to its serialized XML."""
        root = ET.parse(file_path).getroot()
        return {
            page.get("page"): ET.tostring(page, encoding="unicode")
            for page in root.findall(".//page")
        }

    uncleaned_pages = parse_xml(uncleaned_xml)
    cleaned_pages = parse_xml(cleaned_xml)
    final_pages = parse_xml(final_xml)

    # CSS Handling: an inline string wins over a file; the file handle is
    # closed deterministically instead of being left to the garbage collector.
    if css:
        css_content = css
    elif css_filename:
        with open(css_filename, encoding="utf-8") as css_file:
            css_content = css_file.read()
    else:
        css_content = ""

    # Iterate over each page and generate HTML content
    for i, image_path in enumerate(images):
        page_num = str(i + 1)
        uncleaned_text = uncleaned_pages.get(page_num, "No uncleaned text available.")
        cleaned_text = cleaned_pages.get(page_num, "No cleaned text available.")
        final_text = final_pages.get(page_num, "No translated text available.")

        # Escape the path so quotes or ampersands cannot break out of the
        # src="..." attribute; ordinary paths are left unchanged.
        image_src = html.escape(str(image_path), quote=True)

        # Generate HTML content
        html_content = f"""
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Journal Translation - Page {page_num}</title>
            <style>
                {css_content}
            </style>
        </head>
        <body>
            <h1>Journal Translation - Page {page_num}</h1>
            <div class="container">
                <!-- Left Panel for Scanned Page -->
                <div class="panel" id="original-image-panel">
                    <img src="{image_src}" alt="Original Scanned Page" style="width:100%;">
                </div>

                <!-- Right Panel for Final Translated Text with XML Tags -->
                <div class="panel" id="translated-text-panel">
                    <h2>Translated Text</h2>
                    <div class="translated-content">
                        {final_text}
                    </div>
                </div>
            </div>

            <!-- Underbar Section for Original and Cleaned Text -->
            <div class="underbar">
                <button class="collapsible">Show Original Uncleaned Text</button>
                <div class="content" id="original-text">
                    <p>{uncleaned_text}</p>
                </div>

                <button class="collapsible">Show Cleaned Text</button>
                <div class="content" id="cleaned-text">
                    <p>{cleaned_text}</p>
                </div>
            </div>

            <script>
                document.querySelectorAll(".collapsible").forEach(button => {{
                    button.addEventListener("click", function() {{
                        this.classList.toggle("active");
                        const content = this.nextElementSibling;
                        content.style.display = content.style.display === "block" ? "none" : "block";
                    }});
                }});
            </script>
        </body>
        </html>
        """

        # Write HTML content to file. The encoding must match the
        # <meta charset="UTF-8"> declared in the document head.
        output_path = os.path.join(output_dir, f"page_{page_num}.html")
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(html_content)

    print(f"Generated HTML pages in {output_dir}")

# Example usage:
# generate_journal_html(["img1.jpg", "img2.jpg"], "uncleaned.xml", "cleaned.xml", "final.xml", "./output", css="your-css-string")