In [34]:
import html
import xml.etree.ElementTree as ET

from IPython.display import display, HTML

# --- Helper function to read XML from a file ---
def read_xml_from_file(file_path):
    """Load the file at ``file_path`` as UTF-8 text and return its contents.

    No exception escapes this function. When the file is missing, or any
    other error occurs while opening or reading it, an HTML error snippet
    (``<p><strong>Error...</strong></p>``) is returned in place of the
    file contents.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as xml_file:
            contents = xml_file.read()
    except FileNotFoundError:
        # Missing file gets its own message that includes the path.
        return f"<p><strong>Error: File not found at {file_path}</strong></p>"
    except Exception as exc:
        # Anything else (permissions, decoding, ...) is reported generically.
        return f"<p><strong>Error reading file: {exc}</strong></p>"
    else:
        return contents

# --- Akoma Ntoso to HTML conversion functions ---
# HTML tag used for any Akoma Ntoso tag that has no explicit mapping.
_DEFAULT_HTML_TAG = "div"

# Akoma Ntoso element name -> HTML element name used to render it.
_AKN_TO_HTML_TAG = {
    "act": "article", "meta": "div", "identification": "div",
    "FRBRWork": "div", "FRBRManifestation": "div", "FRBRdate": "span",
    "FRBRauthor": "span", "FRBRcountry": "span", "FRBRname": "span",
    "FRBRnumber": "span", "FRBRuri": "span", "FRBRthis": "span", "FRBRformat": "span",
    "lifecycle": "div", "eventRef": "div",
    "references": "div", "TLCRole": "div", "TLCPerson": "div", "docType": "span",
    "publication": "div", "preamble": "header", "docTitle": "h2",
    "recitals": "section", "recital": "div", "citations": "section",
    "citation": "div", "container": "div", "formula": "div",
    "enactingFormula": "div",  # "div" rather than "p" to avoid nested <p> tags
    "body": "main", "article": "section",
    "paragraph": "div", "list": "ul", "point": "li",
    "content": "div", "num": "span", "p": "p",
    "conclusions": "footer", "signatureBlock": "div", "role": "p",
    "person": "p", "promulgationCommand": "p", "attachments": "aside",
    "attachment": "section", "preface": "header", "heading": "h3",
    "clause": "div",
}


def map_akn_tag_to_html(akn_tag):
    """Return the HTML tag name used to render the Akoma Ntoso tag ``akn_tag``.

    Tags without an explicit entry in the mapping fall back to ``"div"``.
    """
    return _AKN_TO_HTML_TAG.get(akn_tag, _DEFAULT_HTML_TAG)


def _escape_text(text):
    """Escape ``&``, ``<`` and ``>`` so parsed XML text is safe as HTML text."""
    return html.escape(text, quote=False)


def _escape_attr(value):
    """Escape a value for use inside a single- or double-quoted HTML attribute."""
    return html.escape(value, quote=True)


def _name_token(name_value):
    """Turn a ``name`` attribute into a class token (lowercase, spaces/underscores -> '-')."""
    return name_value.lower().replace(" ", "-").replace("_", "-")


def _refers_to_token(refers_to_value):
    """Turn a ``refersTo`` attribute into a class token (lowercase, '#' removed)."""
    return refers_to_value.lower().replace("#", "")


def _attrs_string(element, classes):
    """Build the ' id="..." class="..."' string (leading space included) for an opening tag.

    The ``id`` attribute is emitted only when the element carries an ``eId``.
    """
    attrs = []
    if 'eId' in element.attrib:
        attrs.append(f'id="{_escape_attr(element.attrib["eId"])}"')
    attrs.append(f'class="{_escape_attr(" ".join(classes))}"')
    return " " + " ".join(attrs)


def _inner_html(element):
    """Render an element's own text, its converted children and their tail text.

    Text and tails are stripped; whitespace-only text is dropped and each
    non-empty tail is preceded by a single space.
    """
    parts = []
    if element.text and element.text.strip():
        parts.append(_escape_text(element.text.strip()))
    for child in element:
        parts.append(akn_element_to_html(child))
        if child.tail and child.tail.strip():
            parts.append(" " + _escape_text(child.tail.strip()))
    return "".join(parts)


def _inline_span(classes, node):
    """Render ``node`` as a <span> holding only its own (stripped, escaped) text."""
    text = _escape_text(node.text.strip()) if node.text else ""
    return f'<span class="{_escape_attr(" ".join(classes))}">{text}</span>'


def _unwrapped_paragraph(p_element):
    """Render the content of a <p> inside enactingFormula without the <p> wrapper.

    ``docType`` and ``docTitle`` children become inline <span> elements (so
    the title is not rendered as a block-level <h2>); any other child goes
    through the generic converter. Pieces are joined with single spaces.
    """
    pieces = []
    if p_element.text and p_element.text.strip():
        pieces.append(_escape_text(p_element.text.strip()))

    for node in p_element:
        if node.tag == "docType":
            # The refersto class is always emitted, even when the attribute is absent.
            refers_to = _refers_to_token(node.attrib.get("refersTo", ""))
            pieces.append(_inline_span(["akn-doctype", f"akn-doctype-refersto-{refers_to}"], node))
        elif node.tag == "docTitle":
            title_classes = ["akn-doctitle"]
            if 'refersTo' in node.attrib:
                title_classes.append(f'akn-doctitle-refersto-{_refers_to_token(node.attrib["refersTo"])}')
            pieces.append(_inline_span(title_classes, node))
        else:
            pieces.append(akn_element_to_html(node))

        if node.tail and node.tail.strip():
            pieces.append(" " + _escape_text(node.tail.strip()))

    return " ".join(pieces)


def akn_element_to_html(element):
    """Recursively convert an Akoma Ntoso XML element to an HTML string.

    * ``meta`` elements are rendered inside a hidden ``<div class='akn-meta'>``.
    * ``enactingFormula`` elements are delegated to ``handle_enacting_formula``.
    * Every other element is rendered with the tag from ``map_akn_tag_to_html``,
      an ``id`` taken from ``eId`` (when present) and ``akn-*`` classes derived
      from the tag name and the ``name``, ``type``, ``refersTo`` and ``num``
      attributes.

    Text, tail text and attribute values come from the parsed XML (where
    entities such as ``&amp;`` are already decoded), so they are HTML-escaped
    before being written out.
    """
    tag = element.tag

    if tag == "meta":
        title = _escape_attr(f"AKN Meta: {element.attrib.get('eId', '')}")
        return (
            f"<div class='akn-meta' style='display:none;' title='{title}'>"
            f"{_inner_html(element)}</div>"
        )

    # Special handling for enactingFormula to avoid nested <p> tags.
    if tag == "enactingFormula":
        return handle_enacting_formula(element)

    html_tag = map_akn_tag_to_html(tag)
    base_class = f"akn-{tag.lower()}"
    classes = [base_class]
    if 'name' in element.attrib:
        classes.append(f'{base_class}-name-{_name_token(element.attrib["name"])}')
    if 'type' in element.attrib:
        classes.append(f'{base_class}-type-{element.attrib["type"].lower()}')
    if 'refersTo' in element.attrib:
        classes.append(f'{base_class}-refersto-{_refers_to_token(element.attrib["refersTo"])}')
    if tag == "article" and 'num' in element.attrib:
        classes.append(f'akn-pasal-numval-{element.attrib["num"]}')

    # Paragraphs with a non-blank ``num`` attribute get the extra 'akn-ayat' class.
    if tag == "paragraph" and 'num' in element.attrib and element.attrib['num'].strip():
        classes.append("akn-ayat")

    attrs_str = _attrs_string(element, classes)
    return f"<{html_tag}{attrs_str}>{_inner_html(element)}</{html_tag}>"


def handle_enacting_formula(element):
    """Render an ``enactingFormula`` element as a <div> with a flat structure.

    Output layout::

        <div id=... class="akn-enactingformula ...">
          <span class="akn-num">...</span>          (only if a <num> child exists)
          <div class="akn-content">...</div>        (only if there is other content)
        </div>

    The text of a ``num`` child is pulled out and placed first (if several
    ``num`` children exist, the last one wins). ``p`` children are unwrapped
    so no <p> is nested, and all remaining children go through
    ``akn_element_to_html``. Text and attribute values are HTML-escaped.
    """
    base_class = "akn-enactingformula"
    classes = [base_class]
    if 'name' in element.attrib:
        classes.append(f'{base_class}-name-{_name_token(element.attrib["name"])}')
    attrs_str = _attrs_string(element, classes)

    num_span = None
    content_parts = []

    if element.text and element.text.strip():
        content_parts.append(_escape_text(element.text.strip()))

    for child in element:
        if child.tag == "num":
            # The numeral is emitted separately, ahead of the content block.
            num_span = _inline_span(["akn-num"], child)
        elif child.tag == "p":
            content_parts.append(_unwrapped_paragraph(child))
        else:
            content_parts.append(akn_element_to_html(child))

        if child.tail and child.tail.strip():
            content_parts.append(" " + _escape_text(child.tail.strip()))

    # A <div> (not <p>) wrapper, numeral first, then the content.
    html_parts = [f"<div{attrs_str}>"]
    if num_span:
        html_parts.append(num_span)
    if content_parts:
        html_parts.append(f'<div class="akn-content">{"".join(content_parts)}</div>')
    html_parts.append("</div>")
    return "".join(html_parts)

def convert_akn_xml_to_html(xml_string):
    """Convert an Akoma Ntoso XML string to an HTML string.

    Args:
        xml_string: The XML document text, or an HTML error snippet as
            returned by ``read_xml_from_file`` when reading failed.

    Returns:
        The HTML produced by ``akn_element_to_html`` for the document root.
        If ``xml_string`` is one of the ``read_xml_from_file`` error snippets
        it is returned unchanged; if the XML cannot be parsed, an
        ``<p><strong>Error parsing XML:</strong> ...</p>`` snippet is returned.
    """
    # read_xml_from_file reports failures with either of these two prefixes.
    # Both snippets are well-formed XML, so they must be caught before parsing
    # or they would be converted as if they were a document.
    error_prefixes = ("<p><strong>Error:", "<p><strong>Error reading file:")
    if xml_string.startswith(error_prefixes):
        return xml_string

    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError as e:
        return f"<p><strong>Error parsing XML:</strong> {e}</p>"

    # ElementTree exposes namespaced tags as "{uri}local". Reduce them to the
    # local name so the converter sees plain tag names regardless of how (or
    # with which quoting/prefix) the namespace was declared in the document.
    for node in root.iter():
        if isinstance(node.tag, str) and node.tag.startswith("{"):
            node.tag = node.tag.split("}", 1)[1]

    return akn_element_to_html(root)


In [37]:
# --- CSS for the Lembaran Negara layout; prepended to the generated HTML when it is displayed or saved ---
lembaran_negara_css_styles = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=EB+Garamond:ital,wght@0,400;0,500;0,600;1,400&display=swap" rel="stylesheet">


<style>

/* ========================================
   IMPROVED LEMBARAN NEGARA CSS STYLING
   ======================================== */

/* --- Reset and Base Styles --- */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: 'EB Garamond', 'Georgia', 'Times New Roman', 'Liberation Serif', serif;
    font-size: 12pt;
    line-height: 1.5;
    color: #000;
    background-color: #f5f5f5;
    padding: 20px;
}

/* --- Document Container --- */
.akn-act {
    background-color: #fff;
    max-width: 210mm;
    margin: 0 auto;
    padding: 25mm 20mm;
    box-shadow: 0 4px 20px rgba(0,0,0,0.1);
    border: 1px solid #ddd;
    min-height: 297mm;
    position: relative;
}

/* Hide metadata */
.akn-meta {
    display: none !important;
}

/* --- PREAMBLE STYLING --- */
.akn-preamble {
    text-align: center;
    margin-bottom: 3em;
}

/* Document titles */
.akn-doctitle {
    margin: 0.3em 0;
    font-weight: normal;
    font-size: 12pt;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    line-height: 1.3;
}

/* Specific title styling */
#act__preamble__doctitle_1 { 
    font-weight: bold; 
    margin-bottom: 0.8em; 
}

#act__preamble__doctitle_5 { 
    font-weight: bold; 
    font-size: 13pt; 
    margin: 0.8em 0 1.2em 0;
    text-decoration: underline;
}

#act__preamble__doctitle_6 { 
    font-weight: bold; 
    margin: 1.5em 0 1em 0; 
}

/* --- Menimbang and Mengingat sections --- */
.akn-recitals, .akn-citations {
    text-align: left;
    margin: 1.5em 0;
    position: relative;
}

.akn-recitals::before {
    content: "Menimbang";
    display: block;
    margin-left: 2cm;
    margin-bottom: 0.5em;
    font-weight: normal;
}

.akn-citations::before {
    content: "Mengingat";
    display: block;
    margin-left: 2cm;
    margin-bottom: 0.5em;
    font-weight: normal;
}

/* Individual recital/citation items */
.akn-recital, .akn-citation {
    display: flex;
    margin-left: 2.5cm;
    margin-bottom: 0.3em;
    text-align: justify;
}

.akn-recital > .akn-num, .akn-citation > .akn-num {
    flex: 0 0 1.5em;
    font-weight: normal;
}

.akn-recital > .akn-p, .akn-citation > .akn-p {
    flex: 1;
    margin: 0;
    line-height: 1.4;
}

/* --- Approval and Decision --- */
.akn-container[name="approvalStatement"], 
.akn-container-name-approvalstatement {
    text-align: center;
    margin: 2em 0;
    font-style: italic;
}

.akn-formula[name="decision"],
.akn-formula-name-decision {
    text-align: center;
    margin: 2em 0 1.5em 0;
}

.akn-formula[name="decision"] .akn-p,
.akn-formula-name-decision .akn-p {
    text-align: center;
    font-weight: bold;
    text-transform: uppercase;
    margin: 0;
    font-size: 11pt; /* Changed from 13pt to 12pt to match document */
}

/* ===============================================
   FIXED REVOCATION AND ENACTING SECTIONS
   =============================================== */

/* --- Revocation Container (I. Mencabut) --- */
.akn-container[name="revocation"],
.akn-container-name-revocation {
    display: flex !important;
    align-items: flex-start;
    margin: 1.5em 0 1.5em 2cm;
    font-size: 11pt;
    line-height: 1.5;
}

.akn-container[name="revocation"] > .akn-num,
.akn-container-name-revocation > .akn-num {
    flex: 0 0 auto;
    font-weight: bold;
    margin-right: 0.5em;
    min-width: 1.5em;
}

.akn-container[name="revocation"] .akn-content,
.akn-container-name-revocation .akn-content {
    flex: 1;
}

.akn-container[name="revocation"] .akn-content .akn-p,
.akn-container-name-revocation .akn-content .akn-p {
    margin: 0;
    text-align: justify;
}

/* --- Enacting Formula (II. Menetapkan) - FIXED --- */
.akn-enactingformula,
.akn-enactingformula-name-enactingformula {
    display: flex !important;
    align-items: flex-start;
    margin: 1.5em 0 3em 2cm;
    font-size: 11pt;
    line-height: 1.5;
}

.akn-enactingformula > .akn-num,
.akn-enactingformula-name-enactingformula > .akn-num {
    flex: 0 0 auto;
    font-weight: bold;
    margin-right: 0.5em;
    min-width: 1.5em;
}

.akn-enactingformula > .akn-content,
.akn-enactingformula-name-enactingformula > .akn-content {
    flex: 1;
}

/* Style the document type and title as inline elements */
.akn-enactingformula .akn-doctype,
.akn-enactingformula-name-enactingformula .akn-doctype {
    font-weight: bold;
    text-transform: uppercase;
}

.akn-enactingformula .akn-doctitle,
.akn-enactingformula-name-enactingformula .akn-doctitle {
    font-weight: bold;
    text-transform: uppercase;
}

/* --- MAIN BODY --- */
.akn-main {
    margin-top: 3em;
}

/* --- Article (Pasal) Styling --- */
.akn-article {
    margin-bottom: 2em;
    page-break-inside: avoid;
}

.akn-article > .akn-num:first-child {
    display: block;
    text-align: left;
    font-weight: bold;
    text-transform: uppercase;
    font-size: 12pt;
    margin: 2em 0 1em 0;
    padding: 0;
}

/* --- Paragraph (Ayat) Styling --- */
.akn-paragraph.akn-ayat {
    margin-left: 1.5cm;
    margin-bottom: 1em;
    position: relative;
    padding-left: 2em;
    text-align: justify;
    line-height: 1.5;
}

.akn-paragraph.akn-ayat > .akn-num:first-child {
    position: absolute;
    left: 0;
    top: 0;
    width: 1.8em;
    font-weight: bold;
    text-align: left;
}

.akn-paragraph.akn-ayat .akn-content {
    display: block;
}

.akn-paragraph.akn-ayat .akn-p {
    margin: 0 0 0.5em 0;
}

/* --- List Styling --- */
.akn-list {
    list-style: none !important;
    padding-left: 0 !important;
    margin: 0.8em 0 0.8em 1cm;
}

.akn-point {
    position: relative;
    padding-left: 2em;
    margin-bottom: 0.5em;
    text-align: justify;
    line-height: 1.5;
}

.akn-point > .akn-num:first-child {
    position: absolute;
    left: 0;
    top: 0;
    width: 1.8em;
    font-weight: normal;
}

.akn-point .akn-content {
    display: block;
}

.akn-point .akn-p {
    margin: 0;
}

/* --- Promulgation Command --- */
.akn-container-name-promulgationcommand {
    margin: 3em 0;
    text-align: justify;
    padding: 0 1cm;
    line-height: 1.5;
    font-style: italic;
}

/* --- CONCLUSIONS AND SIGNATURES --- */
.akn-conclusions {
    margin-top: 4em;
    page-break-inside: avoid;
}

.akn-container-name-signatureblock,
.akn-container-name-promulgationblock {
    width: 48%;
    float: right;
    margin-top: 2em;
    clear: right;
}

.akn-container-name-placedateofsignature .akn-p,
.akn-container-name-placedateofpromulgation .akn-p {
    margin: 0 0 0.2em 0;
    line-height: 1.3;
}

.akn-role {
    margin: 1em 0 2.5em 0;
    font-weight: normal;
}

.akn-person {
    font-weight: bold;
    text-transform: uppercase;
    margin: 0;
}

/* Clear float after signatures */
.akn-conclusions::after {
    content: "";
    display: table;
    clear: both;
}

/* --- ATTACHMENTS (PENJELASAN) --- */
.akn-attachments {
    margin-top: 5em;
    padding-top: 2em;
    border-top: 2px solid #000;
    page-break-before: always;
}

.akn-attachments .akn-doctitle {
    font-size: 13pt;
    font-weight: bold;
    text-align: center;
    margin-bottom: 2em;
    text-decoration: underline;
}

.akn-attachments .akn-heading {
    font-size: 12pt;
    font-weight: bold;
    text-transform: uppercase;
    text-align: center;
    margin: 2em 0 1em 0;
}

.akn-clause {
    margin-bottom: 1.5em;
}

.akn-clause > .akn-heading {
    text-align: left;
    font-style: italic;
    margin-bottom: 0.5em;
    text-transform: none;
    font-size: 11pt;
}

.akn-attachments .akn-p {
    text-align: justify;
    margin-bottom: 0.8em;
    line-height: 1.5;
}

.akn-attachments .akn-list {
    margin-left: 1.5cm;
}

/* --- Responsive Design --- */
@media screen and (max-width: 768px) {
    body {
        padding: 10px;
    }
    
    .akn-act {
        padding: 15mm 10mm;
        max-width: 100%;
    }
    
    .akn-paragraph.akn-ayat,
    .akn-recital,
    .akn-citation {
        margin-left: 1cm;
    }
    
    .akn-container-name-signatureblock,
    .akn-container-name-promulgationblock {
        width: 100%;
        float: none;
        margin-top: 1em;
    }
    
    .akn-container[name="revocation"],
    .akn-container-name-revocation,
    .akn-enactingformula,
    .akn-enactingformula-name-enactingformula {
        margin-left: 1cm;
    }
}

/* --- Print Styles --- */
@media print {
    body {
        background: white;
        font-size: 12pt;
        font-family: 'Times New Roman', Times, serif;
    }
    
    .akn-act {
        box-shadow: none;
        border: none;
        margin: 0;
        padding: 2cm;
    }
    
    .akn-article {
        page-break-inside: avoid;
    }
    
    .akn-attachments {
        page-break-before: always;
    }
}
</style>
"""

In [38]:
# --- Execution in IPython Notebook ---
# Reads the XML file, converts it to HTML, renders it inline, and prints the raw markup.
# The names assigned here (akn_xml_string_from_file, generated_html) are reused by later cells.

# 1. Path to the Akoma Ntoso XML file to render (a relative path, resolved against the current working directory)
xml_file_path = '../outputs/UU_8_1961_vAkn_rev10_final.akn.xml'  # <--- IMPORTANT: REPLACE WITH YOUR ACTUAL FILE PATH

# 2. Read the XML text; on failure read_xml_from_file returns an HTML error snippet instead of raising
akn_xml_string_from_file = read_xml_from_file(xml_file_path)

# 3. Convert the XML (or pass the error snippet through) to an HTML string
generated_html = convert_akn_xml_to_html(akn_xml_string_from_file)

# 4. Display the styled HTML in the notebook: the CSS block is prepended to the generated markup
display(HTML(lembaran_negara_css_styles + generated_html))

# For debugging the generated HTML structure (prints the entire document as a single string):
print(generated_html)

<div class="akn-akomantoso"><article id="act" class="akn-act akn-act-name-indonesianact"><div class='akn-meta' style='display:none;' title='AKN Meta: act__meta'><div id="act__meta__identification" class="akn-identification"><div id="act__meta__identification__work" class="akn-frbrwork"><span class="akn-frbrthis"></span><span class="akn-frbruri"></span><span class="akn-frbrdate akn-frbrdate-name-creation"></span><span class="akn-frbrauthor"></span><span class="akn-frbrcountry"></span><span class="akn-frbrname"></span><span class="akn-frbrnumber"></span></div><div id="act__meta__identification__manifestation" class="akn-frbrmanifestation akn-frbrmanifestation-name-publication"><span class="akn-frbrthis"></span><span class="akn-frbruri"></span><span class="akn-frbrdate akn-frbrdate-name-publication"></span><span class="akn-frbrformat"></span></div></div><div id="act__meta__lifecycle" class="akn-lifecycle"><div id="act__meta__lifecycle__enactment_1" class="akn-eventref akn-eventref-type-en

In [14]:
from bs4 import BeautifulSoup

# Re-run the conversion (the same call as in the execution cell). This relies on
# akn_xml_string_from_file already existing in the kernel.
# NOTE(review): this cell's execution count is out of sequence with its neighbours, and the
# file-writing cell depends on the pretty_html defined here — confirm Restart & Run All works.
generated_html = convert_akn_xml_to_html(akn_xml_string_from_file)

# Parse the generated markup with the built-in 'html.parser' backend and re-serialize it
# with one tag per line for inspection.
# NOTE(review): prettify() adds whitespace between tags, which may alter how inline
# content renders — confirm before treating pretty_html as the final document.
soup = BeautifulSoup(generated_html, 'html.parser')
pretty_html = soup.prettify()
print(pretty_html)

<div class="akn-akomantoso">
 <article class="akn-act akn-act-name-indonesianact" id="act">
  <div class="akn-meta" style="display:none;" title="AKN Meta: act__meta">
   <div class="akn-identification" id="act__meta__identification">
    <div class="akn-frbrwork" id="act__meta__identification__work">
     <span class="akn-frbrthis">
     </span>
     <span class="akn-frbruri">
     </span>
     <span class="akn-frbrdate akn-frbrdate-name-creation">
     </span>
     <span class="akn-frbrauthor">
     </span>
     <span class="akn-frbrcountry">
     </span>
     <span class="akn-frbrname">
     </span>
     <span class="akn-frbrnumber">
     </span>
    </div>
    <div class="akn-frbrmanifestation akn-frbrmanifestation-name-publication" id="act__meta__identification__manifestation">
     <span class="akn-frbrthis">
     </span>
     <span class="akn-frbruri">
     </span>
     <span class="akn-frbrdate akn-frbrdate-name-publication">
     </span>
     <span class="akn-frbrformat">
     

In [39]:
# Save the styled document as a standalone HTML file.
filename = '../outputs/UU_8_1961_vAkn_rev10_final.html'

# Wrap the CSS and markup in a minimal HTML5 skeleton. The file is written as
# UTF-8, so the encoding is declared explicitly; without a charset declaration
# a browser opening the file from disk may guess a different encoding and
# garble non-ASCII characters. The doctype also keeps browsers out of quirks
# mode, and the CSS <link>/<style> block is placed in <head> where it belongs.
complete_html = (
    "<!DOCTYPE html>\n"
    "<html>\n"
    "<head>\n"
    '<meta charset="utf-8">\n'
    + lembaran_negara_css_styles
    + "</head>\n"
    "<body>\n"
    + pretty_html
    + "\n</body>\n"
    "</html>\n"
)

with open(filename, 'w', encoding='utf-8') as f:
    f.write(complete_html)