In [1]:
from openpyxl import load_workbook
from pathlib import Path
import os
import xml.etree.ElementTree as ET
from datetime import datetime

# ---------- XML pretty print ----------
def indent_xml(elem, level=0):
    """Pretty-print an ElementTree element in place.

    Whitespace-only (or missing) ``text``/``tail`` values are replaced with a
    newline plus two spaces per nesting level, so the serialized XML puts one
    element on each line. Text or tails that contain non-whitespace content
    are left as they are.

    Args:
        elem: The element to indent; its descendants are processed
            recursively.
        level: Nesting depth of *elem*. Callers normally leave this at 0
            for the root element.
    """
    indent = "\n" + level * "  "
    if len(elem):
        # Content of a parent starts one level deeper than the parent itself.
        if not elem.text or not elem.text.strip():
            elem.text = indent + "  "
        for child in elem:
            indent_xml(child, level + 1)
        # The last child is followed by the parent's closing tag, so it must
        # dedent back to the parent's own level.
        if not child.tail or not child.tail.strip():
            child.tail = indent
    # Every non-root element, with or without children, needs a tail;
    # otherwise the next sibling starts on the same line as its closing tag.
    if level and (not elem.tail or not elem.tail.strip()):
        elem.tail = indent

# ---------- Main ----------
def generate_dc_and_dcat(xlsx_path):
    """Write Dublin Core and DCAT metadata files describing a workbook.

    Two files are written relative to the current working directory, named
    after the stem of *xlsx_path*: ``<stem>_dublincore.xml`` and
    ``<stem>_dcat.xml``. The metadata values are hard-coded in this function;
    the workbook itself is only opened to confirm that it can be read.

    Args:
        xlsx_path: Path to the source ``.xlsx`` workbook.

    Raises:
        Any exception raised by ``load_workbook`` when the workbook cannot
        be opened.
    """
    path = Path(xlsx_path)

    # Open the workbook only to fail early on a missing or unreadable input.
    # A read-only workbook keeps its file handle open until it is closed
    # explicitly, so release it right away.
    load_workbook(xlsx_path, read_only=True, data_only=True).close()

    # ---- Enriched metadata (from documentation) ----
    metadata = {
        "title": "Airport Traffic Dataset",
        "creator": "EUROCONTROL PRU / Network Manager",
        "publisher": "EUROCONTROL – Performance Review Unit (PRU)",
        "description": (
            "Dataset published by EUROCONTROL PRU containing airport-level IFR traffic data, "
            "including departures, arrivals and total movements, reported by Network Manager "
            "and Airport Operators, with temporal coverage from 2017 to 2025."
        ),
        "subject": [
            "Air traffic",
            "IFR flights",
            "Airports",
            "Aviation performance",
            "Airport operations"
        ],
        "coverage": "2017-01-01/2025-11-30",
        "date": "2024-12-04",
        "type": "Dataset (statistical)",
        "format": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "identifier": "EUROCONTROL-PRU-AIRPORT-TRAFFIC-V1.0",
        "language": "en",
        "rights": (
            "© European Organisation for the Safety of Air Navigation (EUROCONTROL). "
            "Data may be copied in whole or in part for non-commercial purposes, "
            "provided that EUROCONTROL is acknowledged as the source. "
            "Modification is not permitted without prior written permission. "
            "No warranty is given as to accuracy or completeness."
        ),
    }

    # =====================
    # Dublin Core XML
    # =====================
    dc_root = ET.Element(
        "metadata",
        attrib={"xmlns:dc": "http://purl.org/dc/elements/1.1/"}
    )

    # One dc:* element per metadata entry; list values are flattened into a
    # single "; "-separated string.
    for key, value in metadata.items():
        el = ET.SubElement(dc_root, f"dc:{key}")
        el.text = "; ".join(value) if isinstance(value, list) else value

    indent_xml(dc_root)
    ET.ElementTree(dc_root).write(
        f"{path.stem}_dublincore.xml",
        encoding="utf-8",
        xml_declaration=True
    )

    # =====================
    # DCAT RDF/XML
    # =====================
    # Every prefix used in a tag or attribute name below must be declared
    # here, otherwise the written file has an unbound prefix and
    # namespace-aware XML parsers reject it.
    ns = {
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "dcat": "http://www.w3.org/ns/dcat#",
        "dct": "http://purl.org/dc/terms/",
        "vcard": "http://www.w3.org/2006/vcard/ns#",
    }

    rdf = ET.Element("rdf:RDF", attrib={
        f"xmlns:{k}": v for k, v in ns.items()
    })

    dataset = ET.SubElement(
        rdf,
        "dcat:Dataset",
        attrib={"rdf:about": metadata["identifier"]}
    )

    # Plain-literal properties copied straight from the metadata mapping.
    for tag, key in (
        ("dct:title", "title"),
        ("dct:description", "description"),
        ("dct:publisher", "publisher"),
        ("dct:creator", "creator"),
        ("dct:issued", "date"),
        ("dct:format", "format"),
        ("dct:identifier", "identifier"),
        ("dct:language", "language"),
        ("dct:rights", "rights"),
    ):
        ET.SubElement(dataset, tag).text = metadata[key]

    contact = ET.SubElement(dataset, "dcat:contactPoint")
    vcard_kind = ET.SubElement(contact, "vcard:Kind")
    ET.SubElement(
        vcard_kind,
        "vcard:hasEmail"
    ).text = "mailto:PRU-Support@eurocontrol.int"

    distribution = ET.SubElement(dataset, "dcat:distribution")
    dist = ET.SubElement(distribution, "dcat:Distribution")

    ET.SubElement(
        dist,
        "dct:title"
    ).text = "Airport Traffic Dataset – CSV distribution"

    ET.SubElement(
        dist,
        "dct:description"
    ).text = (
        "CSV distribution providing airport-level IFR traffic data "
        "published by EUROCONTROL PRU."
    )

    ET.SubElement(
        dist,
        "dcat:accessURL"
    ).text = "https://ansperformance.eu/csv/#aptflt-csv"

    ET.SubElement(
        dist,
        "dct:format"
    ).text = "text/csv"

    for s in metadata["subject"]:
        ET.SubElement(dataset, "dcat:keyword").text = s

    # NOTE(review): the coverage interval is stored as plain text inside
    # rdf:Description; strict RDF consumers may expect a structured period
    # node instead — confirm against the target catalogue's requirements.
    temporal = ET.SubElement(dataset, "dct:temporal")
    ET.SubElement(temporal, "rdf:Description").text = metadata["coverage"]

    indent_xml(rdf)
    ET.ElementTree(rdf).write(
        f"{path.stem}_dcat.xml",
        encoding="utf-8",
        xml_declaration=True
    )

    print("✔ Dublin Core y DCAT generados correctamente")

# ---------- Run ----------
if __name__ == "__main__":
    # Script entry point: generate both metadata files for the bundled
    # workbook, resolved relative to the current working directory.
    generate_dc_and_dcat(xlsx_path="Airport_Traffic.xlsx")


✔ Dublin Core y DCAT generados correctamente
