In [12]:
import requests
import uuid
from datetime import datetime
import xml.etree.ElementTree as ET
import os

In [13]:
# Prefix -> namespace URI map used throughout the notebook. `elem` resolves its
# `ns` argument through this dict to build fully-qualified `{uri}tag` names.
namespaces = {
    "gmd": "http://www.isotc211.org/2005/gmd",
    "gco": "http://www.isotc211.org/2005/gco",
    "gfc": "http://www.isotc211.org/2005/gfc",
    "srv": "http://www.isotc211.org/2005/srv",
    "gmx": "http://www.isotc211.org/2005/gmx",
    "gts": "http://www.isotc211.org/2005/gts",
    "gsr": "http://www.isotc211.org/2005/gsr",
    "gss": "http://www.isotc211.org/2005/gss",
    "gmi": "http://www.isotc211.org/2005/gmi",
    "napm": "http://www.geconnections.org/nap/napMetadataTools/napXsd/napm",
    "gml": "http://www.opengis.net/gml/3.2",
    "xlink": "http://www.w3.org/1999/xlink",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance"
}

# Register each prefix with ElementTree so that serialised documents use these
# prefixes rather than auto-generated ones (ns0, ns1, ...).
# Note: the registration is global to the ElementTree module for this kernel.
for prefix, uri in namespaces.items():
    ET.register_namespace(prefix, uri)

In [14]:
# Function to build element
def elem(tag, ns="gmd", text=None, attrib=None):
    """Create a namespace-qualified element.

    Args:
        tag: Local element name (without prefix).
        ns: Key into the module-level ``namespaces`` map; defaults to "gmd".
        text: Optional text content. Falsy values (``None``, ``""``) leave
            the element without text.
        attrib: Optional dict of attributes; ``None`` means no attributes.

    Returns:
        A new ``xml.etree.ElementTree.Element`` tagged ``{namespace-uri}tag``.

    Raises:
        KeyError: if ``ns`` is not a key of ``namespaces``.
    """
    qualified_tag = "{{{}}}{}".format(namespaces[ns], tag)
    node = ET.Element(qualified_tag, {} if attrib is None else attrib)
    if text:
        node.text = text
    return node

# Function to build nested elements
def nestedElem(*elements, return_leaf=False):
    """Chain elements into a single parent -> child -> grandchild branch.

    Each element is appended to the one before it, so the first argument
    becomes the outermost node and the last argument the innermost.

    Args:
        *elements: Elements ordered from outermost to innermost.
        return_leaf: When true, also return the innermost element.

    Returns:
        The outermost element, or ``(outermost, innermost)`` when
        ``return_leaf`` is true.

    Raises:
        IndexError: if called without any element.
    """
    for position in range(1, len(elements)):
        elements[position - 1].append(elements[position])

    outermost = elements[0]
    return (outermost, elements[-1]) if return_leaf else outermost

In [15]:
# Metadata Record Information
def add_fileIdentifier(root, record_id):
  """Append ``gmd:fileIdentifier`` to ``root``.

  The identifier is stored as the text of a nested ``gco:CharacterString``.
  """
  identifier_value = elem("CharacterString", "gco", record_id)
  identifier = elem("fileIdentifier", "gmd")
  identifier.append(identifier_value)
  root.append(identifier)

def add_dateStamp(root):
  """Append ``gmd:dateStamp`` holding the current time as ``gco:DateTime``.

  The value is an ISO 8601 timestamp with a trailing "Z". Because "Z" denotes
  UTC, the clock is read in UTC rather than in the machine's local time zone.
  """
  # Local import: the file-level import only brings in `datetime` itself.
  from datetime import timezone

  # Drop tzinfo before formatting so isoformat() does not emit "+00:00";
  # the explicit "Z" suffix carries the UTC designation instead.
  now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
  stamp = now_utc.isoformat() + "Z"

  node = elem("dateStamp", "gmd")
  node.append(elem("DateTime", "gco", stamp))

  root.append(node)

def add_language(root, record):
  """Append the metadata-level ``gmd:language`` element to ``root``.

  Reads ``record["edhProfile"]["language"]`` (default "EN",
  case-insensitive). "EN" maps to "eng; CAN"; every other value maps to
  "fra; CAN".
  """
  profile = record.get("edhProfile", {})
  language_code = profile.get("language", "EN").upper()
  if language_code == "EN":
    language_label = "eng; CAN"
  else:
    language_label = "fra; CAN"

  language_value = elem("CharacterString", "gco", language_label)
  language = elem("language", "gmd")
  language.append(language_value)
  root.append(language)

def add_characterSet(root, record):
  """Append ``gmd:characterSet`` with an ``MD_CharacterSetCode`` child.

  The code text comes from ``record["edhProfile"]["characterSet"]``
  (default "UTF8"); the ``codeList``/``codeListValue`` attributes are fixed.
  """
  charset_name = record.get("edhProfile", {}).get("characterSet", "UTF8")
  code_attributes = {
    "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_95",
    "codeListValue": "RI_458"
  }

  character_set = elem("characterSet", "gmd")
  character_set.append(
    elem("MD_CharacterSetCode", "gmd", charset_name, attrib=code_attributes)
  )
  root.append(character_set)

def add_hierarchyLevel(root, record):
  """Append ``gmd:hierarchyLevel`` with an ``MD_ScopeCode`` child.

  The code text comes from ``record["edhProfile"]["hierarchyLevel"]``
  (default "Dataset"); the ``codeList``/``codeListValue`` attributes are fixed.
  """
  scope_name = record.get("edhProfile", {}).get("hierarchyLevel", "Dataset")

  node = elem("hierarchyLevel", "gmd")
  # The tag must be exactly "MD_ScopeCode": a stray trailing space would end up
  # in the element name and break any lookup by tag.
  scope_code = elem("MD_ScopeCode", "gmd",
                    scope_name,
                    attrib={
                      "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_108",
                      "codeListValue": "RI_623"
                    })
  node.append(scope_code)
  root.append(node)

# Contact
def add_contact(root, record):
  """Append the metadata ``gmd:contact`` block to ``root``.

  Builds a ``CI_ResponsibleParty`` with, in order:
    * ``organisationName`` from ``record["organizationName"]``
      (default "Fisheries and Oceans Canada");
    * ``contactInfo`` whose e-mail address comes from ``record["emailAddress"]``
      (default empty);
    * ``role`` from ``record["edhProfile"]["contactRole"]``
      (default "point of contact") with fixed code-list attributes.
  """
  party = elem("CI_ResponsibleParty", "gmd")

  # organisationName
  organisation = record.get("organizationName", "Fisheries and Oceans Canada")
  party.append(
    nestedElem(
      elem("organisationName", "gmd"),
      elem("CharacterString", "gco", organisation)
    )
  )

  # contactInfo -> ... -> electronicMailAddress
  mail_address = record.get("emailAddress", "")
  party.append(
    nestedElem(
      elem("contactInfo", "gmd"),
      elem("CI_Contact", "gmd"),
      elem("address", "gmd"),
      elem("CI_Address", "gmd"),
      elem("electronicMailAddress", "gmd"),
      elem("CharacterString", "gco", mail_address)
    )
  )

  # role
  role_name = record.get("edhProfile", {}).get("contactRole", "point of contact")
  role_attributes = {
    "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_90",
    "codeListValue": "RI_414"
  }
  party.append(
    nestedElem(
      elem("role", "gmd"),
      elem("CI_RoleCode", "gmd", role_name, attrib=role_attributes)
    )
  )

  root.append(nestedElem(elem("contact", "gmd"), party))

# Data Identification
def _free_text_fr(text_fr):
  """Return a ``gmd:PT_FreeText`` branch carrying ``text_fr`` under locale ``#fra``."""
  return nestedElem(
    elem("PT_FreeText", "gmd"),
    elem("textGroup", "gmd"),
    elem("LocalisedCharacterString", "gmd", text_fr, attrib={"locale": "#fra"})
  )


def _bilingual(tag, text_en, text_fr):
  """Return ``gmd:<tag>`` holding a ``gco:CharacterString`` and a sibling French ``PT_FreeText``."""
  node = elem(tag, "gmd")
  node.append(elem("CharacterString", "gco", text_en))
  node.append(_free_text_fr(text_fr))
  return node


def _citation_date(date_value, type_text, type_code):
  """Return a ``gmd:date`` wrapping a ``CI_Date`` with the given date and date-type code."""
  ci_date = elem("CI_Date", "gmd")
  ci_date.append(
    nestedElem(
      elem("date", "gmd"),
      elem("Date", "gco", date_value)
    )
  )
  ci_date.append(
    nestedElem(
      elem("dateType", "gmd"),
      elem("CI_DateTypeCode", "gmd", type_text,
           attrib={
             "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_87",
             "codeListValue": type_code
           })
    )
  )
  return nestedElem(elem("date", "gmd"), ci_date)


def _cited_responsible_party(profile):
  """Return ``gmd:citedResponsibleParty`` built from the ``citedResponsibleParty*`` profile keys."""
  individual_name = profile.get("citedResponsiblePartyIndividualName", "")
  organisation_name = profile.get("citedResponsiblePartyOrganizationName", "")
  email = profile.get("citedResponsiblePartyEmail", "")
  role = profile.get("citedResponsiblePartyRole", "")

  party = elem("CI_ResponsibleParty", "gmd")
  party.append(
    nestedElem(
      elem("individualName", "gmd"),
      elem("CharacterString", "gco", individual_name)
    )
  )
  # Tag names carry no trailing whitespace, so lookups by tag succeed.
  party.append(
    nestedElem(
      elem("organisationName", "gmd"),
      elem("CharacterString", "gco", organisation_name)
    )
  )
  # The same address is used for the default text and the French free text.
  party.append(
    nestedElem(
      elem("contactInfo", "gmd"),
      elem("CI_Contact", "gmd"),
      elem("address", "gmd"),
      elem("CI_Address", "gmd"),
      _bilingual("electronicMailAddress", email, email)
    )
  )
  party.append(
    nestedElem(
      elem("role", "gmd"),
      elem("CI_RoleCode", "gmd", role,
           attrib={
             "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_90",
             "codeListValue": "RI_415"
           })
    )
  )
  return nestedElem(elem("citedResponsibleParty", "gmd"), party)


def add_identificationInfo(root, record):
  """Append ``gmd:identificationInfo`` / ``MD_DataIdentification`` to ``root``.

  Children are emitted in this order: citation (title, creation and
  publication dates, cited responsible party), abstract, status,
  resourceMaintenance, language, then one topicCategory per entry of
  ``record["edhProfile"]["topicCategory"]``.

  Values are read from ``record`` (``title``, ``titleFr``, ``abstractEN``,
  ``abstractFR``, ``updateFrequency``) and from ``record["edhProfile"]``;
  missing keys fall back to empty strings ("EN" for the language).
  """
  profile = record.get("edhProfile", {})

  # --- citation -----------------------------------------------------------
  ci_citation = elem("CI_Citation", "gmd")

  # The French PT_FreeText is a sibling of the CharacterString (same layout as
  # the abstract below), not a child of it.
  ci_citation.append(
    _bilingual("title", record.get("title", ""), record.get("titleFr", ""))
  )

  date_items = [
    (profile.get("dataCreated", ""), "creation; création", "RI_366"),
    (profile.get("dataPublication", ""), "publication; publication", "RI_367")
  ]
  for date_value, type_text, type_code in date_items:
    ci_citation.append(_citation_date(date_value, type_text, type_code))

  ci_citation.append(_cited_responsible_party(profile))

  identification = elem("MD_DataIdentification", "gmd")
  identification.append(nestedElem(elem("citation", "gmd"), ci_citation))

  # --- abstract -----------------------------------------------------------
  identification.append(
    _bilingual("abstract", record.get("abstractEN", ""), record.get("abstractFR", ""))
  )

  # --- status -------------------------------------------------------------
  status = profile.get("datasetStatus", "")
  identification.append(
    nestedElem(
      elem("status", "gmd"),
      elem("MD_ProgressCode", "gmd", status,
           attrib={
             "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_106",
             "codeListValue": "RI_596"
           })
    )
  )

  # --- maintenance and update frequency -----------------------------------
  # Emitted before language/topicCategory: the MD_DataIdentification schema
  # sequence places resourceMaintenance ahead of those elements.
  frequency = record.get("updateFrequency", "")
  identification.append(
    nestedElem(
      elem("resourceMaintenance", "gmd"),
      elem("MD_MaintenanceInformation", "gmd"),
      elem("maintenanceAndUpdateFrequency", "gmd"),
      elem("MD_MaintenanceFrequencyCode", "gmd", frequency.lower(),
           attrib={
             "codeList": "https://schemas.metadata.geo.ca/register/napMetadataRegister.xml#IC_102",
             "codeListValue": "RI_539"
           })
    )
  )

  # --- language -----------------------------------------------------------
  lang = profile.get("language", "EN").upper()
  lang_value = "eng" if lang == "EN" else "fra"
  identification.append(
    nestedElem(
      elem("language", "gmd"),
      elem("CharacterString", "gco", lang_value)
    )
  )

  # --- topic categories ---------------------------------------------------
  for topic in profile.get("topicCategory", []):
    identification.append(
      nestedElem(
        elem("topicCategory", "gmd"),
        elem("MD_TopicCategoryCode", "gmd", topic.lower())
      )
    )

  root.append(nestedElem(elem("identificationInfo", "gmd"), identification))




# Keywords


# Resource Constraints (general)


# Resource Constraints (security)


# Temporal Extent


# Reference Systems


# Distribution Format


# Distributor Contact



In [None]:
def build_xml(record, record_id):
    """Build the ISO 19115 metadata document for one harvested record.

    Args:
        record: Mapping describing the dataset; passed to each ``add_*``
            section builder.
        record_id: Identifier written into ``gmd:fileIdentifier``.

    Returns:
        An ``xml.etree.ElementTree.ElementTree`` rooted at ``gmd:MD_Metadata``.

    Sections are appended in the order the ``gmd:MD_Metadata`` schema sequence
    expects: fileIdentifier, language, characterSet, hierarchyLevel, contact,
    dateStamp, identificationInfo.
    """

    # ROOT <gmd:MD_Metadata>
    root = elem("MD_Metadata", "gmd")

    # Metadata Record Information
    add_fileIdentifier(root, record_id)
    add_language(root, record)
    add_characterSet(root, record)
    add_hierarchyLevel(root, record)

    # Contact
    add_contact(root, record)

    # dateStamp follows contact in the MD_Metadata element sequence
    add_dateStamp(root)

    # Data Identification
    # Cited Responsible Party
    add_identificationInfo(root, record)

    # Keywords


    # Resource Constraints (general)


    # Resource Constraints (security)


    # Temporal Extent


    # Reference Systems


    # Distribution Format


    # Distributor Contact

    return ET.ElementTree(root)

In [17]:
def fetch_json(api_url, timeout=30):
    """GET ``api_url`` and return the decoded JSON body.

    Args:
        api_url: URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        Whatever ``response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: for 4xx/5xx responses (via ``raise_for_status``).
        requests.Timeout: if the server does not answer within ``timeout``.
    """
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [18]:
def extract_record_id(record):
    """Return the identifier to use for ``record``.

    The identifier is the ``id`` of the first entry in ``record["files"]``.
    When ``files`` is missing, null or empty, or that entry has no usable
    ``id``, a freshly generated UUID4 string is returned instead.

    Returns:
        The identifier as a ``str``.
    """
    files = record.get("files") or [{}]
    first_file = files[0] or {}
    record_id = first_file.get("id")
    if not record_id:
        # No usable id in the payload: fall back to a random identifier.
        return str(uuid.uuid4())
    return str(record_id)

In [19]:
def generate_xml(api_url, output_dir="output"):
    """Harvest records from ``api_url`` and write one XML file per record.

    The payload's ``"data"`` list (empty when absent) is iterated; each record
    is converted with ``build_xml`` and saved as
    ``<output_dir>/<record_id>.xml`` (UTF-8, with XML declaration). The
    directory is created if needed. Progress is reported on stdout.
    """
    os.makedirs(output_dir, exist_ok=True)

    payload = fetch_json(api_url)
    harvested = payload.get("data", [])
    print(f"Found {len(harvested)} records.")

    for entry in harvested:
        identifier = extract_record_id(entry)
        document = build_xml(entry, identifier)

        # NOTE(review): the identifier comes from the API response and is used
        # verbatim in the file name - confirm it cannot contain path separators.
        target = f"{output_dir}/{identifier}.xml"
        document.write(target, encoding="utf-8", xml_declaration=True)
        print(f"Saved {target}")

In [20]:
# Harvest endpoint the records are pulled from.
# NOTE(review): the host name looks like an internal machine - confirm it is
# reachable from wherever this notebook is run.
api_url = "http://qc-cdos-css-1:8815/api/portal/dataset/harvest"
# Fetch the records and write one XML file per record into the default
# "output" directory.
generate_xml(api_url)

Found 9 records.
Saved output/e8238156-750b-4d72-af7d-588d6910080a.xml
Saved output/16871717-3b3f-41fc-b11d-28caf4dc3791.xml
Saved output/63bcaf24-5734-4e2f-8482-84f9aceef3b4.xml
Saved output/a751a4e5-4ea4-4a51-9ca5-024cd353483d.xml
Saved output/6b0cb02c-8811-481e-b6e1-7dc7b601067e.xml
Saved output/dc7ebc7f-fe37-4576-b901-5be5c35cc541.xml
Saved output/9203e352-e949-4e69-a8b8-85ddd39446d6.xml
Saved output/b627e320-d417-47fa-9c4a-e7d9d84602ed.xml
Saved output/7d9c74a1-dd80-4362-af74-bc1c4c5b10e4.xml
