In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Namespace definitions for ISO 19139.
# Maps the short prefixes used in the XPath strings below ('gmd:', 'gco:')
# to their full namespace URIs; it is passed as the namespace argument to
# ElementTree's find() so the prefixed paths resolve.
NAMESPACES = {
    'gmd': 'http://www.isotc211.org/2005/gmd',
    'gco': 'http://www.isotc211.org/2005/gco'
}

def extract_text_iso(element, path):
    """Return the stripped text of the first node under ``element`` matching ``path``.

    ``path`` is resolved with the prefixes declared in ``NAMESPACES``.

    Args:
        element: XML element to search from.
        path: Namespace-prefixed path expression understood by ``find``.

    Returns:
        The node's text with surrounding whitespace removed, or ``''`` when
        no node matches or the matched node carries no text.
    """
    match = element.find(path, NAMESPACES)
    # A missing node and a node with empty/None text are treated the same.
    if match is None or not match.text:
        return ''
    return match.text.strip()

def extract_text_fgdc(parent, path):
    """Return the stripped text of the first node under ``parent`` matching ``path``.

    No namespace map is used; ``path`` is a plain tag path.

    Args:
        parent: XML element to search from.
        path: Slash-separated tag path understood by ``find``.

    Returns:
        The node's text with surrounding whitespace removed, or ``''`` when
        no node matches or the matched node carries no text.
    """
    found = parent.find(path)
    raw_text = found.text if found is not None else None
    # Empty string and None both fall through to the '' default.
    return raw_text.strip() if raw_text else ''

def parse_metadata(xml_path):
    """Read one metadata XML file and pull out its title, abstract and purpose.

    Two layouts are probed, in this order:

    1. ISO 19139: the root must have a
       ``gmd:identificationInfo/gmd:MD_DataIdentification`` descendant path.
    2. FGDC: the root must have an ``idinfo`` child.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        A ``(title, abstract, purpose)`` tuple of strings. Fields that are
        absent come back as ``''``. ``('', '', '')`` is returned when neither
        layout is recognised, and also when any exception is raised while
        reading or parsing (the error is printed, not re-raised).
    """
    try:
        root = ET.parse(xml_path).getroot()

        # ISO 19139 layout
        iso_block = root.find('gmd:identificationInfo/gmd:MD_DataIdentification', NAMESPACES)
        if iso_block is not None:
            iso_paths = (
                'gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString',
                'gmd:abstract/gco:CharacterString',
                'gmd:purpose/gco:CharacterString',
            )
            return tuple(extract_text_iso(iso_block, field_path) for field_path in iso_paths)

        # FGDC layout
        fgdc_block = root.find('idinfo')
        if fgdc_block is not None:
            fgdc_paths = (
                'citation/citeinfo/title',
                'descript/abstract',
                'descript/purpose',
            )
            return tuple(extract_text_fgdc(fgdc_block, field_path) for field_path in fgdc_paths)

    except Exception as e:
        # Best-effort: report the problem and fall through to the empty result.
        print(f"Error parsing {xml_path}: {e}")

    # Reached for an unrecognised layout or after a reported error.
    return '', '', ''


In [None]:
# Set the path to your directory containing shapefile folders
root_directory = 'edge-shape'  # Update this path

# Collect metadata records: one dict per .xml file found one level below
# root_directory.
records = []

# os.listdir() returns names in arbitrary order, so both listings are sorted
# to make the row order of the resulting table reproducible across runs.
for folder_name in sorted(os.listdir(root_directory)):
    folder_path = os.path.join(root_directory, folder_name)
    # Only sub-folders are scanned; files directly in the root are ignored.
    if not os.path.isdir(folder_path):
        continue

    for file_name in sorted(os.listdir(folder_path)):
        # Case-insensitive extension match so '.XML' files are included.
        if not file_name.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(folder_path, file_name)
        # parse_metadata returns empty strings for unreadable or unknown
        # files, so every XML file still yields a row.
        title, abstract, purpose = parse_metadata(xml_path)

        records.append({
            'Folder Name': folder_name,
            'File Name': file_name,
            'Title': title,
            'Abstract': abstract,
            'Purpose': purpose
        })


In [None]:
# Create DataFrame.
# The columns are named explicitly so that an empty `records` list still
# produces a frame (and a CSV header) with the expected columns; with
# non-empty records the column order is the same as the dict key order.
df = pd.DataFrame(
    records,
    columns=['Folder Name', 'File Name', 'Title', 'Abstract', 'Purpose'],
)

# Save to CSV inside the scanned root directory, without the index column.
output_csv = os.path.join(root_directory, 'metadata_summary.csv')
df.to_csv(output_csv, index=False)
