In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Prefix-to-URI map handed to the ElementTree lookups below, so the XPath
# expressions can use the short 'gmd:' and 'gco:' prefixes.
NAMESPACES = dict(
    gmd='http://www.isotc211.org/2005/gmd',
    gco='http://www.isotc211.org/2005/gco',
)

def extract_text(element, path):
    """Return the stripped text of the first node under `element` matching `path`.

    The path is resolved against the module-level NAMESPACES prefix map.
    An empty string is returned when nothing matches or when the matched
    node carries no text.
    """
    node = element.find(path, NAMESPACES)
    if node is None:
        return ''
    raw = node.text
    if not raw:
        return ''
    return raw.strip()

def parse_iso_metadata(xml_path):
    """Parse an XML metadata file and extract its title, abstract, and purpose.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        A ``(title, abstract, purpose)`` tuple of strings. Each value is ''
        when its element is absent or has no text. If the file cannot be read
        or parsed, or it has no ``gmd:MD_DataIdentification`` section, a
        message is printed and ``('', '', '')`` is returned; no exception is
        raised.
    """
    try:
        root = ET.parse(xml_path).getroot()
        identification_info = root.find(
            'gmd:identificationInfo/gmd:MD_DataIdentification', NAMESPACES
        )

        if identification_info is None:
            # Report the actual cause instead of letting the lookups below
            # fail with "'NoneType' object has no attribute 'find'".
            print(
                f"Error processing {xml_path}: "
                "no gmd:identificationInfo/gmd:MD_DataIdentification element found"
            )
            return '', '', ''

        title = extract_text(
            identification_info,
            'gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString',
        )
        abstract = extract_text(identification_info, 'gmd:abstract/gco:CharacterString')
        purpose = extract_text(identification_info, 'gmd:purpose/gco:CharacterString')

        return title, abstract, purpose
    except Exception as e:
        # Deliberately broad: one unreadable or malformed file is reported and
        # yields empty fields rather than raising to the caller.
        print(f"Error processing {xml_path}: {e}")
        return '', '', ''


In [None]:
# Set your root directory path
root_directory = 'edge-shape'

# List to hold extracted data (one dict per XML file found)
metadata_records = []

# Scan each immediate sub-folder of the root for .xml files.
# os.listdir() returns entries in arbitrary order, so both listings are sorted
# to keep the row order (and the CSV built from it) reproducible across runs.
for folder_name in sorted(os.listdir(root_directory)):
    folder_path = os.path.join(root_directory, folder_name)
    if not os.path.isdir(folder_path):
        continue  # skip plain files sitting directly in the root

    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(folder_path, file_name)
        title, abstract, purpose = parse_iso_metadata(xml_path)

        metadata_records.append({
            'Folder Name': folder_name,
            'File Name': file_name,
            'Title': title,
            'Abstract': abstract,
            'Purpose': purpose
        })


In [None]:
# Convert to DataFrame. The columns are listed explicitly so the frame (and the
# CSV header) keeps the same schema even when metadata_records is empty.
df = pd.DataFrame(
    metadata_records,
    columns=['Folder Name', 'File Name', 'Title', 'Abstract', 'Purpose'],
)

# Save CSV next to the scanned folders, without the positional index column
output_path = os.path.join(root_directory, 'iso_metadata_summary.csv')
df.to_csv(output_path, index=False)