In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

def extract_text_fgdc(parent, path):
    """Return the stripped text of the first element under ``parent`` matching ``path``.

    An empty string is returned when nothing matches ``path`` or when the
    matched element has no text.
    """
    match = parent.find(path)
    if match is None or not match.text:
        return ''
    return match.text.strip()

def parse_fgdc_entities(xml_path):
    """Parse the <eainfo> section of an FGDC XML file into attribute records.

    Every <detailed> section under <eainfo> is read, and one dictionary is
    produced per <attr> element, carrying the label and description of the
    entity type that the attribute belongs to.

    Args:
        xml_path: Path of the XML metadata file to parse.

    Returns:
        A list of dicts with the keys 'Entity Label', 'Entity Description',
        'Attribute Label', 'Attribute Definition' and 'Attribute Source'.
        Elements that are missing or empty yield empty strings. The list is
        empty when the file has no <idinfo> or no <eainfo> under its root, has
        no <detailed>/<attr> elements, or cannot be parsed (the error is then
        printed rather than raised).
    """
    try:
        root = ET.parse(xml_path).getroot()
        idinfo = root.find('idinfo')
        eainfo = root.find('eainfo')
        # Both sections must exist directly under the root for the file to be processed.
        if idinfo is None or eainfo is None:
            return []

        records = []
        # The FGDC CSDGM allows <detailed> to repeat (one per entity type);
        # reading only the first one would drop every other entity's attributes.
        for detailed in eainfo.findall('detailed'):
            entity_label = extract_text_fgdc(detailed, 'enttyp/enttypl')
            entity_desc = extract_text_fgdc(detailed, 'enttyp/enttypd')

            for attr in detailed.findall('attr'):
                records.append({
                    'Entity Label': entity_label,
                    'Entity Description': entity_desc,
                    'Attribute Label': extract_text_fgdc(attr, 'attrlabl'),
                    'Attribute Definition': extract_text_fgdc(attr, 'attrdef'),
                    'Attribute Source': extract_text_fgdc(attr, 'attrdefs'),  # optional source
                })
        return records
    except Exception as e:
        # Best effort: report the problem file and let the caller carry on.
        print(f"Error parsing {xml_path}: {e}")
        return []


In [None]:
# Set the root path to your directory of shapefile folders
root_directory = 'edge-shape'  # UPDATE this path

# Collect all entity-attribute records: one dict per attribute, tagged with
# the folder and file it was read from.
eainfo_records = []

# os.listdir() returns names in arbitrary order; sorting keeps the collected
# records (and any output built from them) in the same order on every run.
for folder_name in sorted(os.listdir(root_directory)):
    folder_path = os.path.join(root_directory, folder_name)
    if not os.path.isdir(folder_path):
        continue  # only sub-folders of the root are scanned

    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(folder_path, file_name)
        try:
            tree = ET.parse(xml_path)
            root = tree.getroot()
            # Check for FGDC format: a <metadata> root that has an <eainfo> child
            if root.tag == 'metadata' and root.find('eainfo') is not None:
                entries = parse_fgdc_entities(xml_path)
                for entry in entries:
                    entry['Folder Name'] = folder_name
                    entry['File Name'] = file_name
                    eainfo_records.append(entry)
        except Exception as e:
            # A single unreadable file must not abort the whole scan.
            print(f"Skipping file {file_name} due to error: {e}")


In [None]:
# Create DataFrame.
# The columns are listed explicitly, in the same order the records are built,
# so that an empty scan still yields a frame (and a CSV) with the full header
# instead of a column-less file.
eainfo_columns = [
    'Entity Label',
    'Entity Description',
    'Attribute Label',
    'Attribute Definition',
    'Attribute Source',
    'Folder Name',
    'File Name',
]
eainfo_df = pd.DataFrame(eainfo_records, columns=eainfo_columns)

# Save to CSV next to the scanned folders (no index column)
output_csv = os.path.join(root_directory, 'fgdc_eainfo_summary.csv')
eainfo_df.to_csv(output_csv, index=False)