# LandXML → CSV (Points Extractor)

This notebook scans a folder for `*.landxml` (and `*.xml` that contain LandXML) files,
parses **points** (e.g., `CgPoint`, `Point`, `P`) using Python's built-in `xml.etree.ElementTree` (no external parser needed),
and writes a consolidated CSV of all points to a subfolder named `csv_pts_xml` inside the selected input folder.

**Usage**:
1. Enter or paste your LandXML folder path in the widget (e.g., `P:\2025\ARROYO DE LOS PINOS\RS\02_PRODUCTION\07_Cyclone_3DR\LandXML`).
2. Click **Parse .landxml → CSV** to run.
3. The output CSV will be saved to: `<input_folder>/csv_pts_xml/points_all.csv`.

**Notes**:
- Handles common point containers: `CgPoints/CgPoint`, `Point` (with `x/y/z` attributes or text), and `P` (text `x y z`).
- Adds columns: `file_name`, `source_tag`, `point_id`, `name`, `code`, `desc`, `x`, `y`, `z`.
- If your dataset uses different tags or structures, adjust the parser function accordingly.


In [1]:

import os
import re
from pathlib import Path
import xml.etree.ElementTree as ET
import pandas as pd

# -- Helper: get local (namespace-stripped) tag name --
def _local(tag: str) -> str:
    return tag.split('}', 1)[-1] if '}' in tag else tag

# -- Robust float parse --
def _to_float(val):
    try:
        return float(val)
    except (TypeError, ValueError):
        return None

# -- Extract (x,y,z) from element; supports attributes or text --
def _extract_xyz(el):
    # Try attributes first
    x = _to_float(el.get('x'))
    y = _to_float(el.get('y'))
    z = _to_float(el.get('z'))
    if x is not None and y is not None and z is not None:
        return x, y, z
    # Fallback to text (e.g., 'x y z' or 'x,y,z')
    txt = (el.text or '').strip()
    if not txt:
        return None, None, None
    parts = re.split(r"[\s,;]+", txt)
    if len(parts) >= 3:
        x = _to_float(parts[0])
        y = _to_float(parts[1])
        z = _to_float(parts[2])
        return x, y, z
    return None, None, None

def parse_landxml_points(file_path: Path):
    '''Parse a LandXML file and return a list of point dicts.
    Supports common point tags: CgPoint, Point, P.
    '''
    try:
        tree = ET.parse(file_path)
        root = tree.getroot()
    except ET.ParseError as e:
        return [], f'ParseError: {e}'

    if _local(root.tag).lower() != 'landxml':
        # Not a LandXML root; skip
        return [], 'Not a LandXML document'

    records = []
    # Iterate all elements; pick by local tag name
    for el in root.iter():
        lname = _local(el.tag)
        if lname in ('CgPoint', 'Point', 'P'):
            x, y, z = _extract_xyz(el)
            if x is None or y is None or z is None:
                continue
            rec = {
                'file_name': file_path.name,
                'source_tag': lname,
                'point_id': el.get('id') or el.get('name') or el.get('pntRef'),
                'name': el.get('name'),
                'code': el.get('code'),
                'desc': el.get('desc'),
                'x': x,
                'y': y,
                'z': z,
            }
            records.append(rec)
    return records, None

def scan_folder(folder: Path):
    # Find LandXML files
    files = []
    files.extend(folder.glob('*.landxml'))
    files.extend(folder.glob('*.xml'))
    # Deduplicate
    files = sorted(set(files))
    all_records = []
    skipped = []
    for f in files:
        recs, err = parse_landxml_points(f)
        if err:
            skipped.append((f.name, err))
            continue
        all_records.extend(recs)
    return all_records, skipped

def save_csv(records, out_dir: Path):
    out_dir.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame.from_records(records)
    # Order columns
    cols = ['file_name','source_tag','point_id','name','code','desc','x','y','z']
    for c in cols:
        if c not in df.columns:
            df[c] = None
    df = df[cols]
    out_path = out_dir / 'points_all.csv'
    df.to_csv(out_path, index=False)
    return out_path, len(df)

print('Ready: use the widget below to select your input folder and run.')


Ready: use the widget below to select your input folder and run.


In [2]:

# -- Simple widget UI to select folder and run --
try:
    import ipywidgets as widgets
    from IPython.display import display
except Exception as e:
    widgets = None
    print('ipywidgets not available. You can run manually:')
    print('    folder = Path(r"P:\2025\ARROYO DE LOS PINOS\RS\02_PRODUCTION\07_Cyclone_3DR\LandXML")')
    print('    records, skipped = scan_folder(folder)')
    print('    out_path, count = save_csv(records, folder / "csv_pts_xml")')

if widgets:
    folder_text = widgets.Text(
        value='',
        placeholder='P:\2025\ARROYO DE LOS PINOS\RS\02_PRODUCTION\07_Cyclone_3DR\LandXML',
        description='Input folder:',
        layout=widgets.Layout(width='80%')
    )
    run_btn = widgets.Button(description='Parse .landxml → CSV', button_style='success')
    output = widgets.Output()

    def on_run_clicked(b):
        output.clear_output()
        with output:
            raw = folder_text.value.strip()
            if not raw:
                print('Please enter a folder path.')
                return
            folder = Path(raw)
            if not folder.exists() or not folder.is_dir():
                print(f'Invalid folder: {folder}')
                return
            print(f'Scanning: {folder}')
            records, skipped = scan_folder(folder)
            if not records:
                print('No points found in LandXML files.')
            out_dir = folder / 'csv_pts_xml'
            out_path, count = save_csv(records, out_dir)
            print(f'Saved {count} points to: {out_path}')
            if skipped:
                print('
Skipped files:')
                for name, err in skipped:
                    print(f'  - {name}: {err}')

    run_btn.on_click(on_run_clicked)
    display(folder_text, run_btn, output)


  print('    folder = Path(r"P:\2025\ARROYO DE LOS PINOS\RS\02_PRODUCTION\07_Cyclone_3DR\LandXML")')
  placeholder='P:\2025\ARROYO DE LOS PINOS\RS\02_PRODUCTION\07_Cyclone_3DR\LandXML',


SyntaxError: unterminated string literal (detected at line 41) (2168711328.py, line 41)

### Troubleshooting
- If you don't see the widget, ensure `ipywidgets` is installed and enabled in your Jupyter environment.
- For paths on Windows, use raw strings (prefix with `r`) or escape backslashes (`\`).
- If your LandXML stores points under different tags, adjust `parse_landxml_points()` to include those tag names.
