# Jupyter cell: Batch convert KML/KMZ to CSVs


In [5]:
import os
import re
import zipfile
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional

import pandas as pd
from pyproj import CRS, Transformer


In [6]:
# ----------------------------
# Configuration
# ----------------------------
# Folder scanned for .kml/.kmz inputs; each CSV is written next to its source file.
DIR = r"C:\Users\USFJ139860\Downloads"
RECURSIVE = False                      # Set True to search subfolders

# Output CRSs
# CRS_1 is the source CRS of the transform and names the first CSV;
# CRS_2 is the projected target CRS and names the second CSV.
CRS_1 = "EPSG:4326"                    # WGS84 lon/lat
CRS_2 = "EPSG:6407"                    # NAD83(2011) / Arizona East (International foot)

# Projected CSV column order (choose one): "PNEZD" or "PENZD"
# - "PNEZD": PID, Northing, Easting, Elevation, Description
# - "PENZD": PID, Easting, Northing, Elevation, Description
# The value is compared case-insensitively; anything else is rejected per file.
PROJECTED_FORMAT = "PNEZD"

# Vertical unit handling for projected CSV:
# - None: keep KML altitude in meters
# - "match_projected_xy": convert altitude to same unit as XY of CRS_2
#   (only when the first axis unit name contains "foot"; otherwise meters are kept)
# - "meter": force meters
# - "us_survey_foot" or "international_foot": convert altitude accordingly
# Any other value silently falls back to meters.
VERTICAL_UNIT_MODE = "match_projected_xy"

# ----------------------------
# Helpers
# ----------------------------
def sanitize_crs(crs_str: str) -> str:
    """Turn a CRS identifier into a token that is safe inside a file name.

    Surrounding whitespace is stripped, the text is lower-cased and every
    ':' becomes '-' (for example "EPSG:4326" -> "epsg-4326").
    """
    token = crs_str.strip()
    token = token.lower()
    return "-".join(token.split(":"))

def list_kml_kmz(folder: str, recursive: bool = False):
    """Yield the paths of entries in *folder* whose name ends in .kml or .kmz.

    The extension match is case-insensitive. With ``recursive=True`` the tree
    below *folder* is walked and the file names of every directory are
    checked; otherwise only the direct entries of *folder* are checked.
    Paths are built by joining the containing directory and the entry name.
    """
    def _wanted(name):
        return name.lower().endswith((".kml", ".kmz"))

    if not recursive:
        yield from (
            os.path.join(folder, entry)
            for entry in os.listdir(folder)
            if _wanted(entry)
        )
        return

    for dirpath, _dirnames, filenames in os.walk(folder):
        for name in filenames:
            if _wanted(name):
                yield os.path.join(dirpath, name)

def read_kml_bytes(path: str) -> bytes:
    """Load the raw KML document stored at *path*.

    A path ending in ``.kml`` (case-insensitive) is read directly in binary
    mode. Any other path is opened as a ZIP archive (KMZ): the member named
    exactly ``doc.kml`` is returned when present, otherwise the first member
    whose name ends in ``.kml`` (case-insensitive), in archive order.

    Raises:
        ValueError: the archive contains no ``.kml`` member.
    """
    is_plain_kml = path.lower().endswith(".kml")
    if not is_plain_kml:
        with zipfile.ZipFile(path, "r") as archive:
            kml_members = [
                member
                for member in archive.namelist()
                if member.lower().endswith(".kml")
            ]
            if len(kml_members) == 0:
                raise ValueError(f"No .kml found inside KMZ: {path}")
            chosen = kml_members[0]
            if "doc.kml" in kml_members:
                chosen = "doc.kml"
            return archive.read(chosen)

    with open(path, "rb") as handle:
        return handle.read()

def extract_points(kml_xml: bytes) -> List[Dict]:
    """
    Extract one row per Placemark from a KML 2.2 document.

    Each row is a dict with the keys PID (Placemark name), Description,
    Longitude, Latitude and EllipsoidalHeight_m. The coordinates come from the
    Placemark's Point when it has one; otherwise the first coordinate tuple of
    its LineString, or failing that of its Polygon, is used as a simple,
    light-weight fallback. A missing or empty altitude becomes 0.0.

    Placemarks without a usable coordinate tuple (no geometry, empty
    coordinates, or fewer than two components) are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: the input is not well-formed XML.
        ValueError: a longitude, latitude or altitude is not a valid number.
    """
    ns = {"kml": "http://www.opengis.net/kml/2.2"}
    # Geometry lookups in priority order: Point first, then the fallbacks.
    coord_paths = (
        ".//kml:Point/kml:coordinates",
        ".//kml:LineString/kml:coordinates",
        ".//kml:Polygon//kml:coordinates",
    )

    root = ET.fromstring(kml_xml)
    rows = []
    for pm in root.findall(".//kml:Placemark", ns):
        pid = (pm.findtext("kml:name", default="", namespaces=ns) or "").strip()
        desc = (pm.findtext("kml:description", default="", namespaces=ns) or "").strip()

        coords_el = None
        for coord_path in coord_paths:
            coords_el = pm.find(coord_path, ns)
            # Compare with None explicitly: an Element without children (which
            # <coordinates> always is) is falsy, so chaining the lookups with
            # `or` would throw away a LineString match.
            if coords_el is not None:
                break
        if coords_el is None:
            continue

        # str.split() drops all surrounding whitespace and returns [] for
        # empty text, so the emptiness check below is meaningful.
        tuples = (coords_el.text or "").split()
        if not tuples:
            continue
        xyz = tuples[0].split(",")
        if len(xyz) < 2:
            continue

        lon = float(xyz[0])
        lat = float(xyz[1])
        alt_m = float(xyz[2]) if len(xyz) > 2 and xyz[2] != "" else 0.0

        rows.append({
            "PID": pid,
            "Description": desc,
            "Longitude": lon,
            "Latitude": lat,
            "EllipsoidalHeight_m": alt_m  # KML alt assumed meters
        })
    return rows

def vertical_convert(alt_m: float,
                     crs_proj: CRS,
                     mode: Optional[str] = None) -> float:
    """Express an altitude given in meters in the unit selected by *mode*.

    - "us_survey_foot" / "international_foot": divide by that foot's length
      in meters.
    - "match_projected_xy": if the unit name of the first axis of *crs_proj*
      contains "foot", divide by that axis' unit-to-meter factor; otherwise
      return the value unchanged.
    - None, "meter" or any other value: return the value unchanged.
    """
    meters_per_foot = {
        "us_survey_foot": 0.3048006096012192,
        "international_foot": 0.3048,
    }
    if mode in meters_per_foot:
        return alt_m / meters_per_foot[mode]

    # Everything except the CRS-driven mode keeps meters.
    if mode != "match_projected_xy":
        return alt_m

    first_axis = crs_proj.axis_info[0]
    if "foot" not in first_axis.unit_name.lower():
        return alt_m
    # unit_conversion_factor is the axis unit expressed in meters (ft->m).
    return alt_m / first_axis.unit_conversion_factor

# ----------------------------
# Transform setup
# ----------------------------
# Build the CRS objects and the transformer once; they are reused for every file.
crs_wgs = CRS.from_user_input(CRS_1)
crs_proj = CRS.from_user_input(CRS_2)
# always_xy=True: transform() is called below with (longitude, latitude) and its
# result is unpacked as (easting, northing), whatever axis order each CRS declares.
to_proj = Transformer.from_crs(crs_wgs, crs_proj, always_xy=True)

# ----------------------------
# Processing
# ----------------------------
# File-name tokens derived from the CRS identifiers (e.g. "epsg-4326").
epsg1_token = sanitize_crs(CRS_1)
epsg2_token = sanitize_crs(CRS_2)

# Materialize the generator so an empty result can be reported up front.
files = list(list_kml_kmz(DIR, RECURSIVE))
if not files:
    print(f"No .kml/.kmz files found in: {DIR}")

# Each file is converted independently: any exception is printed and the loop
# continues with the next file.
for path in files:
    try:
        kml_bytes = read_kml_bytes(path)
        rows = extract_points(kml_bytes)
        if not rows:
            print(f"Skip (no points): {path}")
            continue

        # WGS84 DataFrame (CSV #1)
        df_wgs = pd.DataFrame(rows)[["PID", "Longitude", "Latitude", "EllipsoidalHeight_m", "Description"]]

        # Projected DataFrame (CSV #2)
        # All points of the file are transformed in one call (array in, array out).
        easting, northing = to_proj.transform(df_wgs["Longitude"].values,
                                              df_wgs["Latitude"].values)
        # Heights are converted one by one; NaN heights are treated as 0.0.
        elev = [
            vertical_convert(h, crs_proj, VERTICAL_UNIT_MODE)
            for h in df_wgs["EllipsoidalHeight_m"].fillna(0.0).tolist()
        ]

        df_proj = pd.DataFrame({
            "PID": df_wgs["PID"].values,
            "Easting": easting,
            "Northing": northing,
            "Elevation": elev,
            "Description": df_wgs["Description"].values
        })

        # Column order of the projected CSV follows PROJECTED_FORMAT.
        # NOTE(review): this check runs once per file and an invalid value is
        # reported through the generic "Error processing" message below.
        if PROJECTED_FORMAT.upper() == "PNEZD":
            df_proj = df_proj[["PID", "Northing", "Easting", "Elevation", "Description"]]
        elif PROJECTED_FORMAT.upper() == "PENZD":
            df_proj = df_proj[["PID", "Easting", "Northing", "Elevation", "Description"]]
        else:
            raise ValueError("PROJECTED_FORMAT must be 'PNEZD' or 'PENZD'")

        # Output paths (same folder as the source file)
        # The base name is lower-cased, so inputs that differ only by case or by
        # extension (.kml vs .kmz) map to the same output files.
        base = os.path.splitext(os.path.basename(path))[0].strip().lower()
        folder = os.path.dirname(path)

        # NOTE(review): PROJECTED_FORMAT is inserted as typed, while the check
        # above is case-insensitive, so "pnezd" yields a lower-case token here.
        csv_wgs = os.path.join(folder, f"{base}-{epsg1_token}.csv")
        csv_proj = os.path.join(folder, f"{base}-{PROJECTED_FORMAT}-{epsg2_token}.csv")

        # Write
        df_wgs.to_csv(csv_wgs, index=False)
        df_proj.to_csv(csv_proj, index=False)

        print(f"Wrote:\n  {csv_wgs}\n  {csv_proj}")

    except Exception as ex:
        print(f"Error processing {path}: {ex}")


Wrote:
  C:\Users\USFJ139860\Downloads\pcons-epsg-4326.csv
  C:\Users\USFJ139860\Downloads\pcons-PNEZD-epsg-6407.csv
Wrote:
  C:\Users\USFJ139860\Downloads\x-points-epsg-4326.csv
  C:\Users\USFJ139860\Downloads\x-points-PNEZD-epsg-6407.csv
