In [1]:
from pathlib import Path

# Root folder that holds the Inkscape SVG annotation files
SRC_ROOT = Path("/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow")

# Walk the whole tree for *.svg files, leave out every file whose lowercased
# name contains "_gr.svg", and keep the remaining paths in sorted order
svgs = [
    svg_file
    for svg_file in SRC_ROOT.rglob("*.svg")
    if "_gr.svg" not in svg_file.name.lower()
]
svgs.sort()

print(f"Found {len(svgs)} SVGs under: {SRC_ROOT}")
svgs[:10]  # quick preview

Found 199 SVGs under: /home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow


[PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-10_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-11_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-12_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-13_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-14_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-15_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-1_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-3_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow/DC1000_13-4_Nf23_Jack.svg'),
 PosixPath('/home/jrhowell/benthic_ecolo

In [2]:
import xml.etree.ElementTree as ET
from collections import Counter

def top_attribute_keys(svg_path: Path, top_n: int = 30):
    """
    Tally attribute names across every element of one SVG file.

    Handy when hunting for the attribute that carries labels/classes
    (e.g., inkscape:label, id, class).

    Args:
        svg_path: SVG file to parse.
        top_n: how many of the most frequent attribute names to report.

    Returns:
        A list of (attribute_name, occurrences) pairs, most frequent first.
    """
    document_root = ET.parse(svg_path).getroot()

    # One pass over the tree (root included); every attribute key counts once per element.
    tally = Counter(
        attr_name
        for node in document_root.iter()
        for attr_name in node.attrib
    )

    return tally.most_common(top_n)

# Inspect one example SVG
# Index 6 is a hard-coded pick from the sorted `svgs` list built in the first cell;
# this line raises IndexError when fewer than seven SVGs were found.
example_svg = svgs[6]
# Last expression of the cell, so the notebook displays the (attribute, count) list.
top_attribute_keys(example_svg, top_n=30)

[('id', 9),
 ('width', 2),
 ('height', 2),
 ('{http://www.inkscape.org/namespaces/inkscape}groupmode', 2),
 ('{http://www.inkscape.org/namespaces/inkscape}label', 2),
 ('style', 2),
 ('x', 2),
 ('y', 2),
 ('version', 1),
 ('viewBox', 1),
 ('{http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd}docname', 1),
 ('pagecolor', 1),
 ('bordercolor', 1),
 ('borderopacity', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}showpageshadow', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}pageopacity', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}pagecheckerboard', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}deskcolor', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}zoom', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}cx', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}cy', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}window-width', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}window-height', 1),
 ('{http://www.inkscape.org/namespaces/inkscape}wind

In [3]:
import xml.etree.ElementTree as ET
from pathlib import Path

# Fully-qualified (Clark notation) names of the Inkscape attributes that mark a layer
INK_LABEL = "{http://www.inkscape.org/namespaces/inkscape}label"
INK_GROUPMODE = "{http://www.inkscape.org/namespaces/inkscape}groupmode"

# Canonical layer names; every output file is expected to contain each of them
TARGET_LAYERS = [
    "nv",
    "nh",
    "hy",
    "st/zo",
    "Image",
]

# Labels whose layers are removed outright
DELETE_LABELS = {"reef", "rf"}

# Alternative spellings (already lower-cased) -> canonical target name
SYNONYMS = {
    **dict.fromkeys(("st", "zo", "st_zo", "st-zo", "stzo"), "st/zo"),
    **dict.fromkeys(("image1", "image", "image2"), "Image"),
}

def unique_path(out_dir: Path, filename: str) -> Path:
    """
    Pick a path inside `out_dir` that does not exist yet.

    `out_dir / filename` is returned when it is free; otherwise the first free
    `<stem>_<i><suffix>` (i = 1, 2, 3, ...) is returned, so an existing file with
    the same basename is never overwritten.
    """
    candidate = out_dir / filename
    base, extension = candidate.stem, candidate.suffix

    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = out_dir / f"{base}_{attempt}{extension}"

    return candidate

def is_layer_group(el) -> bool:
    """
    True if element is an Inkscape layer group.

    An element qualifies when its local tag name (namespace stripped) is exactly
    "g" and its inkscape:groupmode attribute equals "layer".
    """
    tag = el.tag
    # Comment / processing-instruction nodes carry a non-string tag.
    if not isinstance(tag, str):
        return False
    # Compare the local name only: a plain suffix test would also accept any
    # other tag that merely ends in "g", such as the "{...}svg" root element.
    local_name = tag.rsplit("}", 1)[-1]
    return local_name == "g" and el.attrib.get(INK_GROUPMODE) == "layer"

def normalize_to_target(label: str) -> str | None:
    """
    Decide which of the 5 target layers this label should contribute to,
    or return None to delete/drop it.

    Rules:
    - rf/reef -> delete
    - ns -> nv
    - anything ending with 'to vn' -> use the first part (src), with ns->nv
      e.g. "nh to vn" -> nh
           "ns to vn" -> nv
    - keep nv/nh/hy/st/zo (including synonyms st/zo handling)
    - everything else -> delete
    """
    if not label:
        return None

    s = label.strip().lower()

    # delete reef/rf always
    if s in DELETE_LABELS:
        return None

    # ns merges into nv
    if s == "ns":
        return "nv"

    # transitions like "nh to vn" / "nv to vn"
    if " to " in s:
        src, dst = s.split(" to ", 1)
        src = src.strip()
        dst = dst.strip()

        # only apply your "ends with vn" rule when destination is vn
        if src == "ns":
            return "nv"
        src = SYNONYMS.get(src, src)
        if dst == "vn":
            final_label = src
        elif dst == "ns":
            final_label = "nv"
        else:
            final_label = dst
        return final_label if final_label in TARGET_LAYERS else None
        

    # canonicalize st/zo variants
    s = SYNONYMS.get(s, s)

    # keep only the target layers
    if s in TARGET_LAYERS:
        return s

    # drop everything else (including "Image")
    return None

def ensure_target_layers(root):
    """
    Ensure the SVG has a layer group for each label in TARGET_LAYERS.

    Existing layer groups are matched on their stripped, lower-cased,
    SYNONYMS-resolved inkscape:label; the first match per target (document
    order) is adopted and its label rewritten to the canonical spelling.
    Targets with no match get a new, empty layer group appended to `root`.

    Args:
        root: root element of the parsed SVG (modified in place).

    Returns:
        dict mapping each target label -> its layer element.
    """
    layers = {}

    # find existing layer groups
    for el in root.iter():
        if is_layer_group(el) and INK_LABEL in el.attrib:
            lab = el.attrib[INK_LABEL].strip().lower()
            lab = SYNONYMS.get(lab, lab)
            if lab in TARGET_LAYERS and lab not in layers:
                # enforce canonical casing exactly as in TARGET_LAYERS
                el.attrib[INK_LABEL] = lab
                layers[lab] = el

    # New groups must live in the same XML namespace as the root element.
    # ElementTree stores it as a "{uri}" prefix on the tag; a bare "g" would be
    # written out as a non-SVG element when the document is namespaced.
    root_tag = root.tag if isinstance(root.tag, str) else ""
    ns_prefix = root_tag[: root_tag.index("}") + 1] if root_tag.startswith("{") else ""

    # create missing layers at the end of the root (SVG top-level is typical)
    for lab in TARGET_LAYERS:
        if lab not in layers:
            layers[lab] = ET.SubElement(root, f"{ns_prefix}g", {
                INK_GROUPMODE: "layer",
                INK_LABEL: lab,
            })

    return layers

def relabel_svg_to_four_layers(in_path: Path, out_dir: Path) -> tuple[Path, int, int, int]:
    """
    Rewrite one SVG so its only Inkscape layers are those named in TARGET_LAYERS.

    (The function name is historical; the number of output layers is
    len(TARGET_LAYERS).)

    Every labelled layer group is classified with normalize_to_target():
      * the canonical layer chosen by ensure_target_layers() is kept in place;
      * a layer that maps to a target (including a second layer that resolves
        to an already-present target, e.g. both "st" and "zo") has its children
        appended to that target layer and is then removed;
      * a layer that maps to None is removed together with its contents.

    The result is written into `out_dir` (created if needed) under the input's
    basename, or a numbered variant from unique_path() if that name is taken.

    Args:
        in_path: SVG file to read.
        out_dir: directory that receives the rewritten file.

    Returns:
      (output_path, layers_moved, layers_deleted, elements_moved)
    """
    tree = ET.parse(in_path)
    svg_root = tree.getroot()

    # Make sure target layers exist (even if empty)
    target_layer_elems = ensure_target_layers(svg_root)

    layers_moved = 0
    layers_deleted = 0
    elements_moved = 0

    # Parent pointers are needed to detach layers. The map is kept up to date
    # below whenever an element is re-parented, so nested layers that travel
    # with their parent can still be removed later.
    parent_of = {child: parent for parent in svg_root.iter() for child in parent}

    # Collect layer groups first (stable list, document order)
    layer_groups = [el for el in svg_root.iter() if is_layer_group(el) and INK_LABEL in el.attrib]

    for layer in layer_groups:
        target = normalize_to_target(layer.attrib.get(INK_LABEL, ""))
        target_layer = None if target is None else target_layer_elems[target]

        # The canonical layer for its own label stays where it is.
        if layer is target_layer:
            layer.attrib[INK_LABEL] = target
            continue

        parent = parent_of.get(layer)
        if parent is None:
            # Only the document root has no parent; it can be neither merged nor removed.
            continue

        # No target: delete the entire layer group
        if target_layer is None:
            parent.remove(layer)
            layers_deleted += 1
            continue

        # Otherwise move the children into the target layer, then drop this layer
        for child in list(layer):
            layer.remove(child)
            if any(node is target_layer for node in child.iter()):
                # The destination layer lives inside this child. Appending it to
                # itself would detach it from the document, so keep the subtree
                # attached at the position this layer occupied.
                parent.insert(list(parent).index(layer), child)
                parent_of[child] = parent
                continue
            target_layer.append(child)
            parent_of[child] = target_layer
            elements_moved += 1

        parent.remove(layer)
        layers_moved += 1

    # Final guarantee: only keep layer groups with labels in TARGET_LAYERS (delete any stragglers)
    parent_of = {child: parent for parent in svg_root.iter() for child in parent}
    for el in list(svg_root.iter()):
        if is_layer_group(el) and INK_LABEL in el.attrib:
            lab = el.attrib[INK_LABEL].strip().lower()
            lab = SYNONYMS.get(lab, lab)
            if lab not in TARGET_LAYERS:
                parent = parent_of.get(el)
                if parent is not None:
                    parent.remove(el)
                    layers_deleted += 1

    # Write output (flat directory)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = unique_path(out_dir, in_path.name)
    tree.write(out_path, encoding="utf-8", xml_declaration=True)

    return out_path, layers_moved, layers_deleted, elements_moved

In [4]:
from pathlib import Path

# Source tree with the annotated SVGs, and the flat folder that receives the rewritten copies
SRC_ROOT = Path("/home/jrhowell/benthic_ecology_group/Jack/coral_seg/new_workflow")
OUT_DIR  = Path("/home/jrhowell/benthic_ecology_group/Jack/coral_seg/four_layer_images6")

# Recursive scan; files whose lowercased name contains "_gr.svg" are left out
svgs = [p for p in SRC_ROOT.rglob("*.svg") if "_gr.svg" not in p.name.lower()]
svgs.sort()
print(f"Found: {len(svgs)}")

# Running totals over all processed files
moved_layers = deleted_layers = moved_elements = 0

for p in svgs:
    # The output path (first item) is not needed here
    _, lm, ld, em = relabel_svg_to_four_layers(p, OUT_DIR)
    moved_layers, deleted_layers, moved_elements = (
        moved_layers + lm,
        deleted_layers + ld,
        moved_elements + em,
    )

print(f"processed: {len(svgs)}")
print(f"layers moved/merged: {moved_layers}")
print(f"layers deleted: {deleted_layers}")
print(f"elements moved: {moved_elements}")
print(f"output dir: {OUT_DIR}")

Found: 199
processed: 199
layers moved/merged: 183
layers deleted: 21
elements moved: 368
output dir: /home/jrhowell/benthic_ecology_group/Jack/coral_seg/four_layer_images6


In [5]:
import xml.etree.ElementTree as ET

# Sanity-check the layer labels of one rewritten file.
# glob() yields matches in arbitrary order, so sort them to inspect the same file
# on every run, and fail with a clear message if nothing has been written yet.
out_svgs = sorted(OUT_DIR.glob("*.svg"))
if not out_svgs:
    raise FileNotFoundError(f"No SVG files found in {OUT_DIR}")

p = out_svgs[0]
tree = ET.parse(p)
root = tree.getroot()

# Collect the inkscape:label of every element flagged as a layer
labels = []
for el in root.iter():
    if el.attrib.get(INK_GROUPMODE) == "layer" and INK_LABEL in el.attrib:
        labels.append(el.attrib[INK_LABEL])

print("Layer labels:", labels)
print("Unique:", sorted(set(label.strip().lower() for label in labels)))

Layer labels: ['nv', 'nh', 'hy', 'st/zo', 'Image']
Unique: ['hy', 'image', 'nh', 'nv', 'st/zo']
