In [6]:
import sys
from pathlib import Path

# Adjust to your actual project root
project_root = Path().resolve().parent  # or adjust the level as needed

# Make the project root importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from ipytree import Node, Tree
import ipywidgets as W
import json, pandas as pd
from collections import defaultdict
from _data_layer.registry import find, _entry_id
from _data_layer.api import _backend

# ────────────────────────────────────────────────────────────────────────────
# Load one topic:hierarchy artefact: the first entry the registry returns.
# NOTE(review): this was described as "the newest" artefact, but whether
# find() yields newest-first is not visible here — confirm or sort explicitly.
hier_art = next(iter(find(stage="topic:hierarchy")), None)
if hier_art is None:
    # Fail with a readable message instead of a bare StopIteration.
    raise LookupError('no artefact registered for stage "topic:hierarchy"')
# The entry names its storage backend and a reference to the payload in it.
hier_data_raw = _backend(hier_art["backend"]).load(hier_art["data_ref"])

# Normalise the loaded payload to list[dict].
# Handled shapes: raw bytes, JSON text, JSON Lines text, or a DataFrame;
# any other object is passed through unchanged (assumed to already be rows).
if isinstance(hier_data_raw, (bytes, bytearray)):
    # Keep the codec name plain ASCII; a typographic hyphen in the name only
    # works if the interpreter's codec-name normalisation happens to accept it.
    hier_data_raw = hier_data_raw.decode("utf-8")
if isinstance(hier_data_raw, str):
    try:
        hierarchy = json.loads(hier_data_raw)
    except json.JSONDecodeError:
        # Not a single JSON document: parse as one JSON value per non-blank line.
        hierarchy = [
            json.loads(line) for line in hier_data_raw.splitlines() if line.strip()
        ]
    if isinstance(hierarchy, dict):
        # A one-line JSON Lines payload parses as a bare object; wrap it so
        # downstream code iterates rows rather than the object's keys.
        hierarchy = [hierarchy]
elif isinstance(hier_data_raw, pd.DataFrame):
    hierarchy = hier_data_raw.to_dict("records")
else:
    hierarchy = hier_data_raw

# ────────────────────────────────────────────────────────────────────────────
# Index the hierarchy rows: parent id → list of rows having that parent
children_map = defaultdict(list)
for row in hierarchy:
    children_map[row["parent"]].append(row)

# Every id that occurs as somebody's child
all_children = set()
for row in hierarchy:
    all_children.add(row["child"])

# Roots are parents that never occur as a child; if there are none,
# fall back to the first parent seen (an empty hierarchy yields no roots).
roots = [parent for parent in children_map if parent not in all_children]
if not roots:
    roots = list(children_map)[:1]

# ────────────────────────────────────────────────────────────────────────────
def make_node(cluster_id: int) -> Node:
    """Return an ipytree Node for ``cluster_id`` that adds its children on demand.

    The returned node is labelled ``Cluster <id>``.  The first time its
    ``opened`` trait changes, one child node is added per row of
    ``children_map[cluster_id]`` (ordered by ``row["child"]``), labelled with
    the child's id, its ``lambda_val`` (default 0) and ``child_size``
    (default "?").  Every child is wired the same way, so the tree can be
    expanded to any depth, and each node is populated at most once no matter
    how often it is toggled.

    NOTE(review): ``lazy`` is passed to ``Node(...)`` and later set to False
    on the node; it does not appear among ipytree's documented Node traits —
    confirm it has any effect on the front end.
    """

    def _row_label(row) -> str:
        # Label for a non-root node, built from its hierarchy row.
        size = row.get("child_size", "?")
        lam = row.get("lambda_val", 0)
        return f"{row['child']} (λ={lam:.2f}, size={size})"

    def _lazy_node(node_id, label, lazy=True) -> Node:
        # Build one node whose children are created on its first `opened` change.
        node = Node(label, lazy=lazy)
        populated = False

        def _on_opened(_change):
            nonlocal populated
            if populated:
                # `opened` fires on every expand/collapse; children must be
                # added only once or they would be duplicated on each toggle.
                return
            populated = True
            node.lazy = False
            rows = sorted(children_map.get(node_id, []), key=lambda r: r["child"])
            for row in rows:
                child_id = row["child"]
                # .get() avoids creating empty entries in the defaultdict.
                has_children = bool(children_map.get(child_id))
                node.add_node(_lazy_node(child_id, _row_label(row), lazy=has_children))

        node.observe(_on_opened, "opened")
        return node

    return _lazy_node(cluster_id, f"Cluster {cluster_id}")

# ────────────────────────────────────────────────────────────────────────────
# Render: a heading naming the artefact, then a tree with one top-level
# node per root cluster.
tree = Tree(stripes=True, multiple_select=False)
for rid in roots:
    root_node = make_node(rid)
    tree.add_node(root_node)

heading = W.HTML(f"<h3>HDBSCAN Condensed Tree – artefact <code>{_entry_id(hier_art)}</code></h3>")
display(heading)
display(tree)


HTML(value='<h3>HDBSCAN Condensed Tree\xa0– artefact <code>artifact_d542dcd258</code></h3>')

Tree(nodes=(Node(name='Cluster 421'),))