# Generative AI + GraphRAG Demo
## House/Apartment Adjacency (Kùzu-based, No-Geometry, Global-Candidate Builder)

In [None]:
# You don't need to run this cell if you have pip installed topologicpy
import sys
sys.path.append("C:/Users/sarwj/OneDrive - Cardiff University/Documents/GitHub/topologicpy/src")

In [None]:
# --- TopologicPy imports ---
from topologicpy.Vertex import Vertex
from topologicpy.Topology import Topology
from topologicpy.Dictionary import Dictionary
from topologicpy.Kuzu import Kuzu
from topologicpy.Graph import Graph

In [None]:
"""
GraphRAG Demo — House/Apartment Adjacency (Kùzu-based, No-Geometry, Global-Candidate Builder)
=====================================================================================

**What this is**
A compact, Jupyter-friendly demo that:
1) Reads *TopologicPy-like* graphs (JSON) from a folder.
2) Builds *topological* graphs strictly from the file's **vertices** and **edges** (ignores geometry entirely).
3) Loads them into a Kùzu DB using your `Kuzu.py` schema (Graph, Vertex, Edge).
4) **New logic:** At each iteration, we:
   - Build a **global candidate list** of neighbor labels by querying *all graphs* for the labels present in the **currently built graph** (frequency-ranked).
   - Ask the LLM to pick **one** action:
     - **ADD** a node (may choose from the list or propose a new label not in the list) and connect it to a chosen existing node,
     - **CONNECT** two existing nodes (no new node), or
     - **STOP** when no further action is needed (the loop then ends and reports the reason).
   - Apply the action to the **working graph** (we create/update it in the DB).
   - Save a full-graph snapshot and repeat until a stopping rule is met.

Notes
-----
- We *ignore* any polygon/geometry in JSON and rely solely on `vertices` and `edges`.
- Vertices get `label` from `node_name`, `roomtype`, or `zone_name` (first non-empty value, in that order); fallback to vertex id.
- `x,y,z` default to 0.0 if missing. Original vertex/edge dicts preserved in `props` JSON.
- If `OPENAI_API_KEY` is not set or OpenAI SDK is unavailable, a deterministic heuristic is used so the demo still runs.
- Edge suggestions, when accepted, are inserted with label `"suggested"` (bidirectional for simplicity).
- **Requested enhancement:** the seed node now **copies props (and x,y,z if present)** from the best-matching example across all graphs.

"""
from __future__ import annotations
import os, json, glob
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple
from collections import Counter

# --- Kùzu manager (ensure Kuzu.py is on sys.path or in the same directory) ---
from topologicpy.Kuzu import Kuzu

# --- Optional OpenAI (used only if available + key set) ---
try:
    import openai  # type: ignore
except Exception:
    openai = None

# --- Optional TopologicPy for snapshots -> real Graph objects ---
try:
    from topologicpy.Graph import Graph as TPGraph
    from topologicpy.Vertex import Vertex as TPVertex
    from topologicpy.Edge import Edge as TPEdge
    from topologicpy.Dictionary import Dictionary as TPDict
    from topologicpy.Topology import Topology as TPTopology
    _TOPOLOGICPY_AVAILABLE = True
except Exception:
    _TOPOLOGICPY_AVAILABLE = False

# ---------------------
# Data models
# ---------------------
@dataclass
class Vtx:
    """In-memory vertex record parsed from a graph JSON file.

    Instances are produced by `load_topologic_graph` and written to the
    database by `upsert_graph`.
    """
    id: str                 # key of this vertex in the JSON "vertices" mapping
    label: str              # node_name / roomtype / zone_name from the JSON, else the id
    x: float                # coordinates; 0.0 when the JSON entry has no such key
    y: float
    z: float
    props: Dict[str, Any]   # the original JSON vertex dict, kept as-is

@dataclass
class ERel:
    """In-memory edge record parsed from a graph JSON file.

    `src` and `dst` hold vertex ids as they appear in the file (not yet
    prefixed with a graph id).
    """
    src: str                # value of the edge's "source" entry
    dst: str                # value of the edge's "target" entry
    label: str              # "connectivity" or "label" from the JSON, else "adjacent"
    props: Dict[str, Any]   # the original JSON edge dict, kept as-is

# ---------------------
# JSON → (Vertices, Edges)
# ---------------------

def load_topologic_graph(path: str) -> tuple[list[Vtx], list[ERel]]:
    """Load a TopologicPy-like graph JSON, using only its vertex and edge dictionaries.

    Geometry in the file is ignored. Expected (flexible) shape:
    {
      "vertices": {
          "Vertex_0000": {"node_name": "Entrance", "x": 1.2, "y": 3.4, ...},
          ...
      },
      "edges": {
          "Edge_00": {"source": "Vertex_0000", "target": "Vertex_0004", "connectivity": "door", ...},
          ...
      }
    }

    Args:
        path: Path of the JSON file to read (UTF-8).

    Returns:
        A `(vertices, edges)` tuple. Each vertex label is the first non-empty
        value of `node_name`, `roomtype`, `zone_name`, else the vertex id.
        Coordinates that are missing or null become 0.0. Edges whose
        `source` or `target` is missing, null or empty are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If a coordinate is present but not numeric.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    raw_vs: Dict[str, Dict[str, Any]] = data.get("vertices", {}) or {}
    raw_es: Dict[str, Dict[str, Any]] = data.get("edges", {}) or {}

    def _coord(raw: Dict[str, Any], key: str) -> float:
        # An explicit JSON null must behave like a missing key, not crash float().
        value = raw.get(key)
        return 0.0 if value is None else float(value)

    vertices: list[Vtx] = []
    for vid, v in raw_vs.items():
        label = v.get("node_name") or v.get("roomtype") or v.get("zone_name") or str(vid)
        vertices.append(
            Vtx(id=vid, label=str(label), x=_coord(v, "x"), y=_coord(v, "y"), z=_coord(v, "z"), props=v)
        )

    edges: list[ERel] = []
    for e in raw_es.values():
        src = e.get("source")
        dst = e.get("target")
        # Test the raw values: str(None) is the truthy string "None", which
        # would let dangling edges through.
        if src in (None, "") or dst in (None, ""):
            continue
        label = str(e.get("connectivity") or e.get("label") or "adjacent")
        edges.append(ERel(src=str(src), dst=str(dst), label=label, props=e))

    return vertices, edges

# ---------------------
# Kùzu helpers
# ---------------------

def ensure_schema(manager):
    """Delegate to `Kuzu.EnsureSchema` for `manager`, with `silent=False`.

    NOTE(review): presumably this creates the Graph / Vertex / Edge tables when
    they are missing — confirm against Kuzu.py.
    """
    Kuzu.EnsureSchema(manager, silent=False)


def upsert_graph(manager, graph_id: str, vertices: list[Vtx], edges: list[ERel], undirected: bool = True):
    """Replace the stored copy of `graph_id` in Kùzu with the given vertices and edges.

    Existing edges, vertices and the Graph card carrying this id are deleted
    first; everything is then re-created with raw Cypher. Stored vertex ids are
    namespaced as "<graph_id>:<vertex id>". When undirected=True each input
    edge is written twice, once per direction.
    """
    ensure_schema(manager)

    # Wipe the previous version: edges first, then vertices, then the Graph card.
    for statement, key in (
        ("MATCH (a:Vertex)-[r:Edge]->(b:Vertex) WHERE a.graph_id=$gid AND b.graph_id=$gid DELETE r;", "gid"),
        ("MATCH (v:Vertex) WHERE v.graph_id=$gid DELETE v;", "gid"),
        ("MATCH (g:Graph) WHERE g.id=$id DELETE g;", "id"),
    ):
        manager.exec(statement, {key: graph_id}, write=True)

    # Graph card: counts reflect the input lists, not the doubled directed edges.
    card_query = (
        "\n"
        "        CREATE (g:Graph {id:$id, label:$label, num_nodes:$n, num_edges:$m, props:$props});\n"
        "        "
    )
    card_params = {
        "id": graph_id,
        "label": graph_id,
        "n": len(vertices),
        "m": len(edges),
        "props": json.dumps({}),
    }
    manager.exec(card_query, card_params, write=True)

    # One CREATE per vertex.
    vertex_query = (
        "\n"
        "            CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, x:$x, y:$y, z:$z, props:$props});\n"
        "            "
    )
    for vertex in vertices:
        vertex_params = {
            "id": f"{graph_id}:{vertex.id}",
            "gid": graph_id,
            "label": vertex.label,
            "x": float(vertex.x),
            "y": float(vertex.y),
            "z": float(vertex.z),
            "props": json.dumps(vertex.props),
        }
        manager.exec(vertex_query, vertex_params, write=True)

    # Directed edge a -> b, plus b -> a when the graph is treated as undirected.
    forward_query = (
        "\n"
        "            MATCH (va:Vertex {id:$a}), (vb:Vertex {id:$b})\n"
        "            CREATE (va)-[:Edge {label:$lbl, props:$props}]->(vb);\n"
        "            "
    )
    reverse_query = (
        "\n"
        "                MATCH (va:Vertex {id:$a}), (vb:Vertex {id:$b})\n"
        "                CREATE (vb)-[:Edge {label:$lbl, props:$props}]->(va);\n"
        "                "
    )
    for relation in edges:
        edge_params = {
            "a": f"{graph_id}:{relation.src}",
            "b": f"{graph_id}:{relation.dst}",
            "lbl": relation.label,
            "props": json.dumps(relation.props),
        }
        manager.exec(forward_query, edge_params, write=True)
        if undirected:
            manager.exec(reverse_query, edge_params, write=True)

# --- Small builders for the *working* graph we are constructing ---

def create_graph_card_if_missing(manager, graph_id: str):
    """Create an empty Graph card for `graph_id` unless one is already stored."""
    already_there = manager.exec(
        "MATCH (g:Graph {id:$id}) RETURN 1 LIMIT 1", {"id": graph_id}, write=False
    )
    if already_there:
        return
    # New cards start with zero counts and an empty JSON props string.
    manager.exec(
        "CREATE (g:Graph {id:$id, label:$id, num_nodes:0, num_edges:0, props:'{}'})",
        {"id": graph_id},
        write=True,
    )


def create_vertex(manager, graph_id: str, local_id: str, label: str, props: Dict[str,Any] | None = None,
                  x: float = 0.0, y: float = 0.0, z: float = 0.0):
    """Insert one vertex into `graph_id`, creating the Graph card first if needed.

    The stored vertex id is "<graph_id>:<local_id>"; `props` is serialized to a
    JSON string (an empty object when omitted).
    """
    create_graph_card_if_missing(manager, graph_id)

    insert_query = (
        "\n"
        "        CREATE (:Vertex {id:$id, graph_id:$gid, label:$label, x:$x, y:$y, z:$z, props:$props});\n"
        "        "
    )
    insert_params = {
        "id": f"{graph_id}:{local_id}",
        "gid": graph_id,
        "label": label,
        "x": float(x),
        "y": float(y),
        "z": float(z),
        "props": json.dumps(props or {}),
    }
    manager.exec(insert_query, insert_params, write=True)


def edge_exists(manager, graph_id: str, a_local: str, b_local: str) -> bool:
    """Report whether a directed edge a -> b is stored between two vertices of `graph_id`."""
    lookup_query = (
        "\n"
        "        MATCH (a:Vertex {id:$a})-[:Edge]->(b:Vertex {id:$b}) RETURN 1 LIMIT 1\n"
        "        "
    )
    endpoints = {"a": f"{graph_id}:{a_local}", "b": f"{graph_id}:{b_local}"}
    matches = manager.exec(lookup_query, endpoints, write=False) or []
    return len(matches) != 0


def create_edge_bidirectional(manager, graph_id: str, a_local: str, b_local: str, label: str = "suggested",
                              props: Dict[str,Any] | None = None):
    """Connect two existing vertices of `graph_id` in both directions.

    Only the first whitespace-separated token of each id is used, so values
    such as "n1 (Kitchen)" resolve to "n1".

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        graph_id: Id of the graph both vertices belong to.
        a_local, b_local: Local vertex ids (without the "<graph_id>:" prefix).
        label: Label stored on the created edge(s).
        props: Optional edge properties, stored as a JSON string.

    Returns:
        True if at least one directed edge was created. False if an id is
        blank, an endpoint vertex does not exist, or both directions are
        already present.
    """
    a_tokens = str(a_local or "").split()
    b_tokens = str(b_local or "").split()
    if not a_tokens or not b_tokens:
        # A blank id used to raise IndexError; treat it as "nothing to do".
        print("Edge endpoint id is empty. Skipping.")
        return False
    a_local, b_local = a_tokens[0], b_tokens[0]
    a_id = f"{graph_id}:{a_local}"
    b_id = f"{graph_id}:{b_local}"

    # With an unknown endpoint the MATCH below would create nothing, yet the
    # caller would be told the edge was applied. Check explicitly instead.
    found = manager.exec(
        "MATCH (v:Vertex) WHERE v.id=$a OR v.id=$b RETURN v.id AS id",
        {"a": a_id, "b": b_id}, write=False,
    ) or []
    present = {row["id"] for row in found}
    if a_id not in present or b_id not in present:
        print("Edge endpoint not found. Skipping.")
        return False

    has_forward = edge_exists(manager, graph_id, a_local, b_local)
    has_backward = edge_exists(manager, graph_id, b_local, a_local)
    if has_forward and has_backward:
        print("Edge already exists. Skipping.")
        return False

    params = {"a": a_id, "b": b_id, "lbl": label, "props": json.dumps(props or {})}
    # Create only the missing direction(s) so an existing one is not duplicated.
    if not has_forward:
        manager.exec(
            """
            MATCH (a:Vertex {id:$a}), (b:Vertex {id:$b})
            CREATE (a)-[:Edge {label:$lbl, props:$props}]->(b);
            """,
            params, write=True,
        )
    if not has_backward:
        manager.exec(
            """
            MATCH (a:Vertex {id:$a}), (b:Vertex {id:$b})
            CREATE (b)-[:Edge {label:$lbl, props:$props}]->(a);
            """,
            params, write=True,
        )
    return True


def list_working_nodes(manager, graph_id: str) -> list[Dict[str,str]]:
    """List the vertices of `graph_id` as dicts with local ids.

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        graph_id: Id of the graph whose vertices are listed.

    Returns:
        A list of `{"id", "label", "props"}` dicts ordered by stored id, where
        "id" is the stored vertex id with its "<graph_id>:" prefix removed.
    """
    rows = manager.exec(
        "MATCH (v:Vertex) WHERE v.graph_id=$gid RETURN v.id AS id, v.label AS label, v.props AS props ORDER BY id",
        {"gid": graph_id}, write=False
    ) or []
    prefix = f"{graph_id}:"

    def _local(stored_id: str) -> str:
        # Strip the exact prefix: splitting on the first ":" breaks when the
        # graph id itself contains a colon.
        if stored_id.startswith(prefix):
            return stored_id[len(prefix):]
        return stored_id.split(":", 1)[-1]

    return [{"id": _local(r["id"]), "label": r.get("label",""), "props": r.get("props")} for r in rows]

def list_working_edges(manager, graph_id: str) -> list[Dict[str, str]]:
    """List every directed edge of `graph_id` using local vertex ids.

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        graph_id: Id of the graph whose edges are listed.

    Returns:
        A list of dicts such as
        `{'a': 'n0', 'b': 'n1', 'label': 'suggested', 'props': ...}`, one per
        stored directed edge. 'a' and 'b' are the endpoint ids with the
        "<graph_id>:" prefix removed; 'props' is whatever the database returns
        for the edge's props column.
    """
    rows = manager.exec(
        """
        MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
        WHERE a.graph_id=$gid AND b.graph_id=$gid
        RETURN a.id AS a, b.id AS b, r.label AS label, r.props AS props
        """,
        {"gid": graph_id}, write=False
    ) or []
    prefix = f"{graph_id}:"

    def _local(stored_id: str) -> str:
        # Strip the exact prefix: splitting on the first ":" breaks when the
        # graph id itself contains a colon.
        if stored_id.startswith(prefix):
            return stored_id[len(prefix):]
        return stored_id.split(":", 1)[-1]

    return [
        {
            "a": _local(r["a"]),
            "b": _local(r["b"]),
            "label": r.get("label", ""),
            "props": r.get("props", {}),
        }
        for r in rows
    ]

# ---------------------
# Global candidate list (across *all* graphs)
# ---------------------

def fetch_all_pairs(manager) -> list[tuple[str,str]]:
    """Return the (source label, target label) pair of every stored directed edge.

    Labels are stringified and stripped; pairs with an empty side are dropped.
    """
    edge_rows = manager.exec(
        "MATCH (a:Vertex)-[:Edge]->(b:Vertex) RETURN a.label AS a_label, b.label AS b_label",
        {}, write=False
    ) or []
    cleaned = (
        (str(row.get("a_label") or "").strip(), str(row.get("b_label") or "").strip())
        for row in edge_rows
    )
    return [(left, right) for left, right in cleaned if left and right]


def candidate_counts_for_labels(manager, labels: list[str]) -> list[tuple[str,int]]:
    """Count, over every stored edge, the target labels reached from any of `labels`.

    Source labels are matched case-insensitively. The result is sorted by
    descending count, ties broken alphabetically by label.
    """
    all_pairs = fetch_all_pairs(manager)
    wanted = {name.lower() for name in labels}
    tally = Counter()
    for source_label, neighbor_label in all_pairs:
        if source_label.lower() in wanted:
            tally[neighbor_label] += 1
    tally.pop("", None)  # never rank an empty label
    return sorted(tally.items(), key=lambda item: (-item[1], item[0]))

# ---------------------
# Seed props copier — find best example for a label across all graphs
# ---------------------

def find_best_example_for_label(manager, label_substring: str) -> Optional[Dict[str, Any]]:
    """Find a representative stored vertex whose label matches `label_substring`.

    Only the first word of `label_substring` is used as the search needle
    (a second word is usually "room", e.g. "Living Room"). Matching is a
    case-insensitive substring test over the vertices of all graphs.

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        label_substring: Label (or label fragment) to look for.

    Returns:
        A dict with keys 'gid', 'id', 'label', 'x', 'y', 'z', 'props', or None
        when the needle is blank or nothing matches. Labels equal to the
        needle (ignoring case) are preferred; within the chosen pool the first
        vertex carrying the most frequent label is returned.
    """
    words = str(label_substring or "").split()
    if not words:
        # A blank query used to raise IndexError; there is nothing to match.
        return None
    needle = words[0].lower()

    rows = manager.exec(
        """
        MATCH (v:Vertex)
        RETURN v.graph_id AS gid, v.id AS id, v.label AS label, v.x AS x, v.y AS y, v.z AS z, v.props AS props
        """,
        {}, write=False
    ) or []

    def _number(value: Any) -> Any:
        # NULL coordinates fall back to 0.0 so callers can float() them.
        return 0.0 if value is None else value

    subs = []
    for r in rows:
        # A NULL label is treated as empty; str(None) would match needles
        # like "on" and later crash on .lower().
        label = str(r.get("label") or "")
        if needle in label.lower():
            subs.append({
                "gid": r.get("gid"), "id": r.get("id"), "label": label,
                "x": _number(r.get("x")), "y": _number(r.get("y")), "z": _number(r.get("z")),
                "props": r.get("props", {}),
            })
    if not subs:
        return None

    # Prefer exact (case-insensitive) matches; otherwise use all substring matches.
    exacts = [s for s in subs if s["label"].lower() == needle]
    pool = exacts or subs
    counts = Counter(s["label"] for s in pool)
    best_label, _ = max(counts.items(), key=lambda kv: kv[1])
    return next(s for s in pool if s["label"] == best_label)

# ---------------------
# TopologicPy graph snapshots (FULL graph export)
# ---------------------

def _build_tp_graph(vertices: list[Dict[str, Any]], edges: list[Tuple[str, str]]) -> Any:
    """Assemble a TopologicPy Graph from plain records, or a dict when TopologicPy is absent.

    Vertices: list of dicts {id, label, x, y, z, props}
    Edges: list of (src_local_id, dst_local_id)

    Each vertex is placed at a random (x, y) in [0, 10] with z = 0; the
    coordinates stored in the records are not used here. If Graph
    construction fails, a dict holding the built vertices and edges is
    returned instead.
    """
    import random

    if not _TOPOLOGICPY_AVAILABLE:
        return {"vertices": vertices, "edges": edges}

    by_local_id: Dict[str, Any] = {}
    built_vertices: list[Any] = []
    for record in vertices:
        px = random.uniform(0,10)
        py = random.uniform(0,10)
        point = TPVertex.ByCoordinates(px, py, 0)

        # The DB may hand props back as a JSON string; decode it when possible.
        payload = record.get("props", {})
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except Exception:
                payload = {"_raw_props": payload}

        if isinstance(payload, dict) and payload:
            prop_keys = list(payload.keys())
            prop_values = list(payload.values())
            try:
                point = TPTopology.SetDictionary(point, TPDict.ByKeysValues(prop_keys, prop_values))
            except Exception:
                # Best effort: a vertex without a dictionary is still usable.
                pass

        by_local_id[record["id"]] = point
        built_vertices.append(point)

    built_edges: list[Any] = []
    for start_id, end_id in edges:
        start_vertex = by_local_id.get(start_id)
        end_vertex = by_local_id.get(end_id)
        if start_vertex is None or end_vertex is None:
            continue  # edge refers to a vertex that was not supplied
        built_edges.append(TPEdge.ByStartVertexEndVertex(start_vertex, end_vertex))

    try:
        return TPGraph.ByVerticesEdges(built_vertices, built_edges)
    except Exception:
        return {"vertices": built_vertices, "edges": built_edges}


def snapshot_full_graph(manager, graph_id: str) -> Any:
    """Export the entire current graph `graph_id` from Kùzu.

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        graph_id: Id of the graph to export.

    Returns:
        Whatever `_build_tp_graph` produces for the exported vertices and
        directed edges: a TopologicPy Graph when TopologicPy is available,
        otherwise a plain dict. Vertex `props` from the DB are passed through
        so they can become vertex dictionaries.
    """
    rows_v = manager.exec(
        """
        MATCH (v:Vertex)
        WHERE v.graph_id=$gid
        RETURN v.id AS id, v.label AS label, v.x AS x, v.y AS y, v.z AS z, v.props AS props
        """,
        {"gid": graph_id}, write=False,
    ) or []

    rows_e = manager.exec(
        """
        MATCH (a:Vertex)-[:Edge]->(b:Vertex)
        WHERE a.graph_id=$gid AND b.graph_id=$gid
        RETURN a.id AS a, b.id AS b
        """,
        {"gid": graph_id}, write=False,
    ) or []

    prefix = f"{graph_id}:"

    def _local(stored_id: str) -> str:
        # Strip the exact prefix: splitting on the first ":" breaks when the
        # graph id itself contains a colon.
        if stored_id.startswith(prefix):
            return stored_id[len(prefix):]
        return stored_id.split(":", 1)[-1]

    verts = [{
        "id": _local(r["id"]),
        "label": r.get("label",""),
        "x": r.get("x",0.0),
        "y": r.get("y",0.0),
        "z": r.get("z",0.0),
        "props": r.get("props", {})
    } for r in rows_v]
    eds = [(_local(r["a"]), _local(r["b"])) for r in rows_e]
    return _build_tp_graph(verts, eds)

# ---------------------
# Global-candidate logic + LLM action picker — single action per iteration
# ---------------------

def _heuristic_pick_action(current_nodes: list[Dict[str,str]], candidate_counts: list[tuple[str,int]]):
    existing_labels = {n["label"].lower() for n in current_nodes}
    # Try ADD a high-frequency label not already present
    for lab, _ in candidate_counts:
        if lab.lower() not in existing_labels:
            attach_to = current_nodes[0]["id"] if current_nodes else None
            return {"action": "add", "new_label": lab, "attach_to": attach_to}
    # Else CONNECT first two nodes if any
    if len(current_nodes) >= 2:
        return {"action": "connect", "a": current_nodes[0]["id"], "b": current_nodes[1]["id"]}
    return {"action": "stop", "reason": "No candidates and insufficient nodes to connect."}


def _parse_llm_action(text: str) -> Dict[str, Any]:
    """Decode the LLM reply into an action dict with a normalized "action" value.

    Tolerates text around the JSON object (for example Markdown code fences).
    Raises ValueError when no JSON object with an "action" entry is found.
    """
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object in LLM reply")
    data = json.loads(text[start:end + 1])
    if not isinstance(data, dict) or "action" not in data:
        raise ValueError("LLM reply has no 'action' entry")
    raw_action = str(data["action"] or "").lower()
    # Callers compare against the exact lowercase keywords, so collapse
    # variants such as "ADD" or "add node" onto them.
    for known in ("add", "connect", "stop"):
        if known in raw_action:
            data["action"] = known
            break
    return data


def llm_pick_action(current_nodes: list[Dict[str,str]], candidate_counts: list[tuple[str,int]], current_edges: list, house_type: str = "2 bedroom apartment"):
    """Ask the LLM to choose exactly one action for the working graph.

    The LLM is told the candidate list is frequency-sorted but may propose a
    label that is not in it. If the OpenAI SDK or `OPENAI_API_KEY` is missing,
    or the request or reply parsing fails, `_heuristic_pick_action` is used.

    Args:
        current_nodes: Nodes of the working graph ({id, label, props} dicts).
        candidate_counts: (label, count) pairs, sorted by descending count.
        current_edges: Edges of the working graph.
        house_type: Free-text description of the house being designed.

    Returns:
        One of:
          {"action":"add","new_label":"Kitchen","attach_to":"<existing_local_id> (<label>)"}
          {"action":"connect","a":"<existing_local_id> (<label>)","b":"<existing_local_id> (<label>)"}
          {"action":"stop","reason":"<string>"}
        The "action" value is lowercase whenever one of the three keywords is
        recognized; an unrecognized action is returned unchanged.
    """
    if (openai is None) or (os.getenv("OPENAI_API_KEY") is None):
        return _heuristic_pick_action(current_nodes, candidate_counts)

    try:
        openai.api_key = os.environ["OPENAI_API_KEY"]
        sys_prompt = (
            "You are designing a plausible house adjacency graph. All houses have kitchens and bathrooms. You receive: "
            "(1) the house type, (2) the current graph's nodes, (3) the current graph's edges, and "
            "(4) a frequency-sorted list of candidate neighbor labels "
            "aggregated from many example graphs. Build a list of essential room nodes for the specified house type. "
            "You may use the provided list of current nodes or propose a new label from the list that you built. "
            "Choose exactly ONE action: either ADD a new node with a single connection to an existing node, "
            "or CONNECT two existing nodes, or STOP if no further action is needed. Include a reason for stopping. "
            "Do not repeat previous suggestions. "
            "Return strict JSON with one of the forms:\n"
            "{\"action\":\"add\",\"new_label\":\"<string>\",\"attach_to\":\"<existing_local_id> (<string>)\"}\n"
            "{\"action\":\"connect\",\"a\":\"<existing_local_id> (<string>)\",\"b\":\"<existing_local_id> (<string>)\"}\n"
            "{\"action\":\"stop\",\"reason\":\"<string>\"}"
        )
        user_payload = {
            "house_type": house_type,               # the type of house to be created
            "current_nodes": current_nodes,         # list of {id,label,props}
            "current_edges": current_edges or [],   # list of edge connections
            "candidate_counts": candidate_counts,   # list of [label, count], sorted desc
            "note": "The candidate list is sorted by frequency across many graphs; you may propose a new label."
        }
        messages = [{"role":"system","content":sys_prompt},
                    {"role":"user","content":json.dumps(user_payload)}]
        try:
            from openai import OpenAI  # type: ignore
            client = OpenAI()
            resp = client.chat.completions.create(model="gpt-5", messages=messages)
            text = (resp.choices[0].message.content or "").strip()
        except Exception:
            # Any failure on the client-object path falls back to the
            # module-level ChatCompletion call with a different model.
            resp = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.2,
            )
            text = (resp["choices"][0]["message"]["content"] or "").strip()

        try:
            json_data = _parse_llm_action(text)
        except (ValueError, TypeError) as exc:
            print(f"Could not parse the LLM reply ({exc}); using the heuristic instead.")
            return _heuristic_pick_action(current_nodes, candidate_counts)

        json_action = json_data["action"]
        json_a_label = json_data.get('new_label', json_data.get('a'))
        json_b_label = json_data.get('attach_to', json_data.get('b'))
        if json_action == "add":
            print(f"I suggest that you {json_action} '{json_a_label}' and connect it to '{json_b_label}'")
        elif json_action == "connect":
            print(f"I suggest that you {json_action} '{json_a_label}' to '{json_b_label}'")
        elif json_action == "stop":
            print("I suggest that you stop.")
        else:
            print("I don't know what to suggest.")
        return json_data
    except Exception as exc:
        # Deliberate best-effort boundary: the demo must keep running without the LLM.
        print(f"LLM request failed ({exc}); using the heuristic instead.")
        return _heuristic_pick_action(current_nodes, candidate_counts)

# ---------------------
# Builder loop (new logic) — seed from dataset example, then iterate
# ---------------------

def import_folder_to_kuzu(json_folder: str, manager, undirected: bool = True) -> List[str]:
    """Load every `*.json` graph in `json_folder` into Kùzu.

    Files are processed in sorted path order; each graph id is the file name
    without its extension. Returns the graph ids in import order.
    """
    imported: List[str] = []
    pattern = os.path.join(json_folder, "*.json")
    for json_path in sorted(glob.glob(pattern)):
        vertex_list, edge_list = load_topologic_graph(json_path)
        stem, _extension = os.path.splitext(os.path.basename(json_path))
        upsert_graph(manager, stem, vertex_list, edge_list, undirected=undirected)
        imported.append(stem)
    return imported


def init_working_graph(manager, working_graph_id: str, start_label: str):
    """Reset the working graph and give it a single seed vertex "n0".

    The seed copies label, coordinates and props from the best dataset example
    for `start_label` and records where they came from. When no example
    matches, a minimal seed labelled `start_label` is created at the origin.
    """
    # Drop whatever is left of a previous run: edges, then vertices, then the Graph card.
    for statement, key in (
        ("MATCH (a:Vertex)-[r:Edge]->(b:Vertex) WHERE a.graph_id=$gid AND b.graph_id=$gid DELETE r;", "gid"),
        ("MATCH (v:Vertex) WHERE v.graph_id=$gid DELETE v;", "gid"),
        ("MATCH (g:Graph) WHERE g.id=$id DELETE g;", "id"),
    ):
        manager.exec(statement, {key: working_graph_id}, write=True)
    create_graph_card_if_missing(manager, working_graph_id)

    example = find_best_example_for_label(manager, start_label)
    if example is None:
        # Nothing in the dataset resembles the requested label.
        create_vertex(
            manager, working_graph_id, local_id="n0", label=start_label,
            props={"source": "seed", "label": start_label, "roomtype": start_label},
            x=0.0, y=0.0, z=0.0,
        )
        return

    # The example's props may arrive as a JSON string.
    seed_props = example.get("props", {})
    if isinstance(seed_props, str):
        try:
            seed_props = json.loads(seed_props)
        except Exception:
            seed_props = {"_raw_props": seed_props}

    # Work on a copy and add provenance fields.
    seed_props = dict(seed_props or {})
    seed_props.update(
        source="seed_from_dataset",
        matched_query=start_label,
        matched_label=example.get("label",""),
        matched_graph_id=example.get("gid",""),
        matched_vertex_id=example.get("id",""),
    )

    create_vertex(
        manager, working_graph_id, local_id="n0",
        label=example.get("label", start_label),
        props=seed_props,
        x=float(example.get("x",0.0)),
        y=float(example.get("y",0.0)),
        z=float(example.get("z",0.0)),
    )


def graphrag_build_loop(manager, 
                        working_graph_id: str,
                        start_label: str,
                        house_type: str = "2 bedroom apartment",
                        max_steps: int = 8,
                        patience: int = 2) -> Dict[str,Any]:
    """Grow a working graph one LLM-chosen action at a time.

    Starts a fresh working graph whose seed node copies props from the best
    dataset example for `start_label`, then repeats up to `max_steps` times:
      * build the global candidate list from ALL graphs for the labels now
        present in the working graph;
      * ask `llm_pick_action` for exactly one action (ADD, CONNECT or STOP);
      * apply it to the working graph;
      * snapshot the full working graph.

    Node ids supplied by the LLM are reduced to their first token, so
    "n1 (Kitchen)" resolves to "n1".

    Args:
        manager: Kùzu manager exposing `exec(query, params, write=...)`.
        working_graph_id: Id of the graph being built (reset on entry).
        start_label: Label used to pick the seed node.
        house_type: Free-text house description forwarded to the LLM.
        max_steps: Maximum number of iterations.
        patience: Number of ineffective actions tolerated; the loop stops once
            more than this many actions produced no change.

    Returns:
        {'snapshots': [...], 'actions': [...], 'reason': str}. 'snapshots'
        starts with the seed-only graph; the stop action is not logged.
    """
    def _local_id(raw: Any) -> str:
        # The prompt asks for "<id> (<label>)"; only the id token matters.
        tokens = str(raw or "").split()
        return tokens[0] if tokens else ""

    init_working_graph(manager, working_graph_id, start_label)
    snapshots = [snapshot_full_graph(manager, working_graph_id)]
    actions_log: list[Dict[str,Any]] = []

    no_action = 0
    for _step in range(1, max_steps+1):
        current_nodes = list_working_nodes(manager, working_graph_id)
        current_edges = list_working_edges(manager, working_graph_id)
        labels_now = [n["label"] for n in current_nodes]
        cand_counts = candidate_counts_for_labels(manager, labels_now)

        action = llm_pick_action(current_nodes, cand_counts, current_edges=current_edges, house_type=house_type)
        kind = str(action.get("action") or "").strip().lower()
        # Kept as a list so "first node" is well defined.
        existing_ids = [n["id"] for n in current_nodes]

        if kind == "add":
            new_label = str(action.get("new_label") or "").strip()
            attach_to = _local_id(action.get("attach_to"))
            # Unknown attach target: fall back to the first existing node.
            if attach_to not in existing_ids:
                attach_to = existing_ids[0] if existing_ids else None
            if new_label and attach_to:
                # Next free "n<k>" id; the scan guards against collisions.
                next_index = len(current_nodes)
                while f"n{next_index}" in existing_ids:
                    next_index += 1
                new_id = f"n{next_index}"
                # Copy props and coordinates from the best example, if any.
                ex = find_best_example_for_label(manager, new_label)
                x = y = z = 0.0
                if ex is not None:
                    props = ex.get("props", {})
                    if isinstance(props, str):
                        try:
                            props = json.loads(props)
                        except Exception:
                            props = {"_raw_props": props}
                    props = dict(props or {})
                    props.update({
                        "source": "suggested_node_from_dataset",
                        "matched_label": ex.get("label",""),
                        "matched_graph_id": ex.get("gid",""),
                        "matched_vertex_id": ex.get("id",""),
                    })
                    x = float(ex.get("x") or 0.0)
                    y = float(ex.get("y") or 0.0)
                    z = float(ex.get("z") or 0.0)
                else:
                    props = {"roomtype": new_label, "source": "suggested_node_no_example"}
                create_vertex(manager, working_graph_id, local_id=new_id, label=new_label,
                              props=props, x=x, y=y, z=z)
                create_edge_bidirectional(manager, working_graph_id, attach_to, new_id, label="suggested",
                                          props={"source": "llm"})
            else:
                no_action += 1

        elif kind == "connect":
            a = _local_id(action.get("a"))
            b = _local_id(action.get("b"))
            applied = False
            # Only connect two distinct nodes that really exist.
            if a != b and a in existing_ids and b in existing_ids:
                applied = create_edge_bidirectional(manager, working_graph_id, a, b, label="suggested",
                                                    props={"source": "llm"})
            if not applied:
                # Invalid or redundant request: count it so patience can end the loop.
                no_action += 1
        else:
            # "stop" or anything unrecognized ends the build.
            return {"snapshots": snapshots, "actions": actions_log, "reason": action.get("reason","Stopped.")}

        actions_log.append(action)
        snapshots.append(snapshot_full_graph(manager, working_graph_id))

        if no_action > patience:
            print("Ran out of patience with no action. Stopping.")
            return {"snapshots": snapshots, "actions": actions_log, "reason": "Action produced no change."}

    return {"snapshots": snapshots, "actions": actions_log, "reason": f"Reached max steps ({max_steps})."}


## Create a Kuzu DB Manager

In [None]:
# Open the Kùzu database manager that every later cell passes around as `mgr`.
# NOTE(review): db_path is a machine-specific absolute path — change it before running elsewhere.
db_path = "C:/Users/sarwj/OneDrive - Cardiff University/Desktop/demo_kuzu"         # Kùzu DB directory (will be created/used)
mgr = Kuzu.Manager(db_path)

## Import the graphs and store in Kuzu (Run Once)

In [None]:
# Import every example graph from json_folder into the database behind `mgr`.
# NOTE(review): json_folder is a machine-specific absolute path — change it before running elsewhere.
json_folder = "C:/Users/sarwj/OneDrive - Cardiff University/Desktop/msd_json/sample_graphs"        # folder with your *.json graphs
# Empty the database first (recreateSchema=False); upsert_graph calls ensure_schema on each import.
# NOTE(review): presumably this removes all previously stored graphs — confirm against Kuzu.EmptyDatabase.
_ = Kuzu.EmptyDatabase(mgr,  recreateSchema = False)
gids = import_folder_to_kuzu(json_folder, mgr, undirected=True)
print("Imported", len(gids), "graphs")


## Conduct the Loop

In [None]:

# Build a new working graph from a seed label that copies props from the dataset.
result = graphrag_build_loop(mgr,
                             working_graph_id = "work_demo",
                             start_label = "Entrance",
                             house_type = "3 bedroom apartment",
                             max_steps = 20,
                             patience = 2)
print(result["reason"])    # why it stopped
# A bare expression in the middle of a cell is not displayed, so print the
# actions chosen at each step explicitly.
print(result["actions"])
last_graph = result["snapshots"][-1]  # TopologicPy Graph (if available) or dict
Topology.Show(last_graph, backgroundColor="white", vertexLabelKey="roomtype", showVertexLabel=True, vertexSize=10, width=400, height=400, camera=[0,0,4])

## Show the sequence of suggestions

In [None]:
# Replay the build: for every snapshot, print each vertex dictionary and draw the graph.
for snapshot in result["snapshots"]:
    graph = Graph.Reshape(snapshot, silent=True)
    for vertex in Graph.Vertices(graph):
        vertex_dict = Topology.Dictionary(vertex)
        print(Dictionary.Keys(vertex_dict), Dictionary.Values(vertex_dict))
    Topology.Show(graph, backgroundColor="white", vertexLabelKey="roomtype", showVertexLabel=True, vertexSize=10, width=400, height=400, camera=[0,0,4])