# Block Data Store POC Walkthrough
Use this notebook to demonstrate the end-to-end flow for the Block Data Store proof of concept. Each step mirrors the spec: we ingest Markdown, persist canonical blocks, query by structural/semantic filters, and perform a structural edit before rendering the final hierarchy.


## Environment setup
Import core modules and helpers used across the walkthrough.


In [None]:
import os
from pathlib import Path

from dotenv import load_dotenv
from block_data_store.db.engine import create_engine, create_session_factory
from block_data_store.db.schema import create_all
from block_data_store.models.block import BlockType
from block_data_store.parser import load_markdown_path
from block_data_store.store import create_document_store
from block_data_store.repositories.filters import (
    BooleanFilter,
    LogicalOperator,
    ParentFilter,
    PropertyFilter,
    WhereClause,
)

## 1. Parse the sample policy into blocks
Convert the Markdown policy into canonical `Block` models using the parser pipeline.


In [None]:
# Relative path to the sample policy document.
markdown_path = Path("data/policy_overview.md")
# Parse the Markdown file; the resulting sequence of blocks is reused by later cells.
blocks = load_markdown_path(markdown_path)
# Cell output: how many blocks the parser produced.
len(blocks)

## 2. Persist blocks to SQLite
Initialise a file-backed SQLite database (or Postgres when `USE_POSTGRES` is enabled), upsert the blocks, and capture the document identifier.


In [None]:
# Load environment variables from a local .env file before reading them below.
load_dotenv()
USE_POSTGRES = False  # Toggle this flag to switch backends
SQLITE_PATH = Path('notebooks/data/block_demo.sqlite3')

if not USE_POSTGRES:
    # Default backend: a SQLite file; make sure its directory exists first.
    sqlite_dir = SQLITE_PATH.parent
    sqlite_dir.mkdir(parents=True, exist_ok=True)
    database_url = f'sqlite+pysqlite:///{SQLITE_PATH.as_posix()}'
else:
    # Postgres backend: prefer DATABASE_URL, fall back to POSTGRES_TEST_URL.
    database_url = os.environ.get('DATABASE_URL')
    if not database_url:
        database_url = os.environ.get('POSTGRES_TEST_URL')
    if not database_url:
        raise RuntimeError('Set DATABASE_URL or POSTGRES_TEST_URL when USE_POSTGRES=True')


In [None]:
# Build the engine for whichever backend URL was selected above.
engine = create_engine(database_url)
# Presumably creates the tables the store needs - confirm in block_data_store.db.schema.
create_all(engine)
# The document store is built on top of a session factory bound to the engine.
session_factory = create_session_factory(engine)
store = create_document_store(session_factory)
# Write every parsed block to the database.
store.save_blocks(blocks)
# The first parsed block's id is used as the root/document id by the later cells.
document_id = blocks[0].id
# Cell output: the document identifier.
document_id


## 3. Inspect document hierarchy (depth=1)
Hydrate the document with one level of children to validate parent-owned ordering and section titles.


In [None]:
# Fetch the root block together with one level of children.
document = store.get_root_tree(document_id, depth=1)
# Cell output: one (block type, title) pair per top-level child, in stored order.
[
    (node.type.value, node.properties.get("title"))
    for node in document.children()
]

## 4. Query controls dataset records
Filter for `record` blocks whose parent dataset has `dataset_type = "controls"`.


In [None]:
# Select record blocks in this document whose parent is a dataset block
# carrying dataset_type == "controls".
controls = store.query_blocks(
    where=WhereClause(type=BlockType.RECORD, root_id=document.root_id),
    parent=ParentFilter(
        where=WhereClause(type=BlockType.DATASET),
        property_filter=PropertyFilter(path="dataset_type", value="controls"),
    ),
)
# Cell output: (title, category) for every matching record.
[
    (row.properties.get("title"), row.properties.get("category"))
    for row in controls
]

### Boolean filter composition
We can now combine property predicates with AND/OR/NOT to express richer queries.

In [None]:
# status == "Active" AND (category == "Preventive" OR category == "Detective")
preventive_active = BooleanFilter(
    operator=LogicalOperator.AND,
    operands=(
        PropertyFilter(path="content.object.status", value="Active"),
        BooleanFilter(
            operator=LogicalOperator.OR,
            # One equality predicate per accepted category, in the same order.
            operands=tuple(
                PropertyFilter(path="properties.category", value=category)
                for category in ("Preventive", "Detective")
            ),
        ),
    ),
)

# Apply the composed filter to the record blocks of this document.
records = store.query_blocks(
    where=WhereClause(type=BlockType.RECORD, root_id=document.root_id),
    property_filter=preventive_active,
)
# Cell output: (title, status) for every matching record.
[
    (row.properties.get("title"), row.content.get("object", {}).get("status"))
    for row in records
]

## 5. Reorder paragraph blocks
Reverse the order of the section's paragraphs to exercise optimistic locking and structural edits.


In [None]:
# Work on the first top-level child of the document.
section = document.children()[0]
# Same child ids, opposite order.
new_order = [*reversed(section.children_ids)]
store.set_children(section.id, new_order)
# Re-read the block with its children so the output reflects what was stored.
updated_section = store.get_block(section.id, depth=1)
# Cell output: the first 60 characters of each child's text, in the new order.
[
    node.content.get("text", "")[:60]
    for node in updated_section.children()
]

## 6. Render full hierarchy
Materialise the entire document tree and print a simple text representation for validation.


In [None]:
def render_tree(block, indent=0):
    """Print an indented outline of ``block`` and all of its descendants.

    Each block is printed as ``- <type>: <label>``. The label is the block's
    ``title`` property, else its ``dataset_type`` metadata entry, else the
    first line of its ``text`` content truncated to 60 characters. A block
    with none of these gets an empty label.

    Args:
        block: Object exposing ``properties`` and ``metadata`` mappings,
            ``content``, ``type.value`` and a ``children()`` method.
        indent: Number of spaces printed before this block; each nesting
            level adds two more.
    """
    prefix = " " * indent
    label = block.properties.get("title") or block.metadata.get("dataset_type")
    if not label and isinstance(block.content, dict):
        # splitlines() returns [] for empty or missing text, so indexing [0]
        # directly would raise IndexError; fall back to an empty label.
        lines = (block.content.get("text") or "").splitlines()
        label = lines[0][:60] if lines else ""
    print(f"{prefix}- {block.type.value}: {label or ''}")
    for child in block.children():
        render_tree(child, indent + 2)


# Re-read the document from the store with depth=None and print its outline.
render_tree(store.get_root_tree(document_id, depth=None))