diff --git a/cmd/engine/main.go b/cmd/engine/main.go
index cade7fa..5e1968f 100644
--- a/cmd/engine/main.go
+++ b/cmd/engine/main.go
@@ -196,24 +196,41 @@ func run() error {
 	}
 	q.Register(queue.KindIngestDocument, pipeline.Handler())
 
+	// /v1/answer/pageindex gets its OWN PageIndexStrategy instance,
+	// independent of whatever selection strategy is configured in
+	// retrieval.strategy. This way the endpoint is always available
+	// (gated by retrieval.pageindex.enabled), even on a deployment
+	// using chunked-tree as its default selection path.
+	var pageIndexStrategy *retrieval.PageIndexStrategy
+	if cfg.Retrieval.PageIndex.Enabled && llmClient != nil {
+		pageIndexStrategy = buildPageIndexStrategy(cfg.Retrieval, llmClient, store)
+		logger.Info("retrieval: pageindex answer endpoint enabled",
+			"max_hops", pageIndexStrategy.MaxHops,
+			"page_content_limit", pageIndexStrategy.PageContentLimit,
+			"model_override", cfg.Retrieval.PageIndex.Model,
+		)
+	}
+
 	deps := api.Deps{
-		Logger:     logger,
-		DB:         pool,
-		Storage:    store,
-		Queue:      q,
-		Strategy:   strategy,
-		Version:    version,
-		MultiDoc:   multiDoc,
-		LLM:        llmClient,
-		LLMModel:   modelFor(cfg.LLM),
-		AnswerSpan: cfg.Retrieval.AnswerSpan,
-		Answer:     cfg.Retrieval.Answer,
-		Planner:    planner,
-		Planning:   cfg.Retrieval.Planning,
-		ReRanker:   reRanker,
-		ReRank:     cfg.Retrieval.ReRank,
-		Replay:     replayStore,
-		Abstain:    cfg.Retrieval.Abstain,
+		Logger:            logger,
+		DB:                pool,
+		Storage:           store,
+		Queue:             q,
+		Strategy:          strategy,
+		Version:           version,
+		MultiDoc:          multiDoc,
+		LLM:               llmClient,
+		LLMModel:          modelFor(cfg.LLM),
+		AnswerSpan:        cfg.Retrieval.AnswerSpan,
+		Answer:            cfg.Retrieval.Answer,
+		Planner:           planner,
+		Planning:          cfg.Retrieval.Planning,
+		ReRanker:          reRanker,
+		ReRank:            cfg.Retrieval.ReRank,
+		Replay:            replayStore,
+		Abstain:           cfg.Retrieval.Abstain,
+		PageIndexStrategy: pageIndexStrategy,
+		PageIndex:         cfg.Retrieval.PageIndex,
 	}
 
 	srv := &http.Server{
@@ -365,11 +382,36 @@ func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storag
 		}
 		a.ModelOverride = c.Agentic.Model
 		return a
+	case "pageindex":
+		return buildPageIndexStrategy(c, client, store)
 	default:
 		return retrieval.NewChunkedTree(client)
 	}
 }
 
+// buildPageIndexStrategy constructs the page-based agentic
+// strategy with the storage-backed PageLoader and the configured
+// caps. Used by buildStrategy when retrieval.strategy=pageindex AND
+// by the /v1/answer/pageindex endpoint setup (which wires its own
+// instance regardless of the selection strategy).
+//
+// The TOCProvider is left nil here. PR-A (toc-tree-builder) adds
+// documents.toc_tree + a DB-backed provider; until it lands the
+// strategy degrades to its synthesised view, which is the
+// documented fallback path.
+func buildPageIndexStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) *retrieval.PageIndexStrategy {
+	p := retrieval.NewPageIndexStrategy(client)
+	p.PageLoader = storagePageLoader{s: store}
+	if c.PageIndex.MaxHops > 0 {
+		p.MaxHops = c.PageIndex.MaxHops
+	}
+	if c.PageIndex.PageContentLimit > 0 {
+		p.PageContentLimit = c.PageIndex.PageContentLimit
+	}
+	p.ModelOverride = c.PageIndex.Model
+	return p
+}
+
 // storageFetcher adapts a storage.Storage to retrieval.ContentFetcher.
 // The agentic strategy reads section bodies one at a time, so we
 // materialize the full reader contents into a []byte here rather than
@@ -385,6 +427,23 @@ func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) {
 	return io.ReadAll(rc)
 }
 
+// storagePageLoader adapts a storage.Storage to
+// retrieval.PageContentLoader. Mirrors storageFetcher but lives
+// behind a separate interface so the two callers (agentic /
+// pageindex) can be wired independently. The PageIndex strategy
+// materialises section bodies once per get_pages observation, so
+// reading the full reader into a []byte is the right shape.
+type storagePageLoader struct{ s storage.Storage }
+
+func (l storagePageLoader) Load(ctx context.Context, ref string) ([]byte, error) {
+	rc, _, err := l.s.Get(ctx, ref)
+	if err != nil {
+		return nil, err
+	}
+	defer rc.Close()
+	return io.ReadAll(rc)
+}
+
 // buildTLSConfig returns a *tls.Config when direct TLS is enabled, or nil
 // when the engine should serve plaintext (behind a proxy). Returning nil
 // leaves http.Server's TLSConfig unset, which is exactly what ListenAndServe
diff --git a/config.example.yaml b/config.example.yaml
index 7de0368..a7f2579 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -95,7 +95,20 @@ llm:
     reasoning_model: "gemini-2.5-pro"
 
 retrieval:
-  # strategy: single-pass | chunked-tree
+  # strategy: single-pass | chunked-tree | agentic | pageindex
+  #
+  #   single-pass:  whole tree in one LLM call; fastest, smallest docs.
+  #   chunked-tree: split the tree, reason over slices in parallel, merge.
+  #                 The default. Scales to any tree size by trading
+  #                 context for parallelism.
+  #   agentic:      iterative outline → expand → read → done loop.
+  #                 Picks per-section IDs via a tool-using model.
+  #   pageindex:    PageIndex-style page-based agentic loop. Three
+  #                 tools (get_document_structure / get_pages / done);
+  #                 the model navigates by INCLUSIVE PAGE RANGE
+  #                 rather than by section ID. Best for paginated
+  #                 documents (SEC filings, academic PDFs) where the
+  #                 per-section interface is too noisy.
   strategy: "chunked-tree"
 
   chunked_tree:
@@ -232,6 +245,54 @@ retrieval:
     # audit flows may bump this; tight memory budgets shrink it.
     ttl_seconds: 86400
 
+  # pageindex: PageIndex-style page-based agentic strategy and its
+  # dedicated POST /v1/answer/pageindex endpoint.
+  #
+  # The strategy runs a three-tool loop:
+  #   1. get_document_structure() — returns the TOC tree (titles +
+  #      page ranges, no body text).
+  #   2. get_pages(start_page, end_page) — returns the concatenated
+  #      content of every section whose page range overlaps.
+  #   3. done(answer, cited_pages, reasoning) — terminates with the
+  #      natural-language answer plus the cited inclusive ranges.
+  #
+  # Unlike /v1/answer there's no separate synthesis call — the
+  # model emits the final answer inside the done tool call. The
+  # response carries per-page-range citations with answer-span
+  # quotes, a deterministic trace_token (replayable via
+  # /v1/replay), and an optional reasoning_trace describing every
+  # tool call. Streaming via SSE is available with `stream:true`
+  # on the request body — one event per tool call so callers
+  # watch the navigation in real time.
+  #
+  # OPT-OUT. Default enabled. Disable to unwire the endpoint
+  # (returns 501); the strategy itself can still be selected by
+  # setting `retrieval.strategy: pageindex` even when this block
+  # is disabled.
+  #
+  # Works WITHOUT a persisted TOC tree (pre-PR-A state) — the
+  # strategy synthesises a TOC view from the section list when
+  # documents.toc_tree is NULL. No request fails because of a
+  # missing TOC.
+  pageindex:
+    enabled: true
+    # Cap on LLM turns per request, including the terminal done
+    # turn. The reference PageIndex demo converges in 3-5 hops on
+    # typical questions; 8 leaves buffer for retries on parse
+    # failures and the occasional extra get_pages call.
+    max_hops: 8
+    # Cap on chars one get_pages tool call returns. 16,000 ≈ 4K
+    # tokens — enough for a 5-7 page excerpt, well under any
+    # flagship model's context window. Higher values risk burning
+    # context budget on stray full-document fetches.
+    page_content_limit: 16000
+    # Override the navigation-loop model; empty inherits the
+    # request's model (which itself falls back to the engine
+    # default). Most deployments leave this blank — navigation
+    # and answer happen in the same loop, so a "small model for
+    # navigation, large for answer" split doesn't apply.
+    model: ""
+
 ingest:
   # The summarize and HyDE stages run concurrently. This caps the total
   # number of LLM calls in flight across both stages combined, so the
diff --git a/internal/api/pageindex.go b/internal/api/pageindex.go
new file mode 100644
index 0000000..92fd540
--- /dev/null
+++ b/internal/api/pageindex.go
@@ -0,0 +1,517 @@
+package api
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/hallelx2/llmgate"
+
+	"github.com/hallelx2/vectorless-engine/pkg/db"
+	"github.com/hallelx2/vectorless-engine/pkg/retrieval"
+	"github.com/hallelx2/vectorless-engine/pkg/tree"
+)
+
+// loadTreeForPageIndex resolves the document tree for the
+// pageindex answer endpoint. Routes through the optional
+// PageIndexTreeLoader hook when set (tests), otherwise falls
+// through to the real DB.
+//
+// Kept here rather than inlined in the handler so the test seam is
+// obvious: production code path goes straight to d.DB.LoadTree;
+// tests set d.PageIndexTreeLoader to an in-memory function.
+func (d Deps) loadTreeForPageIndex(ctx context.Context, docID tree.DocumentID) (*tree.Tree, error) {
+	if d.PageIndexTreeLoader != nil {
+		return d.PageIndexTreeLoader(ctx, docID)
+	}
+	return d.DB.LoadTree(ctx, docID, standaloneOrgID, "")
+}
+
+// pageIndexAnswerRequest is the body shape for /v1/answer/pageindex.
+//
+// The endpoint mirrors /v1/answer's shape but exposes the
+// page-based loop's specific knobs (max_hops, max_pages_per_fetch)
+// plus a streaming variant. Per-request fields override the
+// PageIndexBlock config when present.
+type pageIndexAnswerRequest struct {
+	DocumentID       tree.DocumentID `json:"document_id"`
+	Query            string          `json:"query"`
+	Model            string          `json:"model"`
+	MaxHops          int             `json:"max_hops"`
+	MaxPagesPerFetch int             `json:"max_pages_per_fetch"` // chars cap; named per the spec
+	Stream           bool            `json:"stream"`
+	IncludeReasoning bool            `json:"reasoning"`
+}
+
+// handleAnswerPageIndex runs the PageIndex agentic loop end-to-end
+// and returns the model's answer + page-grounded citations in one
+// round-trip.
+//
+// The loop owns the answer: there's no separate synthesis call.
+// /v1/answer extracts spans per section and then synthesises; this
+// endpoint asks the model to emit the answer directly inside the
+// done action. Citations are per-page-range with answer-span
+// quotes pulled from the cited content via the existing
+// SpanExtractor.
+//
+// Differentiators surfaced on the response:
+//   - trace_token: replay any answer byte-for-byte (substrate
+//     reused from /v1/answer; the page-based input set is folded
+//     into the hash so cross-shape tokens never collide).
+//   - reasoning_trace: per-hop tool calls + arg summaries. Opt-in
+//     via request body "reasoning":true or query ?reasoning=true.
+//   - streaming (stream=true): SSE with one event per tool call so
+//     callers watch the navigation in real time.
+//
+// Body shape (canonical, non-streaming):
+//
+//	POST /v1/answer/pageindex
+//	{ "document_id": "...", "query": "...", "model"?: "...",
+//	  "max_hops"?: 8, "max_pages_per_fetch"?: 16000,
+//	  "stream"?: false, "reasoning"?: false }
+//
+// Response: see pageIndexAnswerResponse below.
+func (d Deps) handleAnswerPageIndex(w http.ResponseWriter, r *http.Request) {
+	if d.LLM == nil {
+		writeErr(w, http.StatusNotImplemented, "answer/pageindex endpoint requires an LLM client")
+		return
+	}
+	if d.PageIndexStrategy == nil || !d.PageIndex.Enabled {
+		writeErr(w, http.StatusNotImplemented, "pageindex strategy not configured on this server (retrieval.pageindex.enabled=false)")
+		return
+	}
+
+	var body pageIndexAnswerRequest
+	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+		writeErr(w, http.StatusBadRequest, "invalid json: "+err.Error())
+		return
+	}
+	if body.DocumentID == "" || body.Query == "" {
+		writeErr(w, http.StatusBadRequest, "document_id and query are required")
+		return
+	}
+	// Allow ?reasoning=true as an alternative to the body field. Same
+	// rationale as the existing /v1/query streaming flag — caller's
+	// choice of transport.
+	if r.URL.Query().Get("reasoning") == "true" {
+		body.IncludeReasoning = true
+	}
+
+	t, err := d.loadTreeForPageIndex(r.Context(), body.DocumentID)
+	if err != nil {
+		if errors.Is(err, db.ErrNotFound) {
+			writeErr(w, http.StatusNotFound, "document not found")
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+
+	// Build a per-request strategy that wraps the engine's
+	// configured PageIndexStrategy. We do this because per-request
+	// overrides (max_hops, max_pages_per_fetch, model, OnEvent for
+	// streaming) must NOT mutate the shared Deps instance — Deps
+	// is read by many goroutines concurrently.
+	perReq := *d.PageIndexStrategy
+	if body.MaxHops > 0 {
+		perReq.MaxHops = body.MaxHops
+	}
+	if body.MaxPagesPerFetch > 0 {
+		perReq.PageContentLimit = body.MaxPagesPerFetch
+	}
+	// Per-request model override falls through to budget.ModelName
+	// the same way every other handler does.
+
+	budget := retrieval.ContextBudget{ModelName: body.Model}
+	if budget.ModelName == "" {
+		budget.ModelName = d.LLMModel
+	}
+
+	started := time.Now()
+
+	// Stream variant: hijack the response writer for SSE and emit
+	// one event per tool call.
+	if body.Stream {
+		d.serveAnswerPageIndexStream(w, r, &perReq, t, body, budget, started)
+		return
+	}
+
+	// Non-streaming: optionally capture reasoning trace via the
+	// OnEvent hook into an in-memory buffer.
+	var (
+		traceMu sync.Mutex
+		trace   []map[string]any
+	)
+	if body.IncludeReasoning {
+		perReq.OnEvent = func(ev retrieval.PageIndexEvent) {
+			traceMu.Lock()
+			defer traceMu.Unlock()
+			trace = append(trace, eventToTraceMap(ev))
+		}
+	}
+
+	res, err := perReq.SelectWithCost(r.Context(), t, body.Query, budget)
+	if err != nil {
+		d.Logger.Error("answer/pageindex: strategy failed", "err", err, "document_id", body.DocumentID)
+		writeErr(w, http.StatusInternalServerError, "pageindex strategy failed: "+err.Error())
+		return
+	}
+
+	citations := d.buildPageIndexCitations(r.Context(), t, res, body.Query, body.Model)
+
+	resp := map[string]any{
+		"document_id": body.DocumentID,
+		"query":       body.Query,
+		"answer":      res.Reasoning, // strategy stores the agent's answer here
+		"citations":   citations,
+		"strategy":    perReq.Name(),
+		"model":       budget.ModelName,
+		"hops_taken":  res.HopsTaken,
+		"usage": map[string]any{
+			"input_tokens":  res.Usage.InputTokens,
+			"output_tokens": res.Usage.OutputTokens,
+			"total_tokens":  res.Usage.TotalTokens,
+			"cost_usd":      res.Usage.CostUSD,
+			"llm_calls":     res.Usage.LLMCalls,
+		},
+		"elapsed_ms":  time.Since(started).Milliseconds(),
+		"trace_token": res.TraceToken,
+		"pages_read":  res.PagesRead,
+	}
+	if body.IncludeReasoning && len(trace) > 0 {
+		resp["reasoning_trace"] = trace
+	}
+
+	// Persist to the replay store. The trace token is keyed by
+	// document + sorted cited pages + model, so the same answer is
+	// fully replayable via the existing /v1/replay endpoint.
+	finalIDs := append([]tree.SectionID(nil), res.SelectedIDs...)
+	raw, err := marshalJSONForReplay(resp)
+	if err != nil {
+		writeJSON(w, http.StatusOK, resp)
+		return
+	}
+	d.writeJSONWithReplay(w, http.StatusOK, raw, res.TraceToken, retrieval.ReplayEntry{
+		DocumentID:  body.DocumentID,
+		Query:       body.Query,
+		Model:       budget.ModelName,
+		SelectedIDs: finalIDs,
+	})
+}
+
+// serveAnswerPageIndexStream handles the stream=true SSE variant.
+// Each tool call emits one `event:` line so the caller can watch
+// the navigation in real time. The final event ("answer") carries
+// the full JSON response so the client doesn't need to make a
+// second request.
+//
+// SSE format: `event: <type>\ndata: <json>\n\n` per the W3C spec.
+func (d Deps) serveAnswerPageIndexStream(w http.ResponseWriter, r *http.Request, strat *retrieval.PageIndexStrategy, t *tree.Tree, body pageIndexAnswerRequest, budget retrieval.ContextBudget, started time.Time) {
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		writeErr(w, http.StatusInternalServerError, "streaming requires http.Flusher; response writer does not support it")
+		return
+	}
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+	w.WriteHeader(http.StatusOK)
+
+	// emitSSE serialises ev to JSON and writes one SSE record. We
+	// swallow write errors — a disconnected client shouldn't kill
+	// the strategy mid-flight; the user closing their browser is a
+	// normal end-state.
+	var writeMu sync.Mutex
+	emitSSE := func(eventType string, payload any) {
+		raw, err := json.Marshal(payload)
+		if err != nil {
+			return
+		}
+		writeMu.Lock()
+		defer writeMu.Unlock()
+		fmt.Fprintf(w, "event: %s\ndata: %s\n\n", eventType, raw)
+		flusher.Flush()
+	}
+
+	strat.OnEvent = func(ev retrieval.PageIndexEvent) {
+		emitSSE(ev.Type, ev)
+	}
+
+	// Started event so the client knows the loop is running.
+	emitSSE("started", map[string]any{
+		"document_id": body.DocumentID,
+		"query":       body.Query,
+		"strategy":    strat.Name(),
+		"model":       budget.ModelName,
+	})
+
+	res, err := strat.SelectWithCost(r.Context(), t, body.Query, budget)
+	if err != nil {
+		emitSSE("error", map[string]string{"error": err.Error()})
+		return
+	}
+
+	citations := d.buildPageIndexCitations(r.Context(), t, res, body.Query, body.Model)
+	final := map[string]any{
+		"document_id": body.DocumentID,
+		"query":       body.Query,
+		"answer":      res.Reasoning,
+		"citations":   citations,
+		"strategy":    strat.Name(),
+		"model":       budget.ModelName,
+		"hops_taken":  res.HopsTaken,
+		"usage": map[string]any{
+			"input_tokens":  res.Usage.InputTokens,
+			"output_tokens": res.Usage.OutputTokens,
+			"total_tokens":  res.Usage.TotalTokens,
+			"cost_usd":      res.Usage.CostUSD,
+			"llm_calls":     res.Usage.LLMCalls,
+		},
+		"elapsed_ms":  time.Since(started).Milliseconds(),
+		"trace_token": res.TraceToken,
+		"pages_read":  res.PagesRead,
+	}
+	emitSSE("answer", final)
+}
+
+// buildPageIndexCitations transforms the strategy's PagesRead +
+// the section tree into the response's citations array.
+//
+// One citation per cited page range (deduplicated). Each citation
+// carries:
+//   - start_page / end_page
+//   - section_ids: every section whose [PageStart,PageEnd] overlaps
+//     the range
+//   - quote / quote_start / quote_end: pulled via the existing
+//     SpanExtractor over the concatenated cited-page content. If the
+//     extractor finds no match the quote field is empty (offsets -1).
+func (d Deps) buildPageIndexCitations(ctx context.Context, t *tree.Tree, res *retrieval.Result, query, requestModel string) []map[string]any {
+	if res == nil {
+		return nil
+	}
+	// Build a citation per UNIQUE page range present in PagesRead.
+	// The set of pages the model "read" is a superset of what it
+	// cited — some get_pages calls don't end up in the final
+	// cited_pages list — but the union is the right cone of trust
+	// to surface as evidence. The trace token is computed over
+	// only the strictly-cited ranges, which the strategy already
+	// has, so citation drift doesn't break replay.
+	seen := make(map[[2]int]struct{}, len(res.PagesRead))
+	citations := make([]map[string]any, 0, len(res.PagesRead))
+
+	for _, pr := range res.PagesRead {
+		key := [2]int{pr.StartPage, pr.EndPage}
+		if _, dup := seen[key]; dup {
+			continue
+		}
+		seen[key] = struct{}{}
+
+		c := map[string]any{
+			"start_page":  pr.StartPage,
+			"end_page":    pr.EndPage,
+			"section_ids": pr.SectionIDs,
+		}
+
+		// Quote extraction is best-effort: an LLM blip or empty
+		// content returns no quote, which is a normal degradation
+		// path. We materialise the cited content from storage and
+		// run one SpanExtractor call per citation.
+		if d.LLM != nil {
+			content := d.materialiseCitedContent(ctx, t, pr.SectionIDs)
+			if strings.TrimSpace(content) != "" {
+				ext := d.pageIndexSpanExtractor(requestModel)
+				span, _, err := ext.Extract(ctx, content, query)
+				if err == nil && span != nil && span.Text != "" {
+					c["quote"] = span.Text
+					if span.Start >= 0 && span.End > span.Start {
+						c["quote_start"] = span.Start
+						c["quote_end"] = span.End
+					}
+				}
+			}
+		}
+
+		citations = append(citations, c)
+	}
+
+	// Sort citations by start_page so output ordering is stable
+	// across runs that fetch the same set of pages in different
+	// orders. Stable sort preserves the original-fetch order for
+	// citations sharing a start page (rare in practice).
+	sort.SliceStable(citations, func(i, j int) bool {
+		return citations[i]["start_page"].(int) < citations[j]["start_page"].(int)
+	})
+
+	return citations
+}
+
+// materialiseCitedContent loads + concatenates every cited
+// section's content. Used for answer-span extraction over the
+// pages the model relied on, so the quote can have real byte
+// offsets back into the cited evidence.
+//
+// Limited to 16K chars overall (the extractor's prompt budget
+// dictates this), preferring leading sections in page order so
+// the quote anchors near the start of the citation when there's
+// too much text to fit.
+func (d Deps) materialiseCitedContent(ctx context.Context, t *tree.Tree, sectionIDs []tree.SectionID) string {
+	if len(sectionIDs) == 0 {
+		return ""
+	}
+	var (
+		b      strings.Builder
+		budget = 16000
+	)
+	for _, id := range sectionIDs {
+		if b.Len() >= budget {
+			break
+		}
+		sec := t.FindByID(id)
+		if sec == nil || sec.ContentRef == "" {
+			continue
+		}
+		rc, _, err := d.Storage.Get(ctx, sec.ContentRef)
+		if err != nil {
+			continue
+		}
+		raw, err := io.ReadAll(rc)
+		rc.Close()
+		if err != nil {
+			continue
+		}
+		text := strings.TrimSpace(string(raw))
+		remaining := budget - b.Len()
+		if remaining <= 0 {
+			break
+		}
+		if len(text) > remaining {
+			text = text[:remaining]
+		}
+		b.WriteString(text)
+		b.WriteString("\n\n")
+	}
+	return b.String()
+}
+
+// pageIndexSpanExtractor builds a SpanExtractor configured for the
+// /v1/answer/pageindex endpoint. Same fall-through pattern as the
+// existing spanExtractor helper (config override → request model →
+// engine default).
+func (d Deps) pageIndexSpanExtractor(requestModel string) *retrieval.SpanExtractor {
+	model := d.AnswerSpan.Model
+	if model == "" {
+		model = requestModel
+	}
+	if model == "" {
+		model = d.LLMModel
+	}
+	ext := retrieval.NewSpanExtractor(d.LLM, model)
+	if d.AnswerSpan.MaxQuoteLen > 0 {
+		ext.MaxQuoteLen = d.AnswerSpan.MaxQuoteLen
+	}
+	return ext
+}
+
+// eventToTraceMap converts a PageIndexEvent into the
+// reasoning_trace entry shape. Only documented fields ship —
+// nothing internal leaks via the trace.
+func eventToTraceMap(ev retrieval.PageIndexEvent) map[string]any {
+	args := map[string]any{}
+	switch ev.Type {
+	case "get_pages":
+		if ev.StartPage > 0 {
+			args["start_page"] = ev.StartPage
+		}
+		if ev.EndPage > 0 {
+			args["end_page"] = ev.EndPage
+		}
+	case "done":
+		if len(ev.CitedPages) > 0 {
+			args["cited_pages"] = ev.CitedPages
+		}
+	}
+	entry := map[string]any{
+		"hop":  ev.Hop,
+		"tool": ev.Type,
+	}
+	if len(args) > 0 {
+		entry["args"] = args
+	}
+	if ev.Reasoning != "" {
+		entry["reasoning"] = ev.Reasoning
+	}
+	if ev.Note != "" {
+		entry["note"] = ev.Note
+	}
+	if ev.CharCount > 0 {
+		entry["result_chars"] = ev.CharCount
+	}
+	if len(ev.SectionIDs) > 0 {
+		entry["sections_touched"] = ev.SectionIDs
+	}
+	if ev.Answer != "" {
+		// The final-hop "done" event carries the answer; surfacing
+		// it in the trace lets a debugger see the agent's literal
+		// final-turn output alongside the formal response field.
+		entry["answer"] = ev.Answer
+	}
+	return entry
+}
+
+// pageIndexTraceTokenFromCitations exposes the same hash a
+// PageIndexStrategy emits to callers who want to recompute the
+// token client-side. The page-range string form mirrors the one
+// the strategy uses internally so the two stay in lock-step.
+//
+// Unused at the moment but useful for tests that want to assert
+// the in-response trace_token against the canonical input set —
+// kept here rather than exported from the retrieval package so
+// the API layer owns its own input wiring.
+func pageIndexTraceTokenFromCitations(docID tree.DocumentID, model string, ranges [][2]int) string {
+	strs := make([]string, 0, len(ranges))
+	for _, r := range ranges {
+		if r[0] == r[1] {
+			strs = append(strs, fmt.Sprintf("%d", r[0]))
+		} else {
+			strs = append(strs, fmt.Sprintf("%d-%d", r[0], r[1]))
+		}
+	}
+	sort.Strings(strs)
+	h := sha256.New()
+	h.Write([]byte(string(docID)))
+	h.Write([]byte{0})
+	h.Write([]byte("1-pages"))
+	h.Write([]byte{0})
+	h.Write([]byte("pageindex:" + model))
+	h.Write([]byte{0})
+	h.Write([]byte(retrieval.SystemPromptVersion))
+	for i, s := range strs {
+		if i == 0 {
+			h.Write([]byte{0})
+		} else {
+			h.Write([]byte{0})
+		}
+		h.Write([]byte("p:" + s))
+	}
+	return hex.EncodeToString(h.Sum(nil))
+}
+
+// Compile-time guard: the PageIndex strategy must satisfy
+// retrieval.CostStrategy so SelectWithCost works without a
+// type-assert dance.
+var _ retrieval.CostStrategy = (*retrieval.PageIndexStrategy)(nil)
+
+// Compile-time check that the Deps fields the handler reads are
+// the only API-layer dependencies it pulls in. If a future edit
+// adds a new dependency here the linter / vet will catch it via
+// the unused-import path.
+var _ llmgate.Client = (llmgate.Client)(nil)
diff --git a/internal/api/pageindex_test.go b/internal/api/pageindex_test.go
new file mode 100644
index 0000000..d76bfa4
--- /dev/null
+++ b/internal/api/pageindex_test.go
@@ -0,0 +1,531 @@
+package api
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/hallelx2/llmgate"
+
+	"github.com/hallelx2/vectorless-engine/pkg/config"
+	"github.com/hallelx2/vectorless-engine/pkg/db"
+	"github.com/hallelx2/vectorless-engine/pkg/retrieval"
+	"github.com/hallelx2/vectorless-engine/pkg/storage"
+	"github.com/hallelx2/vectorless-engine/pkg/tree"
+)
+
+// pageIndexScriptedLLM is the same shape as the strategy test's
+// scripted LLM but mirrored here so the api package's tests don't
+// reach into pkg/retrieval's test file.
+type pageIndexScriptedLLM struct {
+	replies []string
+	calls   int32
+}
+
+func (p *pageIndexScriptedLLM) Complete(ctx context.Context, req llmgate.Request) (*llmgate.Response, error) {
+	i := int(atomic.AddInt32(&p.calls, 1)) - 1
+	if i >= len(p.replies) {
+		return nil, fmt.Errorf("scripted LLM exhausted at call %d", i+1)
+	}
+	return &llmgate.Response{Content: p.replies[i]}, nil
+}
+
+func (p *pageIndexScriptedLLM) CountTokens(ctx context.Context, t string) (int, error) {
+	return len(t) / 4, nil
+}
+
+// inMemoryStorage is a minimal storage.Storage backed by a map.
+// Only Get is meaningful for the pageindex handler tests.
+type inMemoryStorage struct {
+	data map[string][]byte
+}
+
+func (m *inMemoryStorage) Put(ctx context.Context, key string, r io.Reader, meta storage.Metadata) error {
+	b, err := io.ReadAll(r)
+	if err != nil {
+		return err
+	}
+	if m.data == nil {
+		m.data = map[string][]byte{}
+	}
+	m.data[key] = b
+	return nil
+}
+
+func (m *inMemoryStorage) Get(ctx context.Context, key string) (io.ReadCloser, storage.Metadata, error) {
+	b, ok := m.data[key]
+	if !ok {
+		return nil, storage.Metadata{}, storage.ErrNotFound
+	}
+	return io.NopCloser(bytes.NewReader(b)), storage.Metadata{Key: key, Size: int64(len(b))}, nil
+}
+
+func (m *inMemoryStorage) Delete(ctx context.Context, key string) error { return nil }
+
+func (m *inMemoryStorage) Exists(ctx context.Context, key string) (bool, error) {
+	_, ok := m.data[key]
+	return ok, nil
+}
+
+func (m *inMemoryStorage) SignedURL(ctx context.Context, key string, expiry time.Duration) (string, error) {
+	return "", nil
+}
+
+// pageIndexHandlerRouter wires only the endpoint under test. We
+// don't want middleware noise interfering with the assertion path.
+func pageIndexHandlerRouter(d Deps) http.Handler {
+	r := chi.NewRouter()
+	r.Route("/v1", func(r chi.Router) {
+		r.Post("/answer/pageindex", d.handleAnswerPageIndex)
+	})
+	return r
+}
+
+// buildPageIndexTestTree mirrors the strategy tests' tree so
+// assertions about which section IDs surface in citations stay
+// consistent across the two suites.
+func buildPageIndexTestTree() *tree.Tree {
+	a1 := &tree.Section{ID: "sec_a1", ParentID: "sec_a", Title: "Install", Summary: "install steps", ContentRef: "a1_ref", PageStart: 1, PageEnd: 2}
+	a2 := &tree.Section{ID: "sec_a2", ParentID: "sec_a", Title: "Config", Summary: "config keys", ContentRef: "a2_ref", PageStart: 3, PageEnd: 4}
+	b1 := &tree.Section{ID: "sec_b1", ParentID: "sec_b", Title: "Querying", Summary: "how to query", ContentRef: "b1_ref", PageStart: 5, PageEnd: 7}
+	b2 := &tree.Section{ID: "sec_b2", ParentID: "sec_b", Title: "Debt", Summary: "long-term debt", ContentRef: "b2_ref", PageStart: 8, PageEnd: 9}
+	a := &tree.Section{ID: "sec_a", ParentID: "sec_root", Title: "Setup", Summary: "setup", Children: []*tree.Section{a1, a2}, PageStart: 1, PageEnd: 4}
+	b := &tree.Section{ID: "sec_b", ParentID: "sec_root", Title: "Usage", Summary: "usage", Children: []*tree.Section{b1, b2}, PageStart: 5, PageEnd: 9}
+	root := &tree.Section{ID: "sec_root", Title: "Atlas", Children: []*tree.Section{a, b}, PageStart: 1, PageEnd: 9}
+	return &tree.Tree{DocumentID: "doc_x", Title: "Atlas", Root: root}
+}
+
+// newTestDeps wires the minimum surface for the pageindex handler
+// to run end-to-end against httptest. The strategy is constructed
+// directly (no DB / cache wrapper) so per-test LLM scripting
+// drives behaviour deterministically.
+func newTestDeps(t *testing.T, replies ...string) (Deps, *pageIndexScriptedLLM, *inMemoryStorage) {
+	t.Helper()
+
+	llm := &pageIndexScriptedLLM{replies: replies}
+	store := &inMemoryStorage{data: map[string][]byte{
+		"a1_ref": []byte("Install steps: run vle ingest..."),
+		"a2_ref": []byte("Config keys: VLE_FOO, VLE_BAR."),
+		"b1_ref": []byte("How to query the API."),
+		"b2_ref": []byte("Debt registration is in line items A and B."),
+	}}
+	strat := retrieval.NewPageIndexStrategy(llm)
+	strat.PageLoader = pageStorageLoader{s: store}
+
+	deps := Deps{
+		Logger:            slog.Default(),
+		Storage:           store,
+		LLM:               llm,
+		LLMModel:          "test-model",
+		Strategy:          strat, // unrelated to /v1/answer/pageindex; populated for sanity
+		PageIndexStrategy: strat,
+		PageIndex:         config.PageIndexBlock{Enabled: true, MaxHops: 8, PageContentLimit: 16000},
+		AnswerSpan:        config.AnswerSpanBlock{Enabled: false},
+		Replay: retrieval.NewLRUReplayStore(retrieval.LRUReplayConfig{
+			MaxEntries: 16,
+			TTL:        5 * time.Minute,
+		}),
+		PageIndexTreeLoader: func(ctx context.Context, docID tree.DocumentID) (*tree.Tree, error) {
+			if docID != "doc_x" {
+				return nil, fmt.Errorf("unknown document %q (test loader only knows doc_x)", docID)
+			}
+			return buildPageIndexTestTree(), nil
+		},
+	}
+	return deps, llm, store
+}
+
+// pageStorageLoader adapts the in-memory storage to the
+// PageContentLoader interface the strategy expects. The
+// production engine uses an identical adapter in cmd/engine/main.go;
+// duplicating it here keeps the test self-contained.
+type pageStorageLoader struct{ s storage.Storage }
+
+func (l pageStorageLoader) Load(ctx context.Context, ref string) ([]byte, error) {
+	rc, _, err := l.s.Get(ctx, ref)
+	if err != nil {
+		return nil, err
+	}
+	defer rc.Close()
+	return io.ReadAll(rc)
+}
+
+// TestHandleAnswerPageIndexHappyPath: the canonical 3-tool
+// sequence ends with a JSON response carrying answer, citations,
+// hops_taken, trace_token, pages_read, and a usage block. The
+// LLM is NOT called for span extraction in this test path because
+// AnswerSpan.Enabled is false at the config-block level — but the
+// citations still surface section_ids and page ranges.
+func TestHandleAnswerPageIndexHappyPath(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_document_structure","reasoning":"orient"}`,
+		`{"tool":"get_pages","start_page":1,"end_page":2,"reasoning":"install lives here"}`,
+		`{"tool":"done","answer":"Run vle ingest.","cited_pages":[[1,2]],"reasoning":"install on pages 1-2"}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"how do I install?","model":"test-model"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+	var resp map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("unmarshal response: %v\n%s", err, rec.Body.String())
+	}
+	if resp["answer"].(string) != "Run vle ingest." {
+		t.Errorf("answer = %v, want \"Run vle ingest.\"", resp["answer"])
+	}
+	if resp["strategy"].(string) != "pageindex" {
+		t.Errorf("strategy = %v, want pageindex", resp["strategy"])
+	}
+	if resp["hops_taken"].(float64) != 3 {
+		t.Errorf("hops_taken = %v, want 3", resp["hops_taken"])
+	}
+	if resp["trace_token"].(string) == "" {
+		t.Error("trace_token must be non-empty on success")
+	}
+	cits, ok := resp["citations"].([]any)
+	if !ok || len(cits) == 0 {
+		t.Fatalf("citations missing or empty: %v", resp["citations"])
+	}
+	first := cits[0].(map[string]any)
+	if first["start_page"].(float64) != 1 || first["end_page"].(float64) != 2 {
+		t.Errorf("first citation page range = %v-%v, want 1-2", first["start_page"], first["end_page"])
+	}
+	secs, ok := first["section_ids"].([]any)
+	if !ok || len(secs) == 0 {
+		t.Errorf("first citation must list section_ids, got %v", first["section_ids"])
+	}
+	// pages_read must surface the get_pages invocation
+	pages, ok := resp["pages_read"].([]any)
+	if !ok || len(pages) != 1 {
+		t.Errorf("pages_read = %v, want 1 entry", resp["pages_read"])
+	}
+}
+
+// TestHandleAnswerPageIndexReasoningTrace: with reasoning=true,
+// the response carries a reasoning_trace array describing each
+// tool call. Each entry must have hop + tool + (optional) args.
+func TestHandleAnswerPageIndexReasoningTrace(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_document_structure","reasoning":"orient"}`,
+		`{"tool":"get_pages","start_page":3,"end_page":4,"reasoning":"look at config"}`,
+		`{"tool":"done","answer":"Config keys are VLE_*","cited_pages":[[3,4]],"reasoning":"config on 3-4"}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"config?","reasoning":true}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+
+	var resp map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &resp)
+	trace, ok := resp["reasoning_trace"].([]any)
+	if !ok || len(trace) != 3 {
+		t.Fatalf("reasoning_trace = %v, want 3 entries", resp["reasoning_trace"])
+	}
+	// Every entry must have a hop number and a tool tag.
+	for i, raw := range trace {
+		entry := raw.(map[string]any)
+		if _, ok := entry["hop"]; !ok {
+			t.Errorf("trace entry %d missing hop", i)
+		}
+		if _, ok := entry["tool"]; !ok {
+			t.Errorf("trace entry %d missing tool", i)
+		}
+	}
+	// The get_pages entry must surface its args.
+	if args, ok := trace[1].(map[string]any)["args"].(map[string]any); !ok {
+		t.Errorf("get_pages trace entry missing args, got %v", trace[1])
+	} else {
+		if args["start_page"].(float64) != 3 || args["end_page"].(float64) != 4 {
+			t.Errorf("trace args = %v, want start=3 end=4", args)
+		}
+	}
+}
+
+// TestHandleAnswerPageIndexReasoningTraceQueryParam: the
+// ?reasoning=true query param is an alternative to the body field.
+// Some clients prefer it for GET-friendliness when prototyping.
+func TestHandleAnswerPageIndexReasoningTraceQueryParam(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"done","answer":"x","cited_pages":[]}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex?reasoning=true", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+
+	var resp map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &resp)
+	if _, ok := resp["reasoning_trace"]; !ok {
+		t.Errorf("?reasoning=true must produce reasoning_trace, got: %v", resp)
+	}
+}
+
+// TestHandleAnswerPageIndexBadRequest: missing document_id /
+// query → 400.
+func TestHandleAnswerPageIndexBadRequest(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t)
+
+	for _, body := range []string{
+		`{}`,
+		`{"document_id":"doc_x"}`, // missing query
+		`{"query":"q"}`,           // missing document_id
+		`not-json`,
+	} {
+		req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", strings.NewReader(body))
+		rec := httptest.NewRecorder()
+		pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+		if rec.Code != http.StatusBadRequest {
+			t.Errorf("body %q: status = %d, want 400", body, rec.Code)
+		}
+	}
+}
+
+// TestHandleAnswerPageIndexDocumentNotFound: a tree-loader that
+// returns ErrNotFound bubbles up as 404. The test loader rejects
+// unknown doc IDs.
+func TestHandleAnswerPageIndexDocumentNotFound(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t)
+	// Re-wire the loader to return ErrNotFound for the right error
+	// path. The default test loader returns a generic error
+	// (different status — also valid but less specific).
+	deps.PageIndexTreeLoader = func(ctx context.Context, docID tree.DocumentID) (*tree.Tree, error) {
+		return nil, dbNotFoundError()
+	}
+
+	body := strings.NewReader(`{"document_id":"missing","query":"q"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusNotFound {
+		t.Errorf("status = %d, want 404 (body: %s)", rec.Code, rec.Body.String())
+	}
+}
+
+// TestHandleAnswerPageIndexDisabled: when PageIndex.Enabled=false
+// or PageIndexStrategy is nil, the endpoint returns 501. Two
+// failure modes, both must produce the same status.
+func TestHandleAnswerPageIndexDisabled(t *testing.T) {
+	t.Parallel()
+
+	// Mode 1: config disabled.
+	deps, _, _ := newTestDeps(t)
+	deps.PageIndex.Enabled = false
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusNotImplemented {
+		t.Errorf("config disabled: status = %d, want 501", rec.Code)
+	}
+
+	// Mode 2: strategy nil.
+	deps2, _, _ := newTestDeps(t)
+	deps2.PageIndexStrategy = nil
+
+	body = strings.NewReader(`{"document_id":"doc_x","query":"q"}`)
+	req = httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec = httptest.NewRecorder()
+	pageIndexHandlerRouter(deps2).ServeHTTP(rec, req)
+	if rec.Code != http.StatusNotImplemented {
+		t.Errorf("strategy nil: status = %d, want 501", rec.Code)
+	}
+}
+
+// TestHandleAnswerPageIndexNoLLM: no LLM client → 501.
+func TestHandleAnswerPageIndexNoLLM(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t)
+	deps.LLM = nil
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusNotImplemented {
+		t.Errorf("status = %d, want 501", rec.Code)
+	}
+}
+
+// TestHandleAnswerPageIndexReplayPersisted: the response is
+// stored in the replay store under its trace_token, and the
+// existing /v1/replay handler returns the byte-identical body.
+func TestHandleAnswerPageIndexReplayPersisted(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"done","answer":"X","cited_pages":[[5,7]]}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"replay-me"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("first call: status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+	originalBody := rec.Body.Bytes()
+
+	var resp map[string]any
+	_ = json.Unmarshal(originalBody, &resp)
+	token := resp["trace_token"].(string)
+	if token == "" {
+		t.Fatal("trace_token must be populated for replay")
+	}
+
+	// Now hit /v1/replay with the same token + query + doc id.
+	r2 := chi.NewRouter()
+	r2.Route("/v1", func(r chi.Router) {
+		r.Post("/replay", deps.handleReplay)
+	})
+	replayBody := strings.NewReader(fmt.Sprintf(`{"trace_token":%q,"query":"replay-me","document_id":"doc_x"}`, token))
+	req2 := httptest.NewRequest(http.MethodPost, "/v1/replay", replayBody)
+	rec2 := httptest.NewRecorder()
+	r2.ServeHTTP(rec2, req2)
+	if rec2.Code != http.StatusOK {
+		t.Fatalf("replay status = %d, body = %s", rec2.Code, rec2.Body.String())
+	}
+	// The original /v1/answer/pageindex response carries a trailing
+	// newline from marshalJSONForReplay; the replay path returns
+	// the exact stored bytes, so we compare with the newline.
+	if !bytes.Equal(originalBody, rec2.Body.Bytes()) {
+		t.Errorf("replay bytes differ from original\nORIG: %s\nREP : %s", originalBody, rec2.Body.Bytes())
+	}
+}
+
+// TestHandleAnswerPageIndexStreaming: with stream=true, the
+// response is SSE with one event per tool call plus a started +
+// answer event. The data payloads are JSON.
+func TestHandleAnswerPageIndexStreaming(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_document_structure"}`,
+		`{"tool":"get_pages","start_page":1,"end_page":2}`,
+		`{"tool":"done","answer":"streamed","cited_pages":[[1,2]]}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q","stream":true}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("stream status = %d", rec.Code)
+	}
+	if ct := rec.Header().Get("Content-Type"); ct != "text/event-stream" {
+		t.Errorf("Content-Type = %q, want text/event-stream", ct)
+	}
+	out := rec.Body.String()
+	for _, want := range []string{
+		"event: started",
+		"event: get_document_structure",
+		"event: get_pages",
+		"event: done",
+		"event: answer",
+	} {
+		if !strings.Contains(out, want) {
+			t.Errorf("missing SSE %q in stream body:\n%s", want, out)
+		}
+	}
+}
+
+// TestHandleAnswerPageIndexPerRequestOverrides: max_hops and
+// max_pages_per_fetch on the body override the engine's config.
+// We can't measure max_pages_per_fetch from outside (it shapes
+// content size, not response shape), but we can verify max_hops
+// caps the loop. Set max_hops=1 and a script that emits
+// 5 turns — the strategy must stop after 1.
+func TestHandleAnswerPageIndexPerRequestOverrides(t *testing.T) {
+	t.Parallel()
+
+	// 6 replies but max_hops=1 → only the first runs as a normal
+	// hop, then forceDone kicks in (2 LLM calls total counting the
+	// force-done turn). The model never emits a valid done, so
+	// the response answer is empty.
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_pages","start_page":1,"end_page":2}`,
+		`{"tool":"get_pages","start_page":3,"end_page":4}`,
+		`{"tool":"get_pages","start_page":5,"end_page":6}`,
+		`{"tool":"get_pages","start_page":7,"end_page":9}`,
+		`{"tool":"get_pages","start_page":1,"end_page":1}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q","max_hops":1}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+
+	var resp map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &resp)
+	// hops_taken includes the forced done turn, so cap=1 → at most
+	// 2 actual calls.
+	if hops, ok := resp["hops_taken"].(float64); !ok || hops > 2 {
+		t.Errorf("hops_taken = %v, want <=2 (max_hops=1 + 1 force-done)", resp["hops_taken"])
+	}
+}
+
+// TestHandleAnswerPageIndexTOCFallback: with a tree that has
+// page metadata but no persisted TOC, the synthesised TOC drives
+// the get_document_structure tool. This test runs end-to-end and
+// asserts the response shape; the strategy-level test covers the
+// synthesis logic directly.
+func TestHandleAnswerPageIndexTOCFallback(t *testing.T) {
+	t.Parallel()
+
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_document_structure"}`,
+		`{"tool":"done","answer":"saw the toc","cited_pages":[]}`,
+	)
+	// PageIndexStrategy.TOC is left nil — the synthesised path is
+	// the default for any deployment without PR-A merged.
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"what is in the doc?"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+	var resp map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &resp)
+	if resp["answer"].(string) != "saw the toc" {
+		t.Errorf("answer = %v, want 'saw the toc'", resp["answer"])
+	}
+}
+
+// dbNotFoundError returns the real db.ErrNotFound sentinel so the
+// handler's errors.Is(err, db.ErrNotFound) check fires.
+func dbNotFoundError() error {
+	return db.ErrNotFound
+}
diff --git a/internal/api/server.go b/internal/api/server.go
index b65ab78..e0dd375 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -98,6 +98,28 @@ type Deps struct {
 	// carries abstained=true and an empty sections / citations list
 	// rather than risk hallucinating an answer from weak evidence.
 	Abstain config.AbstainBlock
+
+	// PageIndexStrategy is the dedicated page-based agentic strategy
+	// instance used by /v1/answer/pageindex. Wired in main.go from
+	// the same storage backend the rest of the engine uses, even
+	// when the selection strategy chosen by retrieval.strategy is
+	// something else. Nil disables the endpoint (returns 501) along
+	// with PageIndex.Enabled=false.
+	PageIndexStrategy *retrieval.PageIndexStrategy
+
+	// PageIndex carries the server-side config for the page-based
+	// answer endpoint. The body-level fields max_hops /
+	// max_pages_per_fetch on /v1/answer/pageindex override
+	// PageIndex.MaxHops / PageIndex.PageContentLimit per request.
+	PageIndex config.PageIndexBlock
+
+	// PageIndexTreeLoader is a test seam that overrides how the
+	// /v1/answer/pageindex handler resolves the document tree.
+	// Nil routes through d.DB.LoadTree (the production path).
+	// Tests set this to a deterministic in-memory function so the
+	// handler can run end-to-end via httptest without a real
+	// Postgres backend.
+	PageIndexTreeLoader func(ctx context.Context, docID tree.DocumentID) (*tree.Tree, error)
 }
 
 // Router builds and returns the chi router wired with v1 routes.
@@ -124,6 +146,7 @@ func Router(d Deps) http.Handler {
 		r.Post("/query", d.handleQuery)
 		r.Post("/query/multi", d.handleQueryMulti)
 		r.Post("/answer", d.handleAnswer)
+		r.Post("/answer/pageindex", d.handleAnswerPageIndex)
 		r.Post("/replay", d.handleReplay)
 	})
 
diff --git a/openapi.yaml b/openapi.yaml
index 89dc769..ddd8022 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -279,6 +279,93 @@ paths:
         "501":
           description: Endpoint not available — no LLM client configured
 
+  /v1/answer/pageindex:
+    post:
+      tags: [Query]
+      summary: PageIndex-style page-based agentic answer
+      operationId: answerPageIndex
+      description: |
+        Quote-grounded answer endpoint backed by the page-based
+        agentic strategy. The model navigates the document via a
+        three-tool loop:
+
+          1. get_document_structure() — returns the TOC tree
+             (titles + page ranges, no body text).
+          2. get_pages(start_page, end_page) — returns the
+             concatenated content of every section whose page
+             range overlaps the requested range.
+          3. done(answer, cited_pages, reasoning) — terminates
+             with the natural-language answer plus the inclusive
+             page ranges the answer relies on.
+
+        Unlike /v1/answer this endpoint runs no separate
+        synthesis call — the model produces the final answer
+        inside the done tool call. Citations are per-page-range
+        with answer-span quotes pulled from the cited content.
+
+        Differentiators:
+          - `trace_token` substrate is reused: the page-based
+            input set (sorted cited page ranges) is folded into
+            the same sha256 the per-section endpoints use, so
+            /v1/replay accepts page-index responses uniformly.
+          - `reasoning_trace` lists every tool call the agent
+            made, opt-in via body `reasoning:true` or query
+            `?reasoning=true`.
+          - Streaming (`stream:true`) returns Server-Sent
+            Events with one event per tool call.
+
+        Gracefully degrades when no LLM-built TOC tree has
+        been persisted for the document yet — the strategy
+        synthesises a TOC view from the section list rather
+        than failing the request.
+      parameters:
+        - name: reasoning
+          in: query
+          required: false
+          description: |
+            Alternative to the body's `reasoning` field. When set
+            to `true` the response carries a `reasoning_trace`
+            array describing every tool call.
+          schema:
+            type: boolean
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/PageIndexAnswerRequest"
+      responses:
+        "200":
+          description: |
+            Synthesised answer plus page-grounded citations. When
+            `stream` is `true`, the response is Server-Sent
+            Events (Content-Type `text/event-stream`) rather than
+            JSON; see the description for event types.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PageIndexAnswerResponse"
+            text/event-stream:
+              schema:
+                type: string
+                description: |
+                  SSE event stream. Event types emitted in
+                  order: `started`, one event per tool call
+                  (`get_document_structure`, `get_pages`,
+                  `done`), and finally `answer` carrying the
+                  full PageIndexAnswerResponse payload. On
+                  error a single `error` event is emitted.
+        "400":
+          description: Missing or invalid request body.
+        "404":
+          $ref: "#/components/responses/NotFound"
+        "501":
+          description: |
+            Endpoint not available. Either the server has no LLM
+            client configured, or
+            `retrieval.pageindex.enabled=false`, or the
+            PageIndexStrategy instance was not wired at boot.
+
   /v1/replay:
     post:
       tags: [Query]
@@ -578,7 +665,7 @@ components:
           type: string
         strategy:
           type: string
-          enum: [single-pass, chunked-tree, agentic]
+          enum: [single-pass, chunked-tree, agentic, pageindex]
         model:
           type: string
         sections:
@@ -948,6 +1035,249 @@ components:
             re-rank ran. Higher means the section is more directly
             relevant to the query.
 
+    PageIndexAnswerRequest:
+      type: object
+      description: |
+        Body for POST /v1/answer/pageindex. The endpoint exposes
+        per-request overrides for the page-based loop's caps
+        (max_hops, max_pages_per_fetch) alongside the standard
+        document_id / query / model fields.
+      required: [document_id, query]
+      properties:
+        document_id:
+          type: string
+        query:
+          type: string
+          description: Natural-language query.
+        model:
+          type: string
+          description: |
+            Override the LLM model used by the navigation loop.
+            Falls back to `retrieval.pageindex.model`, then to the
+            engine's default model.
+        max_hops:
+          type: integer
+          description: |
+            Cap on the number of LLM turns the loop consumes,
+            counting the terminal `done` turn. Overrides
+            `retrieval.pageindex.max_hops` for this request only.
+            Default at the server level is 8.
+        max_pages_per_fetch:
+          type: integer
+          description: |
+            Cap on the characters one `get_pages` tool call may
+            return. Keeps a stray full-document fetch from
+            torching the model's context window. Overrides
+            `retrieval.pageindex.page_content_limit`. Default
+            16000.
+        stream:
+          type: boolean
+          description: |
+            When true, the response is Server-Sent Events: one
+            event per tool call (get_document_structure /
+            get_pages / done) followed by a terminal `answer`
+            event carrying the full PageIndexAnswerResponse
+            payload. Lets the caller WATCH the agent navigate
+            in real time rather than waiting for the final
+            answer.
+        reasoning:
+          type: boolean
+          description: |
+            When true, the response carries a `reasoning_trace`
+            array describing every tool call the agent made
+            (hop, tool tag, args, optional sections-touched and
+            result-chars metadata). Equivalent to the
+            `?reasoning=true` query parameter.
+
+    PageIndexAnswerResponse:
+      type: object
+      description: |
+        Non-streaming response shape. The `answer` field carries
+        the agent's natural-language reply produced inside the
+        terminal `done` tool call (no separate synthesis call).
+        Citations are per-page-range (not per-section) with
+        answer-span quotes pulled from the cited content.
+      properties:
+        document_id:
+          type: string
+        query:
+          type: string
+        answer:
+          type: string
+          description: |
+            Natural-language answer the agent emitted inside its
+            terminal `done` tool call. Empty when the loop hit
+            max_hops without the model finalising — callers
+            should fall through to a manual /v1/query or retry.
+        citations:
+          type: array
+          items:
+            $ref: "#/components/schemas/PageIndexCitation"
+        strategy:
+          type: string
+          enum: [pageindex]
+        model:
+          type: string
+        hops_taken:
+          type: integer
+          description: |
+            Number of LLM turns the loop consumed, counting any
+            forced terminal turn fired when max_hops was hit.
+        usage:
+          type: object
+          properties:
+            input_tokens: {type: integer}
+            output_tokens: {type: integer}
+            total_tokens: {type: integer}
+            cost_usd: {type: number}
+            llm_calls: {type: integer}
+        elapsed_ms:
+          type: integer
+        trace_token:
+          type: string
+          description: |
+            Deterministic 64-char hex sha256 token over the
+            document, system-prompt version, "pageindex:" model
+            tag, and sorted cited page ranges (e.g. ["1-2","5-7"]).
+            Two runs that cite the same pages — even via
+            different navigation paths — collapse to the same
+            token. Pass to /v1/replay with the original `query`
+            and `document_id` to fetch the byte-identical
+            response.
+        pages_read:
+          type: array
+          items:
+            $ref: "#/components/schemas/PageReadEntry"
+          description: |
+            Every `get_pages` invocation the loop made,
+            including ones whose ranges the model did not end
+            up citing. Useful for cost debugging and for
+            building a "navigation timeline" UI.
+        reasoning_trace:
+          type: array
+          items:
+            $ref: "#/components/schemas/PageIndexTraceEntry"
+          description: |
+            Per-hop tool calls + arg summaries. Present only
+            when the request opted in via `reasoning:true` or
+            `?reasoning=true`.
+
+    PageIndexCitation:
+      type: object
+      description: |
+        One citation behind the agent's answer. The pages
+        identify the cited range; `section_ids` lists every
+        section whose page range overlaps it; `quote` is the
+        verbatim answer-span the extractor judged most
+        relevant. When the extractor finds nothing in the
+        cited content the `quote` field is omitted (no
+        sentinel: absent means absent).
+      properties:
+        start_page:
+          type: integer
+          description: Inclusive first page in the citation.
+        end_page:
+          type: integer
+          description: Inclusive last page in the citation.
+        section_ids:
+          type: array
+          items:
+            type: string
+          description: |
+            Every section whose [page_start,page_end] overlaps
+            this citation, in document order.
+        quote:
+          type: string
+          description: |
+            Verbatim quote from the cited content judged most
+            relevant to the query. Omitted when no quote was
+            extracted (e.g. extractor returned nothing).
+        quote_start:
+          type: integer
+          description: |
+            Byte offset of the quote in the concatenated cited
+            content. -1 when the extractor returned a quote but
+            could not locate it verbatim (typically a
+            paraphrase).
+        quote_end:
+          type: integer
+
+    PageReadEntry:
+      type: object
+      description: |
+        One `get_pages` call the agent made during navigation.
+        Surfaces what the model actually fetched (sections
+        touched + char count) rather than what it cited, so a
+        reviewer can audit the full evidence cone.
+      properties:
+        start_page:
+          type: integer
+        end_page:
+          type: integer
+        section_ids:
+          type: array
+          items:
+            type: string
+        char_count:
+          type: integer
+          description: |
+            Number of characters returned to the model after
+            clipping at `page_content_limit`. Bytes-on-the-wire,
+            not bytes-requested.
+
+    PageIndexTraceEntry:
+      type: object
+      description: |
+        One tool call in the navigation timeline. Lets a
+        reviewer watch the agent's reasoning without paying for
+        a full streaming connection.
+      properties:
+        hop:
+          type: integer
+          description: 1-indexed turn number.
+        tool:
+          type: string
+          enum: [get_document_structure, get_pages, done]
+        reasoning:
+          type: string
+          description: |
+            One-sentence rationale the agent emitted alongside
+            the tool call. Populated only when the model
+            actually filled the `reasoning` field on the call.
+        args:
+          type: object
+          description: |
+            Tool-specific arguments. For `get_pages`,
+            `start_page` and `end_page`; for `done`,
+            `cited_pages`. Omitted for `get_document_structure`
+            (no args).
+        result_chars:
+          type: integer
+          description: |
+            Number of characters the tool result returned.
+            Populated for `get_pages` and (when present)
+            `get_document_structure`.
+        sections_touched:
+          type: array
+          items:
+            type: string
+          description: |
+            Section IDs the `get_pages` call materialised
+            content from.
+        answer:
+          type: string
+          description: |
+            The agent's final-turn answer string. Present only
+            on the terminal `done` entry; mirrors the
+            top-level `answer` field for debuggers reading the
+            trace.
+        note:
+          type: string
+          description: |
+            Optional human-readable note (e.g. "invalid range"
+            when the model asked for pages past the document's
+            end).
+
     ReplayRequest:
       type: object
       description: |
diff --git a/pkg/config/config.go b/pkg/config/config.go
index d4f2723..0f0e291 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -289,6 +289,54 @@ type RetrievalConfig struct {
 	ReRank      ReRankBlock      `yaml:"rerank"`
 	Replay      ReplayBlock      `yaml:"replay"`
 	Abstain     AbstainBlock     `yaml:"abstain"`
+	PageIndex   PageIndexBlock   `yaml:"pageindex"`
+}
+
+// PageIndexBlock configures the PageIndex page-based agentic
+// strategy and its dedicated /v1/answer/pageindex endpoint.
+//
+// The strategy is registered as a Strategy choice
+// (retrieval.strategy: pageindex) AND is wired into the
+// /v1/answer/pageindex endpoint regardless of which selection
+// strategy the server runs by default. The Enabled flag controls
+// the endpoint only — flipping it off does not unregister the
+// strategy, so a deployment that wants the strategy available
+// to /v1/query but not the dedicated answer endpoint can still
+// disable the endpoint here.
+//
+// Defaults are tuned to match the reference PageIndex demo: 8
+// hops covers structure → 3 navigation calls → done + buffer,
+// and 16,000 chars of get_pages content fits a 5-7 page excerpt
+// comfortably under any flagship model's context window.
+//
+// Per-request overrides on /v1/answer/pageindex (max_hops,
+// max_pages_per_fetch) win over this block; this block is the
+// server-side default.
+type PageIndexBlock struct {
+	// Enabled toggles the /v1/answer/pageindex endpoint. Default:
+	// true. When false, the endpoint returns 501. The
+	// PageIndexStrategy itself stays registered as a selection
+	// strategy regardless — disabling here only unwires the
+	// dedicated answer surface.
+	Enabled bool `yaml:"enabled"`
+
+	// MaxHops caps the number of LLM turns one /v1/answer/pageindex
+	// request consumes, including the terminal done turn. Default:
+	// 8. Set to 0 to use the strategy's built-in default.
+	MaxHops int `yaml:"max_hops"`
+
+	// PageContentLimit caps how many chars a single get_pages tool
+	// call returns. Default: 16000. Keeps one stray full-document
+	// fetch from torching the model's context window.
+	PageContentLimit int `yaml:"page_content_limit"`
+
+	// Model overrides the LLM model used for the navigation loop.
+	// Empty means use the request's model (which itself falls back
+	// to the engine default). Useful when navigation should run on
+	// a fast/cheap model while answering benefits from a stronger
+	// one — though the PageIndex protocol does both in the same
+	// loop, so most deployments leave this blank.
+	Model string `yaml:"model"`
 }
 
 // AbstainBlock configures the Phase 2.4 abstention behaviour.
@@ -563,6 +611,11 @@ func Default() Config {
 				Enabled: true,
 				Below:   0.4,
 			},
+			PageIndex: PageIndexBlock{
+				Enabled:          true,
+				MaxHops:          8,
+				PageContentLimit: 16000,
+			},
 		},
 		Ingest: IngestConfig{
 			GlobalLLMConcurrency: 12,
@@ -867,6 +920,27 @@ func applyEnvOverrides(c *Config) {
 			c.Retrieval.Abstain.Below = f
 		}
 	}
+	if v := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED"); v != "" {
+		switch strings.ToLower(strings.TrimSpace(v)) {
+		case "1", "true", "yes", "on":
+			c.Retrieval.PageIndex.Enabled = true
+		case "0", "false", "no", "off":
+			c.Retrieval.PageIndex.Enabled = false
+		}
+	}
+	if v := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n >= 0 {
+			c.Retrieval.PageIndex.MaxHops = n
+		}
+	}
+	if v := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n >= 0 {
+			c.Retrieval.PageIndex.PageContentLimit = n
+		}
+	}
+	if v := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MODEL"); v != "" {
+		c.Retrieval.PageIndex.Model = v
+	}
 }
 
 // Validate checks that required fields for the selected drivers are set.
@@ -920,7 +994,7 @@ func (c Config) Validate() error {
 	}
 
 	switch c.Retrieval.Strategy {
-	case "single-pass", "chunked-tree", "agentic":
+	case "single-pass", "chunked-tree", "agentic", "pageindex":
 	default:
 		return fmt.Errorf("unknown retrieval.strategy: %q", c.Retrieval.Strategy)
 	}
@@ -1000,5 +1074,12 @@ func (c Config) Validate() error {
 		return fmt.Errorf("retrieval.abstain.below must be in [0.0, 1.0], got %v", c.Retrieval.Abstain.Below)
 	}
 
+	if c.Retrieval.PageIndex.MaxHops < 0 {
+		return fmt.Errorf("retrieval.pageindex.max_hops must be >= 0, got %d", c.Retrieval.PageIndex.MaxHops)
+	}
+	if c.Retrieval.PageIndex.PageContentLimit < 0 {
+		return fmt.Errorf("retrieval.pageindex.page_content_limit must be >= 0, got %d", c.Retrieval.PageIndex.PageContentLimit)
+	}
+
 	return nil
 }
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index e4ba3a6..43cd902 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -525,7 +525,7 @@ func TestValidateLLMDrivers(t *testing.T) {
 
 func TestValidateRetrievalStrategy(t *testing.T) {
 	t.Parallel()
-	for _, s := range []string{"single-pass", "chunked-tree"} {
+	for _, s := range []string{"single-pass", "chunked-tree", "agentic", "pageindex"} {
 		cfg := Default()
 		cfg.Database.URL = "postgres://localhost/test"
 		cfg.Retrieval.Strategy = s
@@ -542,6 +542,126 @@ func TestValidateRetrievalStrategy(t *testing.T) {
 	}
 }
 
+// TestPageIndexDefaults locks in the PageIndex block's defaults so
+// a regression on shipping values is loud. Endpoint enabled by
+// default, 8 hops, 16K char limit.
+func TestPageIndexDefaults(t *testing.T) {
+	t.Parallel()
+	cfg := Default()
+	if !cfg.Retrieval.PageIndex.Enabled {
+		t.Error("retrieval.pageindex.enabled should default to true (opt-out)")
+	}
+	if cfg.Retrieval.PageIndex.MaxHops != 8 {
+		t.Errorf("max_hops = %d, want 8", cfg.Retrieval.PageIndex.MaxHops)
+	}
+	if cfg.Retrieval.PageIndex.PageContentLimit != 16000 {
+		t.Errorf("page_content_limit = %d, want 16000", cfg.Retrieval.PageIndex.PageContentLimit)
+	}
+	if cfg.Retrieval.PageIndex.Model != "" {
+		t.Errorf("model default should be empty (inherit), got %q", cfg.Retrieval.PageIndex.Model)
+	}
+}
+
+// TestPageIndexEnvOverride exercises every env knob the PageIndex
+// block exposes.
+func TestPageIndexEnvOverride(t *testing.T) {
+	prevEnabled := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED")
+	prevHops := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS")
+	prevLimit := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT")
+	prevModel := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MODEL")
+	defer func() {
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", prevEnabled)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", prevHops)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", prevLimit)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MODEL", prevModel)
+	}()
+
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", "false")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", "12")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", "32000")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MODEL", "gemini-2.0-flash")
+
+	cfg := Default()
+	applyEnvOverrides(&cfg)
+
+	if cfg.Retrieval.PageIndex.Enabled {
+		t.Error("VLE_RETRIEVAL_PAGEINDEX_ENABLED=false should disable")
+	}
+	if cfg.Retrieval.PageIndex.MaxHops != 12 {
+		t.Errorf("max_hops = %d, want 12", cfg.Retrieval.PageIndex.MaxHops)
+	}
+	if cfg.Retrieval.PageIndex.PageContentLimit != 32000 {
+		t.Errorf("page_content_limit = %d, want 32000", cfg.Retrieval.PageIndex.PageContentLimit)
+	}
+	if cfg.Retrieval.PageIndex.Model != "gemini-2.0-flash" {
+		t.Errorf("model = %q, want gemini-2.0-flash", cfg.Retrieval.PageIndex.Model)
+	}
+}
+
+// TestPageIndexEnvOverrideEnable: toggle on from an explicitly
+// disabled state.
+func TestPageIndexEnvOverrideEnable(t *testing.T) {
+	prev := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED")
+	defer os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", prev)
+
+	cfg := Default()
+	cfg.Retrieval.PageIndex.Enabled = false
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", "true")
+	applyEnvOverrides(&cfg)
+	if !cfg.Retrieval.PageIndex.Enabled {
+		t.Error("VLE_RETRIEVAL_PAGEINDEX_ENABLED=true should enable from disabled")
+	}
+}
+
+// TestPageIndexEnvOverrideRejectsBad: garbled numerics preserve the
+// default rather than silently zeroing the cap.
+func TestPageIndexEnvOverrideRejectsBad(t *testing.T) {
+	prevHops := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS")
+	prevLimit := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT")
+	defer func() {
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", prevHops)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", prevLimit)
+	}()
+
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", "abc")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", "not-a-number")
+
+	cfg := Default()
+	applyEnvOverrides(&cfg)
+	if cfg.Retrieval.PageIndex.MaxHops != 8 {
+		t.Errorf("garbage max_hops env should preserve default 8, got %d", cfg.Retrieval.PageIndex.MaxHops)
+	}
+	if cfg.Retrieval.PageIndex.PageContentLimit != 16000 {
+		t.Errorf("garbage page_content_limit env should preserve default, got %d", cfg.Retrieval.PageIndex.PageContentLimit)
+	}
+}
+
+// TestValidatePageIndexNegatives: negatives rejected by Validate.
+func TestValidatePageIndexNegatives(t *testing.T) {
+	t.Parallel()
+	cfg := Default()
+	cfg.Database.URL = "postgres://localhost/test"
+	cfg.Retrieval.PageIndex.MaxHops = -1
+	if err := cfg.Validate(); err == nil {
+		t.Error("negative max_hops should fail validation")
+	}
+
+	cfg2 := Default()
+	cfg2.Database.URL = "postgres://localhost/test"
+	cfg2.Retrieval.PageIndex.PageContentLimit = -1
+	if err := cfg2.Validate(); err == nil {
+		t.Error("negative page_content_limit should fail validation")
+	}
+
+	cfg3 := Default()
+	cfg3.Database.URL = "postgres://localhost/test"
+	cfg3.Retrieval.PageIndex.MaxHops = 0
+	cfg3.Retrieval.PageIndex.PageContentLimit = 0
+	if err := cfg3.Validate(); err != nil {
+		t.Errorf("zero values should pass (defaults applied at runtime): %v", err)
+	}
+}
+
 func TestValidateTLS(t *testing.T) {
 	t.Parallel()
 
diff --git a/pkg/retrieval/pageindex_strategy.go b/pkg/retrieval/pageindex_strategy.go
new file mode 100644
index 0000000..6df2ed2
--- /dev/null
+++ b/pkg/retrieval/pageindex_strategy.go
@@ -0,0 +1,1083 @@
+package retrieval
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"sort"
+	"strconv"
+	"strings"
+
+	"github.com/hallelx2/llmgate"
+
+	"github.com/hallelx2/vectorless-engine/pkg/tree"
+)
+
+// PageIndexStrategy is a page-based agentic retrieval loop modelled on
+// PageIndex's three-tool reasoning protocol.
+//
+// The model navigates by PAGE RANGE rather than by section ID. Each
+// turn it emits one of:
+//
+//   - get_document_structure() — returns the document's TOC tree
+//     (titles + page ranges only, no text), so the model can pick
+//     which pages to look at.
+//   - get_pages(start_page, end_page) — returns the concatenated text
+//     of every section whose [page_start, page_end] overlaps the
+//     requested range, clipped to PageContentLimit chars.
+//   - done(answer, cited_pages, reasoning) — terminates with the final
+//     answer string and the list of page ranges the answer relies on.
+//
+// This is a SUPERSET of the older AgenticStrategy's protocol: the
+// loop owns the answer, not just the selection. SelectWithCost
+// surfaces both the picked section IDs (the intersection of every
+// cited page range with the document's section map) and the literal
+// answer string via Result.Reasoning. The /v1/answer/pageindex
+// endpoint reads the answer; the legacy /v1/query callers still get
+// a section list.
+//
+// # Protocol choice
+//
+// PageIndex's original demo wires the model via the OpenAI Agents
+// SDK's native tool-calling surface. llmgate v0.2.0 declares ToolDef
+// / ToolCall as scaffolding but does not populate ToolCalls on
+// responses, so this strategy uses the same JSON-action text
+// protocol AgenticStrategy already proved (see pkg/retrieval/agentic.go).
+// When llmgate wires native tool calling the surface here is the
+// same — only the request/response plumbing changes.
+type PageIndexStrategy struct {
+	// LLM is the shared client used for every turn.
+	LLM llmgate.Client
+
+	// TOC is the source for get_document_structure observations.
+	// Implementations read documents.toc_tree (the column PR-A adds)
+	// or synthesise a tree from the section list. Nil triggers the
+	// built-in fallback that mirrors the section tree.
+	TOC TOCProvider
+
+	// PageLoader materialises section content for get_pages
+	// observations. Nil disables the get_pages tool — the model
+	// would then only see structure observations.
+	PageLoader PageContentLoader
+
+	// MaxHops caps the number of LLM turns one Select consumes,
+	// including the terminal "done" turn. Zero means use
+	// defaultPageIndexMaxHops.
+	MaxHops int
+
+	// PageContentLimit caps how many chars a single get_pages
+	// observation returns. Zero means use defaultPageContentLimit.
+	// Limits like this keep one stray request from torching the
+	// context window: a 50-page get_pages on an SEC filing can
+	// easily blow past 200K chars otherwise.
+	PageContentLimit int
+
+	// ModelOverride, if non-empty, replaces the budget's ModelName
+	// for every turn. Useful for routing the navigation loop to a
+	// cheaper or faster model than the rest of the engine.
+	ModelOverride string
+
+	// OnEvent, when non-nil, is invoked synchronously once per
+	// tool call so callers (e.g. the /v1/answer/pageindex SSE
+	// handler) can stream the navigation in real time. The hook
+	// runs inside the loop, after the tool result is computed but
+	// before the next LLM hop. Implementations MUST be cheap and
+	// MUST NOT block; a blocked hook stalls retrieval.
+	OnEvent func(PageIndexEvent)
+}
+
+// PageIndexEvent is a single observable step in the strategy's
+// navigation loop. Consumers convert these to whatever wire format
+// they need (SSE, gRPC stream, console log).
+//
+// Type carries the tool tag (get_document_structure / get_pages /
+// done). For get_pages, StartPage/EndPage/CharCount/SectionIDs are
+// populated; for done, Answer + CitedPages are populated. The Hop
+// field is the 1-indexed turn number so consumers can interleave
+// hops from concurrent requests.
+type PageIndexEvent struct {
+	Hop        int              `json:"hop"`
+	Type       string           `json:"type"`
+	Reasoning  string           `json:"reasoning,omitempty"`
+	StartPage  int              `json:"start_page,omitempty"`
+	EndPage    int              `json:"end_page,omitempty"`
+	CharCount  int              `json:"char_count,omitempty"`
+	SectionIDs []tree.SectionID `json:"section_ids,omitempty"`
+	Answer     string           `json:"answer,omitempty"`
+	CitedPages [][2]int         `json:"cited_pages,omitempty"`
+	Note       string           `json:"note,omitempty"`
+}
+
+// defaultPageIndexMaxHops bounds the loop. Eight turns is enough for
+// structure → 3 get_pages → done with two retry hops on stray bad
+// JSON, while keeping latency and cost predictable. The reference
+// PageIndex demo converges in 3-5 hops on typical questions.
+const defaultPageIndexMaxHops = 8
+
+// defaultPageContentLimit is the per-call chars cap. 16,000 chars
+// is roughly 4K tokens at GPT/Claude tokenisers — comfortably below
+// any flagship model's context but enough text for a 5-7 page
+// excerpt. Matches PageIndex's reference behaviour.
+const defaultPageContentLimit = 16000
+
+// strategyNamePageIndex is the stable identifier for config
+// (retrieval.strategy: pageindex) and telemetry.
+const strategyNamePageIndex = "pageindex"
+
+// Compile-time interface checks.
+var (
+	_ Strategy     = (*PageIndexStrategy)(nil)
+	_ CostStrategy = (*PageIndexStrategy)(nil)
+)
+
+// TOCProvider returns a JSON document-structure tree for the LLM's
+// get_document_structure tool. Implementations should return a
+// pretty-printable JSON array/object representing titles + page
+// ranges. Nodes that carry full text MUST be stripped before return —
+// the model is supposed to navigate by structure first and pull text
+// only via get_pages.
+//
+// Returning (nil, ErrNoTOC) signals "no TOC available; fall back to
+// the synthesised view". Other errors propagate.
+type TOCProvider interface {
+	GetTOC(ctx context.Context, docID tree.DocumentID) ([]byte, error)
+}
+
+// PageContentLoader returns the raw content bytes for one section,
+// keyed by its ContentRef. Strategies that need to materialise text
+// at run-time depend on this rather than on a concrete storage
+// driver — same shape as ContentFetcher; we keep them distinct so
+// the two callers (agentic / pageindex) can be wired independently
+// in main.go.
+type PageContentLoader interface {
+	Load(ctx context.Context, ref string) ([]byte, error)
+}
+
+// ErrNoTOC signals that no LLM-built TOC tree has been persisted for
+// the document yet. The strategy treats it as a graceful-degrade
+// signal: it synthesises a TOC view from the section list rather
+// than failing the request. Pre-merge of PR-A (which adds
+// documents.toc_tree) every request will degrade through this path.
+var ErrNoTOC = fmt.Errorf("retrieval: no TOC tree persisted for document")
+
+// NewPageIndexStrategy constructs a PageIndexStrategy with sensible
+// defaults. The TOC + PageLoader are nil here; the engine wires them
+// in main.go from the DB pool + storage backend. Tests pass scripted
+// implementations directly.
+func NewPageIndexStrategy(client llmgate.Client) *PageIndexStrategy {
+	return &PageIndexStrategy{
+		LLM:              client,
+		MaxHops:          defaultPageIndexMaxHops,
+		PageContentLimit: defaultPageContentLimit,
+	}
+}
+
+// Name implements Strategy.
+func (s *PageIndexStrategy) Name() string { return strategyNamePageIndex }
+
+// Select implements Strategy.
+func (s *PageIndexStrategy) Select(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) ([]tree.SectionID, error) {
+	r, err := s.SelectWithCost(ctx, t, query, budget)
+	if err != nil {
+		return nil, err
+	}
+	return r.SelectedIDs, nil
+}
+
+// SelectWithCost implements CostStrategy.
+//
+// The returned Result populates:
+//
+//   - SelectedIDs: section IDs whose [PageStart,PageEnd] overlaps any
+//     cited page range. This keeps the per-section-id contract for
+//     callers (/v1/query, /v1/answer) that don't yet know about pages.
+//   - Reasoning: the agent's final answer string (the "answer" field
+//     of the done action). /v1/answer/pageindex reads this directly
+//     and skips synthesis.
+//   - PagesRead: an entry per get_pages call.
+//   - HopsTaken / Usage / TraceToken: standard.
+func (s *PageIndexStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) (*Result, error) {
+	if t == nil || t.Root == nil {
+		return &Result{}, nil
+	}
+
+	model := s.ModelOverride
+	if model == "" {
+		model = budget.ModelName
+	}
+	maxHops := s.MaxHops
+	if maxHops <= 0 {
+		maxHops = defaultPageIndexMaxHops
+	}
+	pageLimit := s.PageContentLimit
+	if pageLimit <= 0 {
+		pageLimit = defaultPageContentLimit
+	}
+
+	// Pre-flatten the tree into an ordinal section list ordered by
+	// page. The get_pages observation iterates this twice per call;
+	// pre-computing keeps the inner loop O(N) instead of O(N · depth).
+	sections := flattenSectionsByPage(t)
+	maxPage := maxKnownPage(sections)
+
+	msgs := []llmgate.Message{
+		{Role: llmgate.RoleSystem, Content: pageIndexSystemPrompt},
+		{Role: llmgate.RoleUser, Content: s.initialUserPrompt(t, query, maxPage)},
+	}
+
+	var (
+		totalUsage Usage
+		hopsTaken  int
+		pagesRead  []PageReadEntry
+
+		// finalAnswer / finalCitedPages / finalReasoning are populated
+		// when the model emits a done action. citedRanges drives the
+		// final SelectedIDs (section IDs overlapping any cited range).
+		finalAnswer    string
+		finalReasoning string
+		citedRanges    []pageRange
+	)
+
+	for hop := 0; hop < maxHops; hop++ {
+		req := llmgate.Request{
+			Model:       model,
+			Messages:    msgs,
+			MaxTokens:   1536, // answers can be longer than agentic's selections
+			Temperature: 0,
+		}
+		resp, err := s.LLM.Complete(ctx, req)
+		if err != nil {
+			return nil, fmt.Errorf("pageindex hop %d: %w", hop+1, err)
+		}
+		hopsTaken++
+		totalUsage.Add(Usage{
+			InputTokens:  resp.Usage.InputTokens,
+			OutputTokens: resp.Usage.OutputTokens,
+			TotalTokens:  resp.Usage.TotalTokens,
+			CostUSD:      resp.Usage.CostUSD,
+			LLMCalls:     1,
+		})
+
+		// Record the assistant turn before parsing so the next prompt
+		// has the model's own context (matches AgenticStrategy).
+		msgs = append(msgs, llmgate.Message{
+			Role:    llmgate.RoleAssistant,
+			Content: resp.Content,
+		})
+
+		action, parseErr := ParsePageIndexAction(resp.Content)
+		if parseErr != nil {
+			log.Printf("retrieval: pageindex hop %d action parse failed: %v", hop+1, parseErr)
+			msgs = append(msgs, llmgate.Message{
+				Role:    llmgate.RoleUser,
+				Content: pageIndexParseRetryPrompt,
+			})
+			continue
+		}
+
+		switch action.Action {
+		case pageActionDone:
+			finalAnswer = strings.TrimSpace(action.Answer)
+			finalReasoning = strings.TrimSpace(action.Reasoning)
+			citedRanges = normaliseRanges(action.CitedPages, maxPage)
+			selectedIDs := sectionsOverlapping(sections, citedRanges)
+			s.emit(PageIndexEvent{
+				Hop:        hopsTaken,
+				Type:       pageActionDone,
+				Reasoning:  finalReasoning,
+				Answer:     finalAnswer,
+				CitedPages: action.CitedPages,
+			})
+			return &Result{
+				SelectedIDs: selectedIDs,
+				Reasoning:   finalAnswer, // /v1/answer/pageindex reads this
+				ModelUsed:   model,
+				Usage:       totalUsage,
+				HopsTaken:   hopsTaken,
+				PagesRead:   pagesRead,
+				TraceToken:  computePageIndexTraceToken(t.DocumentID, model, citedRanges),
+			}, nil
+
+		case pageActionStructure:
+			obs := s.renderStructure(ctx, t)
+			msgs = append(msgs, llmgate.Message{
+				Role:    llmgate.RoleUser,
+				Content: wrapPageObservation("get_document_structure", obs),
+			})
+			s.emit(PageIndexEvent{
+				Hop:       hopsTaken,
+				Type:      pageActionStructure,
+				Reasoning: action.Reasoning,
+				CharCount: len(obs),
+			})
+
+		case pageActionGetPages:
+			start, end, ok := clampRange(action.StartPage, action.EndPage, maxPage)
+			if !ok {
+				msgs = append(msgs, llmgate.Message{
+					Role: llmgate.RoleUser,
+					Content: wrapPageObservation("get_pages",
+						fmt.Sprintf("invalid range start=%d end=%d (document has %d pages). Pages are 1-indexed inclusive.",
+							action.StartPage, action.EndPage, maxPage)),
+				})
+				s.emit(PageIndexEvent{
+					Hop:       hopsTaken,
+					Type:      pageActionGetPages,
+					Reasoning: action.Reasoning,
+					StartPage: action.StartPage,
+					EndPage:   action.EndPage,
+					Note:      "invalid range",
+				})
+				continue
+			}
+			text, sectionIDs := s.renderPages(ctx, sections, start, end, pageLimit)
+			pagesRead = append(pagesRead, PageReadEntry{
+				StartPage:  start,
+				EndPage:    end,
+				SectionIDs: sectionIDs,
+				CharCount:  len(text),
+			})
+			msgs = append(msgs, llmgate.Message{
+				Role: llmgate.RoleUser,
+				Content: wrapPageObservation("get_pages",
+					fmt.Sprintf("pages %d-%d (%d sections, %d chars):\n%s", start, end, len(sectionIDs), len(text), text)),
+			})
+			s.emit(PageIndexEvent{
+				Hop:        hopsTaken,
+				Type:       pageActionGetPages,
+				Reasoning:  action.Reasoning,
+				StartPage:  start,
+				EndPage:    end,
+				CharCount:  len(text),
+				SectionIDs: sectionIDs,
+			})
+
+		default:
+			msgs = append(msgs, llmgate.Message{
+				Role: llmgate.RoleUser,
+				Content: wrapPageObservation(action.Action,
+					fmt.Sprintf("unsupported tool %q. Use one of: get_document_structure, get_pages, done.", action.Action)),
+			})
+			s.emit(PageIndexEvent{
+				Hop:  hopsTaken,
+				Type: action.Action,
+				Note: "unsupported tool",
+			})
+		}
+	}
+
+	// Ran out of hops without a done action. Force a terminal turn:
+	// give the model one final chance with an explicit "you MUST emit
+	// done now" prompt. If that also fails to parse or the model
+	// ignores the rule, we return whatever pages have been read so
+	// the caller at least sees the navigation footprint and an empty
+	// answer rather than a 500.
+	finalAnswer, finalReasoning, citedRanges = s.forceDone(ctx, &msgs, &totalUsage, &hopsTaken, model, maxPage)
+	selectedIDs := sectionsOverlapping(sections, citedRanges)
+	log.Printf("retrieval: pageindex strategy hit max_hops=%d; forced done", maxHops)
+	_ = finalReasoning
+	return &Result{
+		SelectedIDs: selectedIDs,
+		Reasoning:   finalAnswer,
+		ModelUsed:   model,
+		Usage:       totalUsage,
+		HopsTaken:   hopsTaken,
+		PagesRead:   pagesRead,
+		TraceToken:  computePageIndexTraceToken(t.DocumentID, model, citedRanges),
+	}, nil
+}
+
+// emit dispatches one event to the registered OnEvent hook, if any.
+// Hooks run synchronously inside the navigation loop and MUST be
+// cheap; callers that need to do I/O should buffer first and write
+// outside the strategy's critical path.
+func (s *PageIndexStrategy) emit(ev PageIndexEvent) {
+	if s.OnEvent != nil {
+		s.OnEvent(ev)
+	}
+}
+
+// initialUserPrompt is the very first user turn. It explains the
+// task, tells the model which page range exists ("the document has N
+// pages"), and reminds it of the action protocol. Mirrors
+// AgenticStrategy.initialUserPrompt.
+func (s *PageIndexStrategy) initialUserPrompt(t *tree.Tree, query string, maxPage int) string {
+	var b strings.Builder
+	if t.Title != "" {
+		b.WriteString("Document: ")
+		b.WriteString(t.Title)
+		b.WriteString("\n")
+	}
+	if maxPage > 0 {
+		fmt.Fprintf(&b, "Pages: 1-%d (inclusive)\n", maxPage)
+	} else {
+		b.WriteString("Pages: unknown (this document carries no page metadata; rely on get_document_structure for navigation hints).\n")
+	}
+	b.WriteString("\nUser query:\n")
+	b.WriteString(query)
+	b.WriteString("\n\nReply with a JSON action. The tools you may use are:\n")
+	b.WriteString(pageIndexActionHelp)
+	return b.String()
+}
+
+// renderStructure produces the get_document_structure observation.
+// First tries the persisted TOC tree (PR-A's documents.toc_tree
+// JSONB); if that's nil or errors, falls back to a synthesised view
+// derived from the section list. The fallback keeps this strategy
+// useful even before PR-A merges.
+func (s *PageIndexStrategy) renderStructure(ctx context.Context, t *tree.Tree) string {
+	if s.TOC != nil {
+		raw, err := s.TOC.GetTOC(ctx, t.DocumentID)
+		if err == nil && len(raw) > 0 {
+			return string(raw)
+		}
+		// Log and degrade — the strategy must keep going.
+		if err != nil {
+			log.Printf("retrieval: pageindex TOC fetch failed (degrading to synthesised view): %v", err)
+		}
+	}
+	return synthesiseTOC(t)
+}
+
+// renderPages assembles the get_pages observation: concatenates the
+// content of every section whose page range overlaps [start, end],
+// clipped to pageLimit. Returns the rendered text plus the list of
+// section IDs that contributed, in page order. SectionIDs feeds back
+// into the PageReadEntry so callers can audit which sections the
+// model actually read.
+func (s *PageIndexStrategy) renderPages(ctx context.Context, sections []sectionPageEntry, start, end, pageLimit int) (string, []tree.SectionID) {
+	if s.PageLoader == nil {
+		// Without a loader we can still emit a useful observation
+		// from titles + summaries, so the model can keep navigating.
+		return s.renderPagesNoLoader(sections, start, end, pageLimit)
+	}
+
+	var (
+		b          strings.Builder
+		sectionIDs []tree.SectionID
+		written    int
+	)
+	for _, sec := range sections {
+		if !overlaps(sec.start, sec.end, start, end) {
+			continue
+		}
+		sectionIDs = append(sectionIDs, sec.id)
+
+		// Header line so the model can ground its citations to a
+		// specific section + page range.
+		header := fmt.Sprintf("\n--- section_id=%s title=%q pages=%d-%d ---\n", sec.id, sec.title, sec.start, sec.end)
+		remaining := pageLimit - written
+		if remaining <= 0 {
+			break
+		}
+		if len(header) > remaining {
+			b.WriteString(header[:remaining])
+			written += remaining
+			break
+		}
+		b.WriteString(header)
+		written += len(header)
+
+		// Body — preferred source: storage via PageLoader. Fall back
+		// to the section summary when there's no ContentRef (internal
+		// nodes) or the loader errors.
+		body := s.loadSectionBody(ctx, sec)
+		remaining = pageLimit - written
+		if remaining <= 0 {
+			break
+		}
+		if len(body) > remaining {
+			b.WriteString(body[:remaining])
+			written += remaining
+			break
+		}
+		b.WriteString(body)
+		written += len(body)
+	}
+	return b.String(), sectionIDs
+}
+
+// renderPagesNoLoader is the degraded-mode get_pages observation
+// used when the strategy has no PageLoader (e.g. in tests, or when
+// storage is wired but momentarily unavailable). Titles + summaries
+// still let the model triangulate which range to ask about next.
+func (s *PageIndexStrategy) renderPagesNoLoader(sections []sectionPageEntry, start, end, pageLimit int) (string, []tree.SectionID) {
+	var (
+		b          strings.Builder
+		sectionIDs []tree.SectionID
+	)
+	for _, sec := range sections {
+		if !overlaps(sec.start, sec.end, start, end) {
+			continue
+		}
+		sectionIDs = append(sectionIDs, sec.id)
+		fmt.Fprintf(&b, "section_id=%s title=%q pages=%d-%d summary=%q\n", sec.id, sec.title, sec.start, sec.end, sec.summary)
+		if b.Len() >= pageLimit {
+			break
+		}
+	}
+	out := b.String()
+	if len(out) > pageLimit {
+		out = out[:pageLimit]
+	}
+	return out, sectionIDs
+}
+
+func (s *PageIndexStrategy) loadSectionBody(ctx context.Context, sec sectionPageEntry) string {
+	if sec.contentRef == "" {
+		if sec.summary != "" {
+			return fmt.Sprintf("(summary, no content loaded)\n%s", sec.summary)
+		}
+		return ""
+	}
+	data, err := s.PageLoader.Load(ctx, sec.contentRef)
+	if err != nil {
+		log.Printf("retrieval: pageindex load failed for section %s: %v", sec.id, err)
+		if sec.summary != "" {
+			return fmt.Sprintf("(content load failed: %v; using summary)\n%s", err, sec.summary)
+		}
+		return fmt.Sprintf("(content load failed: %v)", err)
+	}
+	return strings.TrimSpace(string(data))
+}
+
+// forceDone runs one final hop with a hard "emit done NOW" prompt so
+// the loop can exit gracefully on a stubborn model. Returns
+// (answer, reasoning, cited_ranges). When the model still doesn't
+// emit a valid done action, the empty values flow back and the
+// caller sees a hop-capped Result.
+func (s *PageIndexStrategy) forceDone(ctx context.Context, msgs *[]llmgate.Message, totalUsage *Usage, hopsTaken *int, model string, maxPage int) (string, string, []pageRange) {
+	*msgs = append(*msgs, llmgate.Message{
+		Role:    llmgate.RoleUser,
+		Content: "You have used your tool-call budget. Reply NOW with one JSON object: {\"tool\":\"done\",\"answer\":\"<your best answer\",\"cited_pages\":[[start,end],...],\"reasoning\":\"why\"}. Do not call any more tools. Do not emit prose.",
+	})
+	req := llmgate.Request{
+		Model:       model,
+		Messages:    *msgs,
+		MaxTokens:   1536,
+		Temperature: 0,
+	}
+	resp, err := s.LLM.Complete(ctx, req)
+	if err != nil {
+		return "", "", nil
+	}
+	*hopsTaken++
+	totalUsage.Add(Usage{
+		InputTokens:  resp.Usage.InputTokens,
+		OutputTokens: resp.Usage.OutputTokens,
+		TotalTokens:  resp.Usage.TotalTokens,
+		CostUSD:      resp.Usage.CostUSD,
+		LLMCalls:     1,
+	})
+	action, err := ParsePageIndexAction(resp.Content)
+	if err != nil || action.Action != pageActionDone {
+		return "", "", nil
+	}
+	return strings.TrimSpace(action.Answer), strings.TrimSpace(action.Reasoning), normaliseRanges(action.CitedPages, maxPage)
+}
+
+// --- TOC synthesis ---
+
+// tocNode is the synthesised TOC shape. It deliberately mirrors what
+// PR-A's persisted documents.toc_tree column will store (titles +
+// page ranges + nested children, NO text bodies) so the LLM sees a
+// consistent surface whether PR-A is merged or not.
+type tocNode struct {
+	ID        tree.SectionID `json:"id,omitempty"`
+	Title     string         `json:"title"`
+	PageStart int            `json:"page_start,omitempty"`
+	PageEnd   int            `json:"page_end,omitempty"`
+	Children  []tocNode      `json:"children,omitempty"`
+}
+
+// synthesiseTOC builds a TOC view from the section tree when no
+// LLM-built TOC has been persisted. Titles + page ranges only — body
+// text is intentionally dropped so the model navigates structure
+// before reaching for get_pages.
+func synthesiseTOC(t *tree.Tree) string {
+	if t == nil || t.Root == nil {
+		return "[]"
+	}
+	nodes := convertSectionToTOC(t.Root)
+	raw, err := json.MarshalIndent(nodes, "", "  ")
+	if err != nil {
+		// json.Marshal on a static struct shouldn't fail, but
+		// degrade to an empty array rather than break the loop.
+		return "[]"
+	}
+	return string(raw)
+}
+
+func convertSectionToTOC(s *tree.Section) []tocNode {
+	if s == nil {
+		return nil
+	}
+	// Root with empty ID is the synthetic wrapper buildTree adds when
+	// a document has multiple top-level sections — surface its
+	// children directly so the TOC doesn't look like one big anonymous
+	// container.
+	if s.ID == "" {
+		var out []tocNode
+		for _, c := range s.Children {
+			out = append(out, sectionToTOC(c))
+		}
+		return out
+	}
+	return []tocNode{sectionToTOC(s)}
+}
+
+func sectionToTOC(s *tree.Section) tocNode {
+	n := tocNode{
+		ID:        s.ID,
+		Title:     s.Title,
+		PageStart: s.PageStart,
+		PageEnd:   s.PageEnd,
+	}
+	for _, c := range s.Children {
+		n.Children = append(n.Children, sectionToTOC(c))
+	}
+	return n
+}
+
+// --- page range maths ---
+
+// pageRange is an inclusive page range. The strategy uses it for
+// citations and for the trace-token input.
+type pageRange struct {
+	Start int
+	End   int
+}
+
+// String formats as "start-end" so trace tokens compute over a stable
+// human-readable form. computePageIndexTraceToken sorts these
+// strings before hashing.
+func (p pageRange) String() string {
+	if p.Start == p.End {
+		return strconv.Itoa(p.Start)
+	}
+	return fmt.Sprintf("%d-%d", p.Start, p.End)
+}
+
+// sectionPageEntry is a flat section view ordered by page. The
+// strategy keeps the title + summary inline because get_pages /
+// renderPages reads them both per call, and the fallback path needs
+// summaries when the loader is nil.
+type sectionPageEntry struct {
+	id         tree.SectionID
+	title      string
+	summary    string
+	contentRef string
+	start      int
+	end        int
+}
+
+// flattenSectionsByPage walks the tree and returns every section
+// that carries a page range, sorted by start page (ties broken by
+// end page). Internal nodes whose [start,end] is zero are dropped —
+// they don't contribute content to get_pages and would noise up the
+// overlap check.
+func flattenSectionsByPage(t *tree.Tree) []sectionPageEntry {
+	if t == nil || t.Root == nil {
+		return nil
+	}
+	var out []sectionPageEntry
+	t.Root.Walk(func(s *tree.Section) bool {
+		if s.PageStart <= 0 || s.PageEnd <= 0 {
+			return true
+		}
+		out = append(out, sectionPageEntry{
+			id:         s.ID,
+			title:      s.Title,
+			summary:    s.Summary,
+			contentRef: s.ContentRef,
+			start:      s.PageStart,
+			end:        s.PageEnd,
+		})
+		return true
+	})
+	sort.SliceStable(out, func(i, j int) bool {
+		if out[i].start != out[j].start {
+			return out[i].start < out[j].start
+		}
+		return out[i].end < out[j].end
+	})
+	return out
+}
+
+// maxKnownPage returns the highest PageEnd across the flattened
+// section list. Used to clamp model-emitted ranges and to give the
+// model a clear "max page = N" hint in the initial prompt.
+func maxKnownPage(sections []sectionPageEntry) int {
+	max := 0
+	for _, s := range sections {
+		if s.end > max {
+			max = s.end
+		}
+	}
+	return max
+}
+
+// overlaps reports whether two inclusive ranges intersect.
+func overlaps(aStart, aEnd, bStart, bEnd int) bool {
+	if aStart <= 0 || aEnd <= 0 || bStart <= 0 || bEnd <= 0 {
+		return false
+	}
+	return aStart <= bEnd && bStart <= aEnd
+}
+
+// clampRange validates a model-emitted [start,end] against the
+// document's actual page range. Returns (start, end, ok=false) when
+// the range is unusable (zero pages, inverted, or entirely past the
+// document). When the range partially overlaps the document the ends
+// are clamped to [1, maxPage] and the call returns ok=true so the
+// model can keep navigating from a slightly-corrected range rather
+// than spinning on the same error.
+func clampRange(start, end, maxPage int) (int, int, bool) {
+	if start <= 0 && end <= 0 {
+		return 0, 0, false
+	}
+	if start <= 0 {
+		start = 1
+	}
+	if end <= 0 {
+		end = start
+	}
+	if start > end {
+		start, end = end, start
+	}
+	if maxPage > 0 {
+		if start > maxPage {
+			return start, end, false
+		}
+		if end > maxPage {
+			end = maxPage
+		}
+	}
+	return start, end, true
+}
+
+// normaliseRanges collapses raw model-emitted ranges (which may be
+// flipped, zero-pages, or duplicated) into a sorted, deduplicated
+// list of valid inclusive ranges clamped to [1,maxPage]. Bad ranges
+// are silently dropped — the trace token must compute over a stable
+// canonical form regardless of how the model orders its citations.
+func normaliseRanges(raw [][2]int, maxPage int) []pageRange {
+	if len(raw) == 0 {
+		return nil
+	}
+	seen := make(map[pageRange]struct{}, len(raw))
+	out := make([]pageRange, 0, len(raw))
+	for _, r := range raw {
+		s, e, ok := clampRange(r[0], r[1], maxPage)
+		if !ok {
+			continue
+		}
+		pr := pageRange{Start: s, End: e}
+		if _, dup := seen[pr]; dup {
+			continue
+		}
+		seen[pr] = struct{}{}
+		out = append(out, pr)
+	}
+	sort.Slice(out, func(i, j int) bool {
+		if out[i].Start != out[j].Start {
+			return out[i].Start < out[j].Start
+		}
+		return out[i].End < out[j].End
+	})
+	return out
+}
+
+// sectionsOverlapping returns the IDs of every section whose
+// [PageStart, PageEnd] overlaps any of the cited ranges. Preserves
+// document order (because sections is page-sorted) and deduplicates.
+// This is the bridge that turns the model's page-based citations
+// into the section-ID list every other endpoint already expects.
+func sectionsOverlapping(sections []sectionPageEntry, ranges []pageRange) []tree.SectionID {
+	if len(ranges) == 0 || len(sections) == 0 {
+		return nil
+	}
+	seen := make(map[tree.SectionID]struct{}, len(sections))
+	out := make([]tree.SectionID, 0, len(sections))
+	for _, sec := range sections {
+		for _, r := range ranges {
+			if overlaps(sec.start, sec.end, r.Start, r.End) {
+				if _, dup := seen[sec.id]; !dup {
+					seen[sec.id] = struct{}{}
+					out = append(out, sec.id)
+				}
+				break
+			}
+		}
+	}
+	return out
+}
+
+// computePageIndexTraceToken builds the replay token for a
+// PageIndex run. Page-based strategies don't pick section IDs the
+// way agentic/single-pass do, so the token's "identity" inputs are
+// the document, the model, and the sorted cited page ranges. Two
+// runs that cite the same pages (even via different navigation
+// paths) collapse to the same token — same property as
+// ComputeTraceToken offers for section IDs.
+//
+// The hashing primitive (sha256, NUL separators, lowercase hex) is
+// reused so /v1/replay handles both shapes uniformly.
+func computePageIndexTraceToken(docID tree.DocumentID, model string, ranges []pageRange) string {
+	strs := make([]string, len(ranges))
+	for i, r := range ranges {
+		strs[i] = r.String()
+	}
+	sort.Strings(strs)
+	// Trace-token IDs are constructed from sorted page-range strings
+	// rather than section IDs. We feed them through the existing
+	// ComputeTraceToken helper for shape consistency — its
+	// sort-then-hash semantics happens to be exactly what we want
+	// here too. The strategy's stable identifier ("pageindex") is
+	// folded into the "model" position so a page-based run and a
+	// section-based run on the same doc/model don't collide.
+	tagged := make([]tree.SectionID, len(strs))
+	for i, s := range strs {
+		tagged[i] = tree.SectionID("p:" + s)
+	}
+	return ComputeTraceToken(docID, traceDocVersionV1+"-pages", strategyNamePageIndex+":"+model, tagged)
+}
+
+// --- action protocol ---
+
+// PageIndexAction is the LLM-chosen next step in the loop. The model
+// emits one of these per turn as a JSON object on the
+// 'tool' tag. The Action field is uppercase-tolerant on input;
+// ParsePageIndexAction lowercases before dispatch.
+type PageIndexAction struct {
+	// Action is the dispatch tag (alias: tool). One of:
+	// get_document_structure, get_pages, done.
+	Action string `json:"tool"`
+
+	// ActionAlt lets the model use "action" instead of "tool". Some
+	// providers struggle to consistently emit the same key when both
+	// shapes are documented. We accept either; ActionAlt wins iff
+	// Action is empty.
+	ActionAlt string `json:"action,omitempty"`
+
+	// StartPage / EndPage are the inclusive 1-indexed range a
+	// get_pages call targets.
+	StartPage int `json:"start_page,omitempty"`
+	EndPage   int `json:"end_page,omitempty"`
+
+	// Pages is an alternate shape some models reach for: a
+	// "5-7"-style string. ParsePageIndexAction splits it into
+	// StartPage/EndPage when present.
+	Pages string `json:"pages,omitempty"`
+
+	// Answer is the natural-language answer for a done action.
+	Answer string `json:"answer,omitempty"`
+
+	// CitedPages is the list of inclusive page ranges the answer
+	// relies on for a done action. Each entry is [start, end]; a
+	// single page can be expressed as [5,5].
+	CitedPages [][2]int `json:"cited_pages,omitempty"`
+
+	// Reasoning is the per-call explanation the system prompt
+	// asks the model to emit. Surfaced into the reasoning_trace
+	// when the endpoint is called with ?reasoning=true.
+	Reasoning string `json:"reasoning,omitempty"`
+}
+
+// Action tag constants. Mirrors PageIndex's reference SDK tool
+// names so prompt-engineering work over there translates over.
+const (
+	pageActionStructure = "get_document_structure"
+	pageActionGetPages  = "get_pages"
+	pageActionDone      = "done"
+)
+
+// pageIndexParseRetryPrompt nudges the model back onto the
+// JSON-action protocol after a parse failure. Aligned with
+// AgenticStrategy's retry path — same wording so behaviour stays
+// consistent.
+const pageIndexParseRetryPrompt = "Your last reply was not a valid JSON tool call. Reply with EXACTLY one JSON object: {\"tool\":\"get_document_structure|get_pages|done\", ...}. No prose, no markdown fences."
+
+// ParsePageIndexAction is the tolerant JSON decoder for the
+// page-based protocol. Behaviour mirrors ParseAction (the older
+// agentic protocol's parser): strip code fences, peel prose
+// wrappers, isolate the first balanced JSON object, and
+// case-fold the action tag.
+//
+// Additional tolerance vs ParseAction:
+//   - "tool" or "action" can name the action.
+//   - Pages can be a "5-7" string instead of explicit
+//     start_page/end_page.
+//   - cited_pages can be either [[5,7],[10,10]] (preferred) or
+//     ["5-7","10"] (tolerated).
+func ParsePageIndexAction(raw string) (PageIndexAction, error) {
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return PageIndexAction{}, fmt.Errorf("empty response")
+	}
+	if strings.HasPrefix(raw, "```") {
+		if i := strings.Index(raw, "\n"); i >= 0 {
+			raw = raw[i+1:]
+		}
+		raw = strings.TrimSuffix(raw, "```")
+		raw = strings.TrimSpace(raw)
+	}
+	if i := strings.Index(raw, "{"); i > 0 {
+		raw = raw[i:]
+	}
+	if j := strings.LastIndex(raw, "}"); j >= 0 && j < len(raw)-1 {
+		raw = raw[:j+1]
+	}
+
+	// We decode in two passes so a flexibly-typed cited_pages field
+	// (either [[1,2],[5,7]] or ["1-2","5-7"]) doesn't tank the whole
+	// action.
+	//
+	// Pass 1: decode into a map[string]json.RawMessage so each field
+	// can be parsed independently. This is more tolerant than a
+	// single-pass typed decode because a single bad field doesn't
+	// invalidate the rest of the JSON.
+	var fields map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(raw), &fields); err != nil {
+		return PageIndexAction{}, fmt.Errorf("decode pageindex action: %w", err)
+	}
+
+	var a PageIndexAction
+	if v, ok := fields["tool"]; ok {
+		_ = json.Unmarshal(v, &a.Action)
+	}
+	if a.Action == "" {
+		if v, ok := fields["action"]; ok {
+			_ = json.Unmarshal(v, &a.Action)
+		}
+	}
+	a.Action = strings.ToLower(strings.TrimSpace(a.Action))
+	if a.Action == "" {
+		return PageIndexAction{}, fmt.Errorf("missing 'tool' or 'action' field")
+	}
+
+	if v, ok := fields["start_page"]; ok {
+		_ = json.Unmarshal(v, &a.StartPage)
+	}
+	if v, ok := fields["end_page"]; ok {
+		_ = json.Unmarshal(v, &a.EndPage)
+	}
+	if v, ok := fields["pages"]; ok {
+		_ = json.Unmarshal(v, &a.Pages)
+	}
+	if v, ok := fields["answer"]; ok {
+		_ = json.Unmarshal(v, &a.Answer)
+	}
+	if v, ok := fields["reasoning"]; ok {
+		_ = json.Unmarshal(v, &a.Reasoning)
+	}
+
+	// cited_pages: try the typed shape first ([[1,2],[5,7]]); fall
+	// back to the string-shape (["1-2","5-7"]) when the typed
+	// decode fails or is empty.
+	if v, ok := fields["cited_pages"]; ok && len(v) > 0 {
+		if err := json.Unmarshal(v, &a.CitedPages); err != nil || len(a.CitedPages) == 0 {
+			a.CitedPages = nil
+			var asStrings []string
+			if err := json.Unmarshal(v, &asStrings); err == nil {
+				for _, p := range asStrings {
+					s, e, ok := parsePageRangeString(p)
+					if !ok {
+						continue
+					}
+					a.CitedPages = append(a.CitedPages, [2]int{s, e})
+				}
+			}
+		}
+	}
+
+	// Pages-string → start/end normalisation. Only fills in when
+	// the typed fields weren't already populated.
+	if a.Pages != "" && a.StartPage == 0 && a.EndPage == 0 {
+		s, e, ok := parsePageRangeString(a.Pages)
+		if ok {
+			a.StartPage = s
+			a.EndPage = e
+		}
+	}
+
+	return a, nil
+}
+
+// parsePageRangeString parses "5", "5-7", or "5,7" (the loosest
+// shape the model is allowed to emit). Returns (start, end, true)
+// on success; (0, 0, false) otherwise. "5,7" is treated as
+// start=5,end=7 (we don't support multi-range here — that's what
+// cited_pages is for).
+func parsePageRangeString(s string) (int, int, bool) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return 0, 0, false
+	}
+	sep := -1
+	for i, c := range s {
+		if c == '-' || c == ',' {
+			sep = i
+			break
+		}
+	}
+	if sep < 0 {
+		n, err := strconv.Atoi(s)
+		if err != nil || n <= 0 {
+			return 0, 0, false
+		}
+		return n, n, true
+	}
+	a, err1 := strconv.Atoi(strings.TrimSpace(s[:sep]))
+	b, err2 := strconv.Atoi(strings.TrimSpace(s[sep+1:]))
+	if err1 != nil || err2 != nil || a <= 0 || b <= 0 {
+		return 0, 0, false
+	}
+	return a, b, true
+}
+
+// wrapPageObservation formats a tool's result so the model can
+// clearly see which tool produced which observation. Same shape as
+// AgenticStrategy.wrapObservation but with tool-call wording.
+func wrapPageObservation(tool, body string) string {
+	return fmt.Sprintf("Tool result (%s):\n%s\n\nNext JSON tool call?", tool, body)
+}
+
+// --- system prompt ---
+
+// pageIndexSystemPrompt instructs the model on the navigation loop.
+// The wording is a faithful port of the reference PageIndex demo's
+// AGENT_SYSTEM_PROMPT (see PageIndex/examples/agentic_vectorless_rag_demo.py:44-52),
+// adapted to the JSON-action protocol vle uses in lieu of native
+// llmgate tool calling.
+//
+// Key invariants that show up in tests:
+//   - Always call get_document_structure first.
+//   - Use tight page ranges; never fetch the whole document.
+//   - Emit a one-sentence reason before each tool call.
+//   - Answer only from tool output (no priors).
+//   - End with a done action carrying answer + cited_pages.
+const pageIndexSystemPrompt = `You are a document QA assistant navigating a paginated document.
+
+TOOL USE PROTOCOL:
+- Reply with EXACTLY one JSON object per turn. No prose, no markdown fences.
+- Always call get_document_structure first to see titles + page ranges.
+- Call get_pages with TIGHT page ranges (e.g. {"tool":"get_pages","start_page":5,"end_page":7}). Never fetch the whole document.
+- Before each tool call, populate the "reasoning" field with ONE short sentence explaining why you're calling it.
+- When you have enough evidence, emit done with the natural-language answer, the page ranges you relied on, and a one-line reasoning trace.
+
+RULES:
+- Answer based ONLY on tool output. Do not invent facts.
+- Cite by page range, not by section title.
+- Be concise. Single-paragraph answers when possible.
+- If nothing in the document answers the query, emit done with answer="The document does not address this query." and an empty cited_pages array.`
+
+// pageIndexActionHelp is the one-shot reminder appended to the
+// initial user prompt so the model gets concrete examples without us
+// needing to maintain a separate few-shot block.
+const pageIndexActionHelp = `- {"tool":"get_document_structure","reasoning":"orient by titles"} — fetch the TOC tree (titles + page ranges, no body text)
+- {"tool":"get_pages","start_page":5,"end_page":7,"reasoning":"section on debt"} — fetch text covering pages 5-7
+- {"tool":"done","answer":"...","cited_pages":[[5,7],[12,12]],"reasoning":"the answer is grounded on these pages"} — final answer
+
+Reply with ONLY the JSON object.`
diff --git a/pkg/retrieval/pageindex_strategy_test.go b/pkg/retrieval/pageindex_strategy_test.go
new file mode 100644
index 0000000..e6b9e0d
--- /dev/null
+++ b/pkg/retrieval/pageindex_strategy_test.go
@@ -0,0 +1,750 @@
+package retrieval_test
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+
+	"github.com/hallelx2/llmgate"
+
+	"github.com/hallelx2/vectorless-engine/pkg/retrieval"
+	"github.com/hallelx2/vectorless-engine/pkg/tree"
+)
+
+// pageScriptedLLM is a scriptedLLM for the PageIndex strategy.
+// Each Complete call returns the next canned response. When the
+// script is exhausted, loopReply (if set) is returned on every
+// subsequent call — the hop-cap test uses this to simulate a model
+// that never emits done.
+type pageScriptedLLM struct {
+	replies   []string
+	loopReply string
+
+	calls int32
+
+	mu          sync.Mutex
+	lastPrompts []string
+}
+
+func (p *pageScriptedLLM) Complete(ctx context.Context, req llmgate.Request) (*llmgate.Response, error) {
+	i := int(atomic.AddInt32(&p.calls, 1)) - 1
+
+	var userMsg string
+	for _, msg := range req.Messages {
+		if msg.Role == llmgate.RoleUser {
+			userMsg = msg.Content
+		}
+	}
+	p.mu.Lock()
+	p.lastPrompts = append(p.lastPrompts, userMsg)
+	p.mu.Unlock()
+
+	if i < len(p.replies) {
+		return &llmgate.Response{Content: p.replies[i]}, nil
+	}
+	if p.loopReply != "" {
+		return &llmgate.Response{Content: p.loopReply}, nil
+	}
+	return nil, errors.New("pageScriptedLLM: replies exhausted")
+}
+
+func (p *pageScriptedLLM) CountTokens(ctx context.Context, t string) (int, error) {
+	return len(t) / 4, nil
+}
+
+// pageMapLoader is an in-memory PageContentLoader backed by a map.
+type pageMapLoader struct{ data map[string]string }
+
+func (m pageMapLoader) Load(ctx context.Context, ref string) ([]byte, error) {
+	v, ok := m.data[ref]
+	if !ok {
+		return nil, errors.New("not found")
+	}
+	return []byte(v), nil
+}
+
+// pageStaticTOC is a TOCProvider that returns a canned JSON blob.
+// Tests use this to assert the get_document_structure observation
+// surfaces the persisted TOC ahead of the synthesised fallback.
+type pageStaticTOC struct{ blob []byte }
+
+func (p pageStaticTOC) GetTOC(ctx context.Context, _ tree.DocumentID) ([]byte, error) {
+	return p.blob, nil
+}
+
+// pageErroringTOC simulates documents.toc_tree being NULL (no
+// LLM-built TOC yet). The strategy must degrade to the synthesised
+// view rather than failing the request.
+type pageErroringTOC struct{}
+
+func (pageErroringTOC) GetTOC(ctx context.Context, _ tree.DocumentID) ([]byte, error) {
+	return nil, retrieval.ErrNoTOC
+}
+
+// buildPagedTree mirrors buildAgenticTree but stamps page_start /
+// page_end on every section so PageIndexStrategy can navigate. The
+// shape:
+//
+//	sec_root → [sec_a (1-4), sec_b (5-9)]
+//	  sec_a → [sec_a1 (1-2 install), sec_a2 (3-4 config)]
+//	  sec_b → [sec_b1 (5-7 querying), sec_b2 (8-9 debt)]
+func buildPagedTree() *tree.Tree {
+	a1 := &tree.Section{ID: "sec_a1", ParentID: "sec_a", Title: "Install", Summary: "install steps", ContentRef: "a1_ref", PageStart: 1, PageEnd: 2}
+	a2 := &tree.Section{ID: "sec_a2", ParentID: "sec_a", Title: "Config", Summary: "config keys", ContentRef: "a2_ref", PageStart: 3, PageEnd: 4}
+	b1 := &tree.Section{ID: "sec_b1", ParentID: "sec_b", Title: "Querying", Summary: "how to query", ContentRef: "b1_ref", PageStart: 5, PageEnd: 7}
+	b2 := &tree.Section{ID: "sec_b2", ParentID: "sec_b", Title: "Debt", Summary: "long-term debt", ContentRef: "b2_ref", PageStart: 8, PageEnd: 9}
+	a := &tree.Section{ID: "sec_a", ParentID: "sec_root", Title: "Setup", Summary: "setup section", Children: []*tree.Section{a1, a2}, PageStart: 1, PageEnd: 4}
+	b := &tree.Section{ID: "sec_b", ParentID: "sec_root", Title: "Usage", Summary: "usage section", Children: []*tree.Section{b1, b2}, PageStart: 5, PageEnd: 9}
+	root := &tree.Section{ID: "sec_root", Title: "Atlas", Children: []*tree.Section{a, b}, PageStart: 1, PageEnd: 9}
+	return &tree.Tree{DocumentID: "doc_x", Title: "Atlas", Root: root}
+}
+
+// TestPageIndexHappyPath drives the canonical 3-tool sequence:
+// structure → get_pages → done. We assert the strategy:
+//   - returns the answer string in Result.Reasoning
+//   - lists the section IDs whose page range overlaps the citation
+//   - records the get_pages call in PagesRead
+//   - tracks HopsTaken correctly
+//   - computes a non-empty TraceToken keyed by the cited pages
+func TestPageIndexHappyPath(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_document_structure","reasoning":"orient"}`,
+			`{"tool":"get_pages","start_page":1,"end_page":2,"reasoning":"install lives near the front"}`,
+			`{"tool":"done","answer":"Run vle ingest then start the server.","cited_pages":[[1,2]],"reasoning":"install steps live on pages 1-2"}`,
+		},
+	}
+	loader := pageMapLoader{data: map[string]string{
+		"a1_ref": "Install steps: run vle ingest...",
+		"a2_ref": "Config keys: VLE_*",
+		"b1_ref": "How to query the API.",
+		"b2_ref": "Debt registration is in line items A and B.",
+	}}
+
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = loader
+
+	res, err := s.SelectWithCost(context.Background(), tr, "how do I install?", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.HopsTaken != 3 {
+		t.Errorf("HopsTaken = %d, want 3", res.HopsTaken)
+	}
+	if res.Usage.LLMCalls != 3 {
+		t.Errorf("Usage.LLMCalls = %d, want 3", res.Usage.LLMCalls)
+	}
+	if !strings.Contains(res.Reasoning, "vle ingest") {
+		t.Errorf("Reasoning (answer) must contain the model's reply, got %q", res.Reasoning)
+	}
+	if len(res.SelectedIDs) == 0 {
+		t.Fatalf("SelectedIDs must include sections covering pages 1-2, got %v", res.SelectedIDs)
+	}
+	// sec_a1 (1-2) is the leaf — must be in the list. sec_a (1-4)
+	// and the synthetic sec_root (1-9) overlap too because page
+	// ranges intersect. The strategy's job is to surface ANY section
+	// whose [page_start,page_end] overlaps the citation; the API
+	// layer narrows further if it cares.
+	wantIDs := map[tree.SectionID]bool{"sec_a1": true, "sec_a": true, "sec_root": true}
+	for _, id := range res.SelectedIDs {
+		if !wantIDs[id] {
+			t.Errorf("unexpected section ID %q (only sections overlapping pages 1-2 may appear)", id)
+		}
+	}
+	if _, ok := indexOfSection(res.SelectedIDs, "sec_a1"); !ok {
+		t.Errorf("sec_a1 must be in SelectedIDs, got %v", res.SelectedIDs)
+	}
+	if len(res.PagesRead) != 1 {
+		t.Fatalf("PagesRead = %v, want 1 entry", res.PagesRead)
+	}
+	if res.PagesRead[0].StartPage != 1 || res.PagesRead[0].EndPage != 2 {
+		t.Errorf("PagesRead[0] = %+v, want 1-2", res.PagesRead[0])
+	}
+	if res.PagesRead[0].CharCount == 0 {
+		t.Errorf("PagesRead[0].CharCount must be non-zero, got %d", res.PagesRead[0].CharCount)
+	}
+	if res.TraceToken == "" {
+		t.Errorf("TraceToken must be populated on success")
+	}
+
+	// Assert the second prompt — the one that follows the
+	// get_document_structure call — actually surfaced the
+	// synthesised TOC (since no TOC provider was wired). It must
+	// contain section titles.
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+	if len(llm.lastPrompts) < 2 {
+		t.Fatalf("expected at least 2 prompts captured, got %d", len(llm.lastPrompts))
+	}
+	if !strings.Contains(llm.lastPrompts[1], "Install") {
+		t.Errorf("get_document_structure observation should include section titles; got:\n%s", llm.lastPrompts[1])
+	}
+	if !strings.Contains(llm.lastPrompts[2], "Install steps: run vle ingest") {
+		t.Errorf("get_pages observation should include loaded content; got:\n%s", llm.lastPrompts[2])
+	}
+}
+
+// TestPageIndexMultiRangeDone covers a done with two cited ranges:
+// the strategy must surface every section that overlaps EITHER
+// range. This is the FinanceBench-shaped pattern: an answer that
+// pulls evidence from two unrelated parts of a 10-K.
+func TestPageIndexMultiRangeDone(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_document_structure"}`,
+			`{"tool":"get_pages","start_page":3,"end_page":4}`,
+			`{"tool":"get_pages","start_page":8,"end_page":9}`,
+			`{"tool":"done","answer":"Config is X. Debt is Y.","cited_pages":[[3,4],[8,9]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{
+		"a2_ref": "Config keys: VLE_*",
+		"b2_ref": "Debt registration is in line items A and B.",
+	}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "config and debt?", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.HopsTaken != 4 {
+		t.Errorf("HopsTaken = %d, want 4", res.HopsTaken)
+	}
+	if len(res.PagesRead) != 2 {
+		t.Fatalf("PagesRead = %v, want 2 entries", res.PagesRead)
+	}
+	wantSecs := map[tree.SectionID]bool{
+		"sec_a2": true, "sec_b2": true, // direct leaf overlaps
+		"sec_a": true, "sec_b": true, // parents overlap too
+		"sec_root": true, // doc-wide root overlaps every range
+	}
+	got := make(map[tree.SectionID]bool, len(res.SelectedIDs))
+	for _, id := range res.SelectedIDs {
+		got[id] = true
+		if !wantSecs[id] {
+			t.Errorf("unexpected section ID %q", id)
+		}
+	}
+	// Leaves are the load-bearing requirement; parents are
+	// allowed-not-required (a future tightening could skip them, and
+	// the strategy contract stays useful either way).
+	for _, id := range []tree.SectionID{"sec_a2", "sec_b2"} {
+		if !got[id] {
+			t.Errorf("missing section ID %q from SelectedIDs", id)
+		}
+	}
+}
+
+// TestPageIndexMaxHopsForcesDone confirms a runaway loop is killed:
+// the model emits get_pages on every turn but never done. The
+// strategy must cap at MaxHops, force a done on the last hop, and
+// surface a Result with HopsTaken == MaxHops+1 (the +1 for the
+// forced terminal call).
+func TestPageIndexMaxHopsForcesDone(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		// Every loop reply is a fresh get_pages — never done.
+		loopReply: `{"tool":"get_pages","start_page":1,"end_page":2}`,
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{"a1_ref": "install"}}
+	s.MaxHops = 3
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.HopsTaken < 3 {
+		t.Errorf("HopsTaken = %d, want >= 3 (cap hit)", res.HopsTaken)
+	}
+	// The model never emits done so even after force-done attempt
+	// the answer should be empty (force-done's response is also a
+	// get_pages, which fails to parse as done).
+	if strings.TrimSpace(res.Reasoning) != "" {
+		t.Errorf("answer must be empty when model never finalises, got %q", res.Reasoning)
+	}
+	// The get_pages calls that fired BEFORE the cap should still be
+	// surfaced in PagesRead so callers can see what the model tried.
+	if len(res.PagesRead) == 0 {
+		t.Error("PagesRead must capture pre-cap navigation footprint")
+	}
+}
+
+// TestPageIndexMaxHopsForceDoneSucceeds covers the recovery path:
+// the loop hit MaxHops, but on the forced-done turn the model
+// actually emits a valid done. The strategy must collect the
+// answer + citations from that final turn rather than dropping them.
+func TestPageIndexMaxHopsForceDoneSucceeds(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":1,"end_page":2}`,
+			`{"tool":"get_pages","start_page":3,"end_page":4}`,
+			// Once force-done fires, this becomes the model's response.
+			`{"tool":"done","answer":"forced answer","cited_pages":[[1,2]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{"a1_ref": "install", "a2_ref": "config"}}
+	s.MaxHops = 2
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.Reasoning != "forced answer" {
+		t.Errorf("forced-done answer = %q, want %q", res.Reasoning, "forced answer")
+	}
+	if len(res.SelectedIDs) == 0 {
+		t.Error("forced-done citations must populate SelectedIDs")
+	}
+}
+
+// TestPageIndexTOCFallback exercises the graceful-degradation path:
+// when the persisted TOC provider returns ErrNoTOC (pre-PR-A
+// state), the strategy synthesises a TOC view from the section
+// tree. The model must still receive section titles + page ranges.
+func TestPageIndexTOCFallback(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_document_structure"}`,
+			`{"tool":"done","answer":"see structure","cited_pages":[]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+	s.TOC = pageErroringTOC{} // mimic documents.toc_tree IS NULL
+
+	res, err := s.SelectWithCost(context.Background(), tr, "what's in the doc?", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.HopsTaken != 2 {
+		t.Errorf("HopsTaken = %d, want 2", res.HopsTaken)
+	}
+
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+	if len(llm.lastPrompts) < 2 {
+		t.Fatalf("expected 2 prompts, got %d", len(llm.lastPrompts))
+	}
+	// The fallback synthesised TOC must include each leaf title.
+	obs := llm.lastPrompts[1]
+	for _, want := range []string{"Install", "Config", "Querying", "Debt", "page_start"} {
+		if !strings.Contains(obs, want) {
+			t.Errorf("synthesised TOC missing %q in observation:\n%s", want, obs)
+		}
+	}
+}
+
+// TestPageIndexTOCFromProvider asserts the persisted TOC wins over
+// the synthesised view: when the provider returns bytes, those
+// bytes are surfaced verbatim.
+func TestPageIndexTOCFromProvider(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_document_structure"}`,
+			`{"tool":"done","answer":"from persisted TOC","cited_pages":[]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.TOC = pageStaticTOC{blob: []byte(`[{"title":"OVERRIDDEN","page_start":1,"page_end":99}]`)}
+
+	_, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+	if !strings.Contains(llm.lastPrompts[1], "OVERRIDDEN") {
+		t.Errorf("persisted TOC blob must be surfaced verbatim, got:\n%s", llm.lastPrompts[1])
+	}
+	if strings.Contains(llm.lastPrompts[1], "Install") {
+		t.Errorf("persisted TOC should win — the synthesised one mustn't leak through")
+	}
+}
+
+// TestPageIndexBadJSONGraceful: persistent prose responses must
+// trigger a retry prompt and then bail cleanly at MaxHops.
+func TestPageIndexBadJSONGraceful(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		loopReply: "I think the answer is on page 5.", // never JSON
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+	s.MaxHops = 3
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("want nil error on persistent parse failure, got %v", err)
+	}
+	if strings.TrimSpace(res.Reasoning) != "" {
+		t.Errorf("answer must be empty when no done emitted, got %q", res.Reasoning)
+	}
+	if len(res.PagesRead) != 0 {
+		t.Errorf("PagesRead must be empty when every turn fails to parse, got %v", res.PagesRead)
+	}
+}
+
+// TestPageIndexClampInvalidRange: a model that asks for pages past
+// the document's end gets a recoverable error observation and can
+// keep going. The strategy must NOT crash on out-of-range input.
+func TestPageIndexClampInvalidRange(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree() // max page is 9
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":100,"end_page":105}`, // past the end
+			`{"tool":"done","answer":"recovered","cited_pages":[[1,1]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{"a1_ref": "install"}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.Reasoning != "recovered" {
+		t.Errorf("recovery answer = %q, want %q", res.Reasoning, "recovered")
+	}
+
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+	// The bad get_pages must surface "invalid range" so the model
+	// has something to react to.
+	if !strings.Contains(llm.lastPrompts[1], "invalid range") {
+		t.Errorf("out-of-range get_pages should produce an 'invalid range' observation; got:\n%s", llm.lastPrompts[1])
+	}
+}
+
+// TestPageIndexClampPartialOverlap: a range that overlaps the
+// document but extends past the end is silently clamped — the
+// model gets useful content (not an error) for the in-range part.
+func TestPageIndexClampPartialOverlap(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree() // max page is 9
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":8,"end_page":50}`, // 8 is valid, 50 is past
+			`{"tool":"done","answer":"got it","cited_pages":[[8,9]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{"b2_ref": "Debt content."}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.PagesRead) != 1 {
+		t.Fatalf("PagesRead = %v, want 1 entry", res.PagesRead)
+	}
+	if res.PagesRead[0].EndPage != 9 {
+		t.Errorf("end page should be clamped to 9, got %d", res.PagesRead[0].EndPage)
+	}
+}
+
+// TestPageIndexEmptyTree exercises the early-return guard.
+func TestPageIndexEmptyTree(t *testing.T) {
+	t.Parallel()
+
+	llm := &pageScriptedLLM{}
+	s := retrieval.NewPageIndexStrategy(llm)
+
+	res, err := s.SelectWithCost(context.Background(), &tree.Tree{}, "q", retrieval.ContextBudget{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(res.SelectedIDs) != 0 {
+		t.Errorf("empty tree should yield empty selection, got %v", res.SelectedIDs)
+	}
+	if atomic.LoadInt32(&llm.calls) != 0 {
+		t.Errorf("empty tree should make 0 LLM calls, got %d", llm.calls)
+	}
+}
+
+// TestPageIndexNoLoaderFallback: PageLoader=nil falls back to a
+// title+summary rendering of get_pages. The model still gets a
+// useful observation so it can keep navigating.
+func TestPageIndexNoLoaderFallback(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":1,"end_page":2}`,
+			`{"tool":"done","answer":"titles only","cited_pages":[[1,2]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm) // no PageLoader
+
+	_, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+	obs := llm.lastPrompts[1]
+	if !strings.Contains(obs, "Install") || !strings.Contains(obs, "install steps") {
+		t.Errorf("loader-less get_pages should fall back to title + summary; got:\n%s", obs)
+	}
+}
+
+// TestPageIndexContentClippedAtLimit: a get_pages call that would
+// produce more chars than PageContentLimit must be clipped.
+func TestPageIndexContentClippedAtLimit(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	bigBody := strings.Repeat("X", 5_000)
+	loader := pageMapLoader{data: map[string]string{
+		"a1_ref": bigBody, "a2_ref": bigBody, "b1_ref": bigBody, "b2_ref": bigBody,
+	}}
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":1,"end_page":9}`,
+			`{"tool":"done","answer":"big","cited_pages":[[1,1]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = loader
+	s.PageContentLimit = 1000
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.PagesRead[0].CharCount > 1000 {
+		t.Errorf("get_pages output must respect PageContentLimit=1000, got %d", res.PagesRead[0].CharCount)
+	}
+}
+
+// TestPageIndexNoCitationsClearsSelection: an empty cited_pages
+// list must produce an empty SelectedIDs (no implicit "default to
+// everything we visited"). This is the "no useful evidence found"
+// path the system prompt prescribes.
+func TestPageIndexNoCitationsClearsSelection(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_pages","start_page":1,"end_page":2}`,
+			`{"tool":"done","answer":"The document does not address this query.","cited_pages":[]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{"a1_ref": "install"}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.SelectedIDs) != 0 {
+		t.Errorf("empty cited_pages should yield empty SelectedIDs, got %v", res.SelectedIDs)
+	}
+	if !strings.Contains(res.Reasoning, "does not address") {
+		t.Errorf("refusal answer must propagate to Reasoning, got %q", res.Reasoning)
+	}
+}
+
+// TestPageIndexTraceTokenStable: two runs that emit identical
+// cited_pages produce identical trace tokens. Replay's substrate.
+func TestPageIndexTraceTokenStable(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	mkRun := func() string {
+		llm := &pageScriptedLLM{
+			replies: []string{
+				`{"tool":"done","answer":"X","cited_pages":[[1,2],[8,9]]}`,
+			},
+		}
+		s := retrieval.NewPageIndexStrategy(llm)
+		s.PageLoader = pageMapLoader{}
+		res, _ := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{ModelName: "gpt-4o-mini"})
+		return res.TraceToken
+	}
+	t1 := mkRun()
+	t2 := mkRun()
+	if t1 == "" || t1 != t2 {
+		t.Errorf("trace tokens must be stable across runs; got %q vs %q", t1, t2)
+	}
+}
+
+// TestPageIndexTraceTokenOrderInvariant: two runs that cite the
+// same pages in different orders must produce identical tokens.
+func TestPageIndexTraceTokenOrderInvariant(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	mkRun := func(reply string) string {
+		llm := &pageScriptedLLM{replies: []string{reply}}
+		s := retrieval.NewPageIndexStrategy(llm)
+		res, _ := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{ModelName: "gpt-4o-mini"})
+		return res.TraceToken
+	}
+	t1 := mkRun(`{"tool":"done","answer":"X","cited_pages":[[1,2],[8,9]]}`)
+	t2 := mkRun(`{"tool":"done","answer":"X","cited_pages":[[8,9],[1,2]]}`)
+	if t1 != t2 {
+		t.Errorf("trace tokens must be order-invariant; got %q vs %q", t1, t2)
+	}
+}
+
+// TestParsePageIndexActionTolerance covers the input shapes the
+// parser accepts:
+//   - "tool" key (canonical)
+//   - "action" key (alt)
+//   - "pages":"5-7" string
+//   - cited_pages as string list ["5-7","10"]
+//   - markdown fences + prose prefix
+//   - case-insensitive tool tag
+func TestParsePageIndexActionTolerance(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name  string
+		in    string
+		tool  string
+		start int
+		end   int
+		cited [][2]int
+	}{
+		{
+			name: "canonical_structure",
+			in:   `{"tool":"get_document_structure","reasoning":"orient"}`,
+			tool: "get_document_structure",
+		},
+		{
+			name:  "canonical_pages",
+			in:    `{"tool":"get_pages","start_page":5,"end_page":7}`,
+			tool:  "get_pages",
+			start: 5, end: 7,
+		},
+		{
+			name:  "alt_action_key",
+			in:    `{"action":"get_pages","start_page":5,"end_page":7}`,
+			tool:  "get_pages",
+			start: 5, end: 7,
+		},
+		{
+			name:  "pages_string_range",
+			in:    `{"tool":"get_pages","pages":"5-7"}`,
+			tool:  "get_pages",
+			start: 5, end: 7,
+		},
+		{
+			name:  "pages_string_single",
+			in:    `{"tool":"get_pages","pages":"12"}`,
+			tool:  "get_pages",
+			start: 12, end: 12,
+		},
+		{
+			name:  "code_fence",
+			in:    "```json\n{\"tool\":\"get_pages\",\"start_page\":3,\"end_page\":4}\n```",
+			tool:  "get_pages",
+			start: 3, end: 4,
+		},
+		{
+			name:  "prose_before",
+			in:    `Sure: {"tool":"get_pages","start_page":1,"end_page":1}`,
+			tool:  "get_pages",
+			start: 1, end: 1,
+		},
+		{
+			name:  "case_insensitive",
+			in:    `{"tool":"GET_PAGES","start_page":2,"end_page":3}`,
+			tool:  "get_pages",
+			start: 2, end: 3,
+		},
+		{
+			name:  "done_with_citations",
+			in:    `{"tool":"done","answer":"hi","cited_pages":[[1,2],[5,7]]}`,
+			tool:  "done",
+			cited: [][2]int{{1, 2}, {5, 7}},
+		},
+		{
+			name:  "done_with_string_citations",
+			in:    `{"tool":"done","answer":"hi","cited_pages":["1-2","5-7"]}`,
+			tool:  "done",
+			cited: [][2]int{{1, 2}, {5, 7}},
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got, err := retrieval.ParsePageIndexAction(c.in)
+			if err != nil {
+				t.Fatalf("parse: %v", err)
+			}
+			if got.Action != c.tool {
+				t.Errorf("Action = %q, want %q", got.Action, c.tool)
+			}
+			if got.StartPage != c.start {
+				t.Errorf("StartPage = %d, want %d", got.StartPage, c.start)
+			}
+			if got.EndPage != c.end {
+				t.Errorf("EndPage = %d, want %d", got.EndPage, c.end)
+			}
+			if len(got.CitedPages) != len(c.cited) {
+				t.Fatalf("CitedPages len = %d, want %d (got %v)", len(got.CitedPages), len(c.cited), got.CitedPages)
+			}
+			for i := range c.cited {
+				if got.CitedPages[i] != c.cited[i] {
+					t.Errorf("CitedPages[%d] = %v, want %v", i, got.CitedPages[i], c.cited[i])
+				}
+			}
+		})
+	}
+}
+
+func TestParsePageIndexActionRejectsGarbage(t *testing.T) {
+	t.Parallel()
+	for _, in := range []string{
+		"",
+		"I think it's page 5.",
+		`{"reasoning":"no tool field"}`,
+	} {
+		_, err := retrieval.ParsePageIndexAction(in)
+		if err == nil {
+			t.Errorf("want error parsing %q", in)
+		}
+	}
+}
+
+// indexOfSection is a tiny helper that says "is needle in haystack
+// and where". Mirrors slices.Index for readability — keeps the tests
+// stdlib-agnostic on older Go versions.
+func indexOfSection(haystack []tree.SectionID, needle tree.SectionID) (int, bool) {
+	for i, id := range haystack {
+		if id == needle {
+			return i, true
+		}
+	}
+	return -1, false
+}
diff --git a/pkg/retrieval/strategy.go b/pkg/retrieval/strategy.go
index 8f428f6..a3db98a 100644
--- a/pkg/retrieval/strategy.go
+++ b/pkg/retrieval/strategy.go
@@ -89,6 +89,27 @@ type Result struct {
 	// regardless of reasoning path. Empty when the strategy did not
 	// populate it (e.g. tests, fallback paths).
 	TraceToken string `json:"trace_token,omitempty"`
+
+	// PagesRead records the page ranges the strategy actually fetched
+	// during navigation. Page-based strategies (e.g. pageindex)
+	// populate this; section-by-section strategies leave it nil.
+	// Useful for the API layer's reasoning-trace surfaces and for
+	// cost/coverage debugging: a 10-K answer that read pages 50-55 +
+	// 102-104 leaves a concrete page footprint behind.
+	PagesRead []PageReadEntry `json:"pages_read,omitempty"`
+}
+
+// PageReadEntry is one get_pages tool call that materialised during a
+// page-based retrieval loop. StartPage and EndPage are inclusive,
+// 1-indexed. SectionIDs lists every section whose [PageStart,PageEnd]
+// overlapped the requested range. CharCount records the size of the
+// returned text after PageContentLimit clipping so cost reporting can
+// reflect bytes-on-the-wire, not bytes-requested.
+type PageReadEntry struct {
+	StartPage  int              `json:"start_page"`
+	EndPage    int              `json:"end_page"`
+	SectionIDs []tree.SectionID `json:"section_ids,omitempty"`
+	CharCount  int              `json:"char_count,omitempty"`
 }
 
 // Usage is the aggregated token + cost accounting across all LLM calls