From ed98c2dc9c2b8d96ce11e9087a2b9967603c647a Mon Sep 17 00:00:00 2001
From: dev360 <c.toivola@gmail.com>
Date: Thu, 21 May 2026 17:23:23 -0400
Subject: [PATCH] feat(template): add column and nth to Anchor for label
 disambiguation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A cover sheet that places two blocks side by side — for example a
``REPORTING`` block in column A and a ``BILLING`` block in column D,
each carrying its own ``Company:`` / ``Email:`` rows — used to force
both anchored fields to bind to the first occurrence of the shared
label. The canonical output would then carry the reporting block's
company under both ``reporting_company`` and ``billing_company`` keys
with no signal that the bind had collapsed.

Adds two optional fields on ``Anchor``:

- ``column: int`` — restrict the label scan to a single 0-indexed
  column. Skipping cells outside that column means the label_match in
  one block can't accidentally hit the parallel block.
- ``nth: int`` (default ``1``) — pick the N-th match (1-indexed) when
  the same label legitimately appears multiple times.

Both default to no-op behavior (``column=None`` scans the whole sheet,
``nth=1`` picks the first match), so existing templates continue to
extract identically.

Graduates the two P1-1 xfail tests in
``tests/test_field_scan_gaps.py`` and documents the disambiguators in
``docs/guides/templates.md``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/guides/templates.md      | 28 ++++++++++++++++++++++++++++
 src/crease/extractor.py       | 18 ++++++++++++++----
 src/crease/template_model.py  |  2 ++
 tests/test_field_scan_gaps.py | 11 ++---------
 4 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/docs/guides/templates.md b/docs/guides/templates.md
index 4c09fd9..118214a 100644
--- a/docs/guides/templates.md
+++ b/docs/guides/templates.md
@@ -105,6 +105,34 @@ by OR across rules. `value_pattern` is a regex full-matched against the
 stringified cell value; combine it with `column:` to pin a single
 column.
 
+## Disambiguating anchored labels
+
+When a worksheet places two cover-sheet-style blocks side by side and
+both carry the same labels (a "REPORTING" block in column A and a
+"BILLING" block in column D, each with its own `Company:` /
+`Email:` rows), an `anchor` with `label_match: "Company:"` binds to
+the first hit by default. Two optional fields scope the search:
+
+- `column: int` — restrict the scan to a single 0-indexed column.
+- `nth: int` — pick the Nth match, scanning row by row (1-indexed; default 1).
+
+```yaml
+fields:
+  - name: reporting_company
+    type: string
+    anchor: { label_match: "Company:", column: 0, value_at: right, offset: 1 }
+  - name: billing_company
+    type: string
+    anchor: { label_match: "Company:", column: 3, value_at: right, offset: 1 }
+  - name: section_two_carrier
+    type: string
+    anchor:
+      label_match: "SHIPPING INFORMATION"
+      nth: 2                # the second occurrence of the label
+      value_at: right
+      offset: 2
+```
+
 ## Templates that pin the read backend
 
 Crease reads spreadsheets through two interchangeable backends — calamine
diff --git a/src/crease/extractor.py b/src/crease/extractor.py
index 3f72f56..d993fbf 100644
--- a/src/crease/extractor.py
+++ b/src/crease/extractor.py
@@ -866,16 +866,26 @@ def _extract_anchored(
 def _find_anchor(grid: list[list[Any]], anchor) -> tuple[int, int] | None:
     target = anchor.label_match
     mode = anchor.match_mode
+    pinned_col = anchor.column
+    nth = max(1, anchor.nth)
+    seen = 0
     for r, row in enumerate(grid):
         for c, val in enumerate(row):
+            if pinned_col is not None and c != pinned_col:
+                continue
             if val is None:
                 continue
             s = str(val).strip()
             if mode == "exact" and s == target:
-                return (r, c)
-            if mode == "contains" and target in s:
-                return (r, c)
-            if mode == "regex" and re.search(target, s):
+                pass
+            elif mode == "contains" and target in s:
+                pass
+            elif mode == "regex" and re.search(target, s):
+                pass
+            else:
+                continue
+            seen += 1
+            if seen == nth:
                 return (r, c)
     return None
 
diff --git a/src/crease/template_model.py b/src/crease/template_model.py
index 6bace50..f3f9c96 100644
--- a/src/crease/template_model.py
+++ b/src/crease/template_model.py
@@ -51,6 +51,8 @@ class Anchor(BaseModel):
     match_mode: MatchMode = "contains"
     value_at: Direction = "right"
     offset: int = 1
+    column: int | None = None  # restrict label search to a single column; None = any column
+    nth: int = 1  # 1-indexed match to return when the label appears more than once
 
 
 class DataEnd(BaseModel):
diff --git a/tests/test_field_scan_gaps.py b/tests/test_field_scan_gaps.py
index df07300..9a1ff85 100644
--- a/tests/test_field_scan_gaps.py
+++ b/tests/test_field_scan_gaps.py
@@ -356,11 +356,6 @@ def build(wb):
 # ======================================================================
 
 
-@pytest.mark.xfail(
-    strict=True,
-    reason="P1-1: Anchor.column not yet implemented; duplicated labels in side-by-side "
-    "blocks always match the first occurrence.",
-)
 def test_anchor_column_scopes_match_to_one_column(tmp_path):
     """Two side-by-side blocks (REPORTING in col A, BILLING in col D) carry
     the same labels. ``anchor.column: 3`` should restrict the search to the
@@ -379,6 +374,7 @@ def build(wb):
         """
         template_id: anchor_column_scope
         version: 1
+        description: P1-1 fixture - anchor.column scopes label search
         entities:
           - name: cover
             cardinality: one
@@ -400,10 +396,6 @@ def build(wb):
     assert result.canonical["cover"]["billing_company"] == "Globex Corp"
 
 
-@pytest.mark.xfail(
-    strict=True,
-    reason="P1-1: Anchor.nth not yet implemented; cannot pick the Nth occurrence " "of an ambiguous label.",
-)
 def test_anchor_nth_picks_second_match(tmp_path):
     """A label ``SHIPPING INFORMATION`` appears twice on the sheet (a header
     label at row 0 and a sub-section label at row 4). ``nth: 2`` should pick
@@ -424,6 +416,7 @@ def build(wb):
         """
         template_id: anchor_nth
         version: 1
+        description: P1-1 fixture - anchor.nth picks second match
         entities:
           - name: cover
             cardinality: one