In [None]:

- from __future__ import annotations
- from dataclasses import dataclass, field
- from typing import List, Optional, Tuple, Dict, Any, Sequence, Iterable, Union
- 
# Axis-aligned rectangle stored as (top, left, height, width): an origin corner
# plus an extent, NOT a pair of corners. Use bbox_bottom()/bbox_right() for the
# far edges.
BBox = Tuple[int, int, int, int]
- 
def bbox_right(b: BBox) -> int:
    """Return the right edge of ``b``, i.e. ``left + width``.

    ``b`` is unpacked as (top, left, height, width), so it must have exactly
    four elements.
    """
    _top, left, _height, width = b
    return left + width
- 
def bbox_bottom(b: BBox) -> int:
    """Return the bottom edge of ``b``, i.e. ``top + height``.

    ``b`` is unpacked as (top, left, height, width), so it must have exactly
    four elements.
    """
    top, _left, height, _width = b
    return top + height
- 
def bbox_area(b: BBox) -> int:
    """Return the area of ``b``, treating a negative extent as zero.

    Height and width are clamped at 0 independently before multiplying, so a
    box with a negative height or width has area 0 rather than a negative or
    (for two negatives) spuriously positive area.
    """
    _top, _left, height, width = b
    clampedHeight = height if height > 0 else 0
    clampedWidth = width if width > 0 else 0
    return clampedHeight * clampedWidth
- 
def bbox_contains(a: BBox, b: BBox) -> bool:
    """Return True when ``b`` lies entirely inside ``a``.

    Containment is inclusive: a box whose edges coincide with ``a``'s edges
    (including ``b == a``) counts as contained.
    """
    outerTop, outerLeft, _, _ = a
    innerTop, innerLeft, _, _ = b

    # b's origin corner must not lie above or to the left of a's origin.
    if not (innerTop >= outerTop and innerLeft >= outerLeft):
        return False
    # b's far edges must not extend past a's far edges.
    if not (bbox_bottom(b) <= bbox_bottom(a)):
        return False
    return bbox_right(b) <= bbox_right(a)
- 
def bbox_intersects(a: BBox, b: BBox) -> bool:
    """Return True when ``a`` and ``b`` overlap.

    Boxes that merely touch along an edge (one's right equals the other's
    left, or one's bottom equals the other's top) are reported as NOT
    intersecting, because every separation test below uses ``<=``.
    """
    aTop, aLeft, _, _ = a
    bTop, bLeft, _, _ = b

    # Horizontal separation: one box ends at or before the other begins.
    if bbox_right(a) <= bLeft:
        return False
    if bbox_right(b) <= aLeft:
        return False
    # Vertical separation, same idea.
    if bbox_bottom(a) <= bTop:
        return False
    if bbox_bottom(b) <= aTop:
        return False
    return True
- 
def bbox_union(boxes: Sequence[BBox]) -> Optional[BBox]:
    """Return the smallest box that encloses every box in ``boxes``.

    Returns None when ``boxes`` is empty. The result uses the same
    (top, left, height, width) layout as its inputs.
    """
    if not boxes:
        return None

    top = min(box[0] for box in boxes)
    left = min(box[1] for box in boxes)
    bottom = max(bbox_bottom(box) for box in boxes)
    right = max(bbox_right(box) for box in boxes)

    # Convert the far edges back into extents relative to the new origin.
    return (top, left, bottom - top, right - left)
- 
- 
@dataclass(frozen=True)
class CC:
    """Immutable record for one "CC" object (presumably a connected component).

    Only ``id`` and ``bbox`` are required; every other field has a neutral
    default.
    """

    id: str
    bbox: BBox  # (top, left, height, width)
    area: int = 0
    # Centre of the bounding box, ordered (yMid, xMid).
    boxCenter: Tuple[float, float] = (0.0, 0.0)
    # Mean ink position, ordered (yInkMean, xInkMean).
    inkCenter: Tuple[float, float] = (0.0, 0.0)
    columnId: Optional[int] = None
- 
- 
@dataclass(frozen=True)
class Glyph:
    """Immutable glyph record: a bounding box plus the ids of the CCs it groups.

    ``frozen=True`` only blocks attribute rebinding; the ``ccIds`` list itself
    can still be mutated in place.

    Hashing: ``ccIds`` is excluded from the generated ``__hash__`` (it still
    takes part in ``==``). With the list included, the generated hash raised
    ``TypeError`` for every instance, so a Glyph could never be used as a
    dict key or set member.
    """

    id: str
    bbox: BBox  # (top, left, height, width)
    # Ids of the CC objects that make up this glyph.
    ccIds: List[str] = field(default_factory=list, hash=False)
    lineId: Optional[str] = None
    bandId: Optional[str] = None
    columnId: Optional[int] = None
- 
- 
@dataclass(frozen=True)
class Gap:
    """Immutable record for one classified gap.

    ``id``, ``bbox``, ``width`` and ``gapClass`` are required. Nothing here
    validates ``gapClass`` or checks ``width`` against ``bbox``.
    """

    id: str
    bbox: BBox  # (top, left, height, width)
    width: int
    # Expected vocabulary: "small" | "medium" | "long" (not enforced).
    gapClass: str
    lineId: Optional[str] = None
    bandId: Optional[str] = None
    columnId: Optional[int] = None
- 
- 
@dataclass(frozen=True)
class Token:
    """Base record for every token kind on a line.

    ``kind`` is a plain string and is not validated here; the expected
    vocabulary is "TI" | "PMI" | "TMI" | "SPACE".
    """

    id: str
    kind: str  # "TI" | "PMI" | "TMI" | "SPACE"
    bbox: BBox  # (top, left, height, width)
    lineId: Optional[str] = None
    bandId: Optional[str] = None
    columnId: Optional[int] = None

    def obstacleBBox(self) -> BBox:
        """Return the rectangle this token contributes as a MER-detection obstacle.

        Per the Appendix B invariant, every token is an obstacle; the obstacle
        rectangle is simply the token's own bbox.
        """
        return self.bbox
- 
- 
@dataclass(frozen=True)
class TI(Token):
    """Token subtype that additionally records the glyphs it was built from.

    ``glyphIds`` is excluded from the generated ``__hash__`` (it still takes
    part in ``==``). With the list included, hashing any TI raised
    ``TypeError``, while plain ``Token`` instances were hashable.
    """

    # Ids of the glyphs grouped into this token.
    glyphIds: List[str] = field(default_factory=list, hash=False)
- 
- 
@dataclass(frozen=True)
class PMI(Token):
    """Token subtype built from a run of TIs and the gaps between them.

    The list fields are excluded from the generated ``__hash__`` (they still
    take part in ``==``). With the lists included, hashing any PMI raised
    ``TypeError``, while plain ``Token`` instances were hashable.
    """

    # Ids of the TI tokens grouped into this PMI.
    tiIds: List[str] = field(default_factory=list, hash=False)
    # Ids of gaps consumed inside the PMI (usually medium gaps).
    gapIds: List[str] = field(default_factory=list, hash=False)
    role: str = "unknown"  # "inlineMath" | "tallMath(TMI)" | "unknown"
- 
- 
@dataclass(frozen=True)
class TMI(Token):
    """Token subtype that links back to a parent PMI and to its anchoring MERs.

    ``anchorMERIds`` is excluded from the generated ``__hash__`` (it still
    takes part in ``==``). With the list included, hashing any TMI raised
    ``TypeError``, while plain ``Token`` instances were hashable.
    """

    parentPMIId: Optional[str] = None
    anchorLineId: Optional[str] = None
    # Ids of the MERs that anchor this TMI.
    anchorMERIds: List[str] = field(default_factory=list, hash=False)
    patternKind: Optional[str] = None  # "MER–TMI–MER" | "TMI–MER" | "MER–TMI"
- 
- 
@dataclass(frozen=True)
class SpaceToken(Token):
    """Explicit token standing in for a long gap (the gap treated as a character).

    Unlike gaps absorbed into other tokens, a SPACE is NOT consumed: it stays
    in the token stream and, like every token, is an obstacle in G8.
    """

    # Id of the gap this token was emitted for, when known.
    gapId: Optional[str] = None
- 
- 
@dataclass(frozen=True)
class MER:
    """Immutable record for one MER (presumably a "maximal empty rectangle").

    ``id``, ``bbox`` and ``area`` are required; the collection fields default
    to fresh empty containers per instance.

    Hashing: the list/dict fields are excluded from the generated
    ``__hash__`` (they still take part in ``==``). With them included, the
    generated hash raised ``TypeError`` for every instance, so a MER could
    never be used as a dict key or set member.
    """

    id: str
    bbox: BBox  # (top, left, height, width)
    area: int
    # Free-form role tags: "gutter" | "inline" | "line-anchor" | "halo" | "block" etc.
    roleHints: List[str] = field(default_factory=list, hash=False)
    bandIds: List[str] = field(default_factory=list, hash=False)
    columnIds: List[int] = field(default_factory=list, hash=False)
    # String key -> list of ids; the key vocabulary is not defined in this file.
    adjacency: Dict[str, List[str]] = field(default_factory=dict, hash=False)
- 
- 
@dataclass(frozen=True)
class Line:
    """Immutable record for one text line within a band and column.

    ``id``, ``bbox``, ``bandId`` and ``columnId`` are required.

    Hashing: ``glyphIds`` and ``tokenIds`` are excluded from the generated
    ``__hash__`` (they still take part in ``==``). With the lists included,
    the generated hash raised ``TypeError`` for every instance, so a Line
    could never be used as a dict key or set member.
    """

    id: str
    bbox: BBox  # (top, left, height, width)
    bandId: str
    columnId: int
    glyphIds: List[str] = field(default_factory=list, hash=False)
    tokenIds: List[str] = field(default_factory=list, hash=False)
    # Ids of the MERs on the line's left/right side, when known.
    merLeftId: Optional[str] = None
    merRightId: Optional[str] = None
- 
- 
@dataclass(frozen=True)
class Band:
    """Immutable record for one band, delimited by a top and a bottom row."""

    id: str
    # Vertical extent of the band. Whether rowBottom is inclusive is not
    # established in this file -- TODO confirm against the producer.
    rowTop: int
    rowBottom: int
- 
- 
@dataclass(frozen=True)
class Column:
    """Immutable record for one column, delimited by a left and a right x.

    NOTE(review): ``id`` is a ``str`` here, while the ``columnId`` fields on
    the other records in this file are ``int`` -- confirm how the two map.
    """

    id: str
    xLeft: int
    xRight: int
    # Ids of the gutter MERs on either side of the column, when known.
    gutterMERLeftId: Optional[str] = None
    gutterMERRightId: Optional[str] = None
- 
- 
@dataclass(frozen=True)
class Page:
    """Page-level metadata plus a registry of page objects keyed by their id.

    ``frozen=True`` only prevents rebinding attributes; the ``objects`` dict
    is deliberately mutated in place by :meth:`add`.

    Hashing: ``objects`` is excluded from the generated ``__hash__`` (it still
    takes part in ``==``). With the dict included, the generated hash raised
    ``TypeError`` for every Page. Excluding it also keeps the hash stable
    while objects are added to the registry.
    """

    id: str
    width: int
    height: int
    dpi: int = 300
    polarity: str = "ink0_bg255"
    # Registry of objects by id; populated through add().
    objects: Dict[str, Any] = field(default_factory=dict, hash=False)

    def add(self, obj: Any) -> None:
        """Register ``obj`` under ``obj.id``, replacing any previous entry.

        Raises:
            AttributeError: if ``obj`` has no ``id`` attribute.
        """
        self.objects[obj.id] = obj

    def get(self, objId: str) -> Any:
        """Return the object registered under ``objId``.

        Raises:
            KeyError: if nothing is registered under ``objId``.
        """
        return self.objects[objId]
- 
- 
- 
class WaterGeoV2:
    """Scaffold for the G7 -> G8 -> G9 per-line stages.

    Most stages are stubs that return an empty list; the signatures and the
    data flow between stages are what this class currently pins down.
    """

    def __init__(self) -> None:
        # Free-form scratch space for debugging; no method here writes to it yet.
        self.debug: Dict[str, Any] = {}

    # ------------------------------------------------------------------
    # G7 (stubs): gaps, islands, SPACE tokens
    # ------------------------------------------------------------------

    def classifyGaps(self, line: Line, glyphs: List[Glyph]) -> List[Gap]:
        """Measure and classify the gaps between glyphs on ``line``.

        Stub: always returns an empty list.
        """
        # TODO: implement real gap measurement and thresholds.
        return []

    def formTIs(self, line: Line, glyphs: List[Glyph], gaps: List[Gap]) -> List[TI]:
        """Group glyphs separated by small gaps into TI tokens.

        Stub: always returns an empty list.
        """
        # TODO: group glyphs separated by small gaps.
        return []

    def formPMIs(self, line: Line, tis: List[TI], gaps: List[Gap]) -> List[PMI]:
        """Group TI runs separated by medium gaps (which are consumed) into PMIs.

        Stub: always returns an empty list.
        """
        # TODO: group TI sequences separated by medium gaps (consumed).
        return []

    def emitSpaceTokens(self, line: Line, tokens: List[Token], gaps: List[Gap]) -> List[SpaceToken]:
        """Turn long gaps into explicit SPACE tokens (Appendix B: not consumed).

        Stub: always returns an empty list.
        """
        # TODO: identify which "long" gaps are true SPACE vs indent/EOL void.
        return []

    def assertLineGrammar(self, line: Line, tokens: List[Token], merLeft: Optional[MER], merRight: Optional[MER]) -> None:
        """Check ``tokens`` against the line grammar.

        Grammar: Line ::= MER · token · SPACE · token · ... · token · MER

        Only the token-kind vocabulary is enforced so far; SPACE is accepted
        as an ordinary token kind.

        Raises:
            ValueError: on the first token whose ``kind`` is not one of
                "TI", "PMI", "TMI", "SPACE".
        """
        allowed = {"TI", "PMI", "TMI", "SPACE"}
        for t in tokens:
            if t.kind in allowed:
                continue
            raise ValueError(f"Line {line.id}: illegal token kind {t.kind}")
        # merLeft / merRight are intentionally not checked yet: in early
        # prototypes they may be absent at margins. Make this strict later.

    # ------------------------------------------------------------------
    # G8 (stubs): obstacle collection + MER detection
    # ------------------------------------------------------------------

    def collectObstacleRectangles(self, tokens: List[Token]) -> List[BBox]:
        """Return one obstacle rectangle per token, in token order.

        Appendix B invariant: every token, SPACE included, is an obstacle
        rectangle in G8.
        """
        return [tok.obstacleBBox() for tok in tokens]

    def detectMERs(self, page: Page, imageBin: Any, obstacles: List[BBox]) -> List[MER]:
        """Detect MERs on ``page`` given the token obstacle rectangles.

        IMPORTANT: callers must pass token obstacles only, never raw glyph gaps.

        Stub: always returns an empty list.
        """
        # TODO: implement MER algorithm (e.g., maximal empty rectangles under obstacles).
        return []

    # ------------------------------------------------------------------
    # G9 (stubs): anchor strip + TMI detection hooks
    # ------------------------------------------------------------------

    def buildAnchorStrip(self, line: Line, thicknessPx: int = 3) -> BBox:
        """Return a thin strip that starts at the line's bottom edge.

        The strip shares the line's left edge and width and is
        ``thicknessPx`` tall.
        """
        _, left, _, width = line.bbox
        return (bbox_bottom(line.bbox), left, thicknessPx, width)

    def detectTMIs(self, line: Line, pmis: List[PMI], mers: List[MER], xHeightPx: Optional[float]) -> List[TMI]:
        """Detect TMIs on ``line`` from its PMIs and the surrounding MERs.

        Stub: always returns an empty list.
        """
        # TODO: implement MER–TMI–MER (and edge variants) using anchor strip logic.
        return []

    # ------------------------------------------------------------------
    # Minimal single-line driver for early debugging
    # ------------------------------------------------------------------

    def processLine_G7_G8(self, page: Page, line: Line, glyphs: List[Glyph], imageBin: Any) -> Dict[str, Any]:
        """Run the G7 and G8 stages for one line and return every intermediate.

        Pipeline: gaps -> TIs -> PMIs -> SPACE tokens -> obstacle rectangles
        -> MERs, followed by the line-grammar check.

        Returns:
            Dict with keys "lineId", "gaps", "tis", "pmis", "spaceTokens",
            "tokens" (TIs, then PMIs, then SPACE tokens), "obstacles", "mers".

        Raises:
            ValueError: propagated from ``assertLineGrammar``.
        """
        gaps = self.classifyGaps(line, glyphs)
        tis = self.formTIs(line, glyphs, gaps)
        pmis = self.formPMIs(line, tis, gaps)

        # SPACE emission sees the non-space tokens first, then is appended after them.
        baseTokens: List[Token] = [*tis, *pmis]
        spaceTokens = self.emitSpaceTokens(line, baseTokens, gaps)
        allTokens: List[Token] = [*baseTokens, *spaceTokens]

        obstacles = self.collectObstacleRectangles(allTokens)
        mers = self.detectMERs(page, imageBin, obstacles)

        # NOTE: the stubs do not compute merLeft/merRight yet, so pass None.
        self.assertLineGrammar(line, allTokens, merLeft=None, merRight=None)

        result: Dict[str, Any] = {"lineId": line.id}
        result["gaps"] = gaps
        result["tis"] = tis
        result["pmis"] = pmis
        result["spaceTokens"] = spaceTokens
        result["tokens"] = allTokens
        result["obstacles"] = obstacles
        result["mers"] = mers
        return result
