# models

> Data models and URL bundles for Phase 2 Left Column: Text Segmentation

In [None]:
#| default_exp models

In [None]:
#| export
from typing import Optional, List, Dict, Any
from typing_extensions import TypedDict
from dataclasses import dataclass, field, asdict, fields

from cjm_fasthtml_card_stack.core.models import CardStackUrls

## TextSegment

Represents a text segment during the workflow before it's committed to the graph. This is the mutable working copy used during decomposition.

In [None]:
#| export
@dataclass
class TextSegment:
    """Mutable working copy of one text segment, used in the workflow before graph commit."""

    index: int  # Position in the sequence (0-indexed)
    text: str  # Text content of the segment

    # Coordinates pointing back into the original transcription
    source_id: Optional[str] = None  # Source block ID
    source_provider_id: Optional[str] = None  # Identifier of the source provider
    start_char: Optional[int] = None  # Start character index within the source
    end_char: Optional[int] = None  # End character index within the source

    @classmethod
    def from_dict(
        cls,
        data: Dict[str, Any]  # Mapping of field names to values; extra keys are tolerated
    ) -> "TextSegment":  # Newly constructed TextSegment
        """Build a segment from a dictionary, ignoring keys that are not dataclass fields."""
        known_names = frozenset(spec.name for spec in fields(cls))
        kwargs = {}
        for key, value in data.items():
            # Legacy/unknown keys would make the constructor raise, so drop them here
            if key in known_names:
                kwargs[key] = value
        return cls(**kwargs)

    def to_dict(self) -> Dict[str, Any]:  # Field name -> value mapping
        """Serialize all fields into a plain dictionary suitable for JSON."""
        serialized = asdict(self)
        return serialized

## SegmentationStepState

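TypedDict for the Phase 2 segmentation step state. It gives static type checkers the expected shape of the state dictionary used by the segmentation (left column) step; it is declared with `total=False`, so every key is optional, and nothing is validated at runtime.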

In [None]:
#| export
class SegmentationStepState(TypedDict, total=False):
    """State for Phase 2 (left column): Text Segmentation.

    Declared with `total=False`, so every key is optional and may be absent
    from a given state dict. A TypedDict only describes the dictionary's shape
    for static type checkers: it performs no runtime validation and supplies
    no default values.
    """

    # --- Workflow-specific ---
    is_initialized: bool  # Whether segments have been initialized from Phase 1
    segments: List[Dict[str, Any]]  # Working segments (serialized TextSegment)
    initial_segments: List[Dict[str, Any]]  # Original segments from initial split (for reset)

    # --- Card stack view state (extractable to cjm-fasthtml-card-stack) ---
    # NOTE(review): the "default 0" below is not enforced by this TypedDict;
    # presumably readers fall back to 0 when the key is missing — confirm.
    focused_index: int  # Currently focused item index (default 0)
    visible_count: int  # Number of visible cards in viewport
    is_auto_mode: bool  # Whether card count is in auto-adjust mode
    card_width: int  # Card stack width in rem units
    history: List[List[Dict[str, Any]]]  # Stack of previous item state snapshots

## SegmentationUrls

URL bundle for Phase 2 segmentation handlers and renderers. Composes `CardStackUrls` (from the `cjm-fasthtml-card-stack` library) for navigation/viewport operations alongside the workflow-specific URLs: split, merge, entering/exiting split mode, the toolbar actions (reset, AI split, undo), and initialization.

In [None]:
#| export
@dataclass
class SegmentationUrls:
    """URL bundle for Phase 2 segmentation route handlers and renderers.

    Every workflow-specific URL is a plain string that defaults to "" and
    `card_stack` defaults to a freshly constructed `CardStackUrls` per
    instance, so the bundle can be created with no arguments.
    """

    # Card stack navigation and viewport (from cjm-fasthtml-card-stack library)
    # default_factory builds a new CardStackUrls for each instance rather than sharing one
    card_stack: CardStackUrls = field(default_factory=CardStackUrls)

    # Workflow-specific: card operations
    split: str = ""  # Execute split at word position
    merge: str = ""  # Merge segment with previous
    enter_split: str = ""  # Enter split mode for focused segment
    exit_split: str = ""  # Exit split mode

    # Workflow-specific: toolbar
    reset: str = ""  # Reset to initial segments
    ai_split: str = ""  # AI (NLTK) re-split
    undo: str = ""  # Undo last operation

    # Workflow-specific: initialization
    init: str = ""  # Initialize segments from Phase 1

In [None]:
#| hide
# Run nbdev's export step, which writes `#| export` cells out to the library modules
import nbdev

nbdev.nbdev_export()