# In [1]:
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
from pathlib import Path

from tnh_scholar.text_processing import NumberedText
from tnh_scholar import logging_config
from tnh_scholar.logging_config import setup_logging, get_child_logger

logger = get_child_logger(__name__)

# Core models
class LogicalSection(BaseModel):
    """Represents a contextually meaningful segment of a larger text.
    
    Sections should preserve natural breaks in content (e.g., explicit section markers, topic shifts,
    argument development, narrative progression) while staying within specified size limits 
    in order to create chunks suitable for AI processing."""
    # NOTE(maintainers): pydantic copies the class docstring above and each Field
    # description below into the model's JSON schema, so they are runtime text.
    # Presumably that schema is what the AI is shown -- confirm before rewording
    # them, and keep maintainer-only notes in `#` comments like these.
    #
    # Only the first line of a section is stored. TextObject.get_section_content
    # (in this file) derives the end from the next section's start_line, or from
    # the end of the text for the last section.

    # Required (`...` means no default). TextObject.validate_sections treats this
    # as a 1-based line number and clamps it into [1, content.size].
    start_line: int = Field(
        ..., 
        description="Starting line number that begins this logical segment"
    )
    # Required. Not read by any code visible in this file.
    title: str = Field(
        ...,
        description="Descriptive title of section's key content"
    )

class TextObjectResponse(BaseModel):
    """Text Object for dividing large texts into AI-processable segments while
    maintaining broader document context."""
    # NOTE(maintainers): pydantic copies the class docstring above and each Field
    # description below into the model's JSON schema, so they are runtime text.
    # Presumably that schema is what the AI is shown -- confirm before rewording.
    #
    # Every field is required: the Field(...) ones explicitly, `sections` because
    # it has no default. TextObject.from_response (in this file) is the visible
    # consumer; the metadata key each field is stored under is noted per field.

    # Stored as metadata["summary"].
    document_summary: str = Field(
        ...,
        description="Concise, comprehensive overview of the text's content and purpose"
    )
    # Stored as metadata["metadata"]. A free-form string, not a structured mapping.
    document_metadata: str = Field(
        ...,
        description="Available Dublin Core standard metadata in human-readable format"
    )
    # Stored as metadata["concepts"]. A single string, not a list.
    key_concepts: str = Field(
        ...,
        description="Important terms, ideas, or references that appear throughout the text"
    )
    # Stored as metadata["context"].
    narrative_context: str = Field(
        ...,
        description="Concise overview of how the text develops or progresses as a whole"
    )
    # Passed to TextObject.__init__ as `language`. The type is a plain str, so the
    # ISO 639-1 format is requested by the description but not validated here.
    language: str = Field(..., description="ISO 639-1 language code")
    # Section markers; becomes TextObject.sections. Order and bounds are not
    # checked here -- see TextObject.validate_sections.
    sections: List[LogicalSection]

class TextObject:
    """Core text management with implicit section boundaries.

    Each section records only the line it starts on. A section's end is implied:
    it runs to the line before the next section starts, and the last section
    runs to the end of the text.

    Attributes:
        content: The numbered text being divided. This class only reads it, via
            ``content.size`` and ``content.get_segment(start, end)``.
        language: Language code of the text.
        sections: Section markers, expected in ascending ``start_line`` order.
            Call ``validate_sections`` to repair them after assignment.
    """

    def __init__(self, content: 'NumberedText', language: str):
        """Create a TextObject with no sections and empty metadata."""
        self.content = content  # immutable
        self.language = language
        self.sections: List[LogicalSection] = []
        self._metadata: Dict[str, Any] = {}

    @classmethod
    def from_response(cls, response: TextObjectResponse, content: 'NumberedText') -> 'TextObject':
        """Create TextObject from AI response format.

        Args:
            response: Parsed response supplying the language, the sections and
                the descriptive fields that become metadata.
            content: The numbered text the response describes.

        Returns:
            A new TextObject. Sections are taken as given and are not validated
            here; call ``validate_sections`` to repair them.
        """
        obj = cls(content=content, language=response.language)
        # Take our own list so adding or dropping sections on this object never
        # edits the response's list. The LogicalSection objects are still shared.
        obj.sections = list(response.sections)

        # Store metadata from response (basic for PoC)
        obj._metadata = {
            "summary": response.document_summary,
            "metadata": response.document_metadata,
            "concepts": response.key_concepts,
            "context": response.narrative_context,
        }
        return obj

    def validate_sections(self) -> None:
        """Repair section start lines so they are in bounds and strictly increasing.

        Each problem is logged as a warning and corrected rather than raised:

        * a start line below 1 becomes 1;
        * a start line beyond the text becomes the last line;
        * a start line not greater than the previous section's becomes
          ``previous + 1``;
        * a section that would then start past the last line is dropped,
          because no line of text is left for it.

        Raises:
            ValueError: If there are no sections at all.
        """
        if not self.sections:
            raise ValueError("TextObject must have at least one section")

        last_line = self.content.size
        kept: List[LogicalSection] = []

        for i, section in enumerate(self.sections):
            start = section.start_line

            if start < 1:
                logger.warning(f"Section {i}: start line must be >= 1")
                start = 1
            if start > last_line:
                logger.warning(f"Section {i}: start line exceeds text length")
                start = last_line
            if kept and start <= kept[-1].start_line:
                logger.warning(f"Section {i}: non-sequential start line")
                start = kept[-1].start_line + 1

            # The sequential repair can step past the end of the text when the
            # previous section already starts on the last line. Keeping such a
            # section would make get_section_content ask for a segment whose
            # start is greater than its end.
            if start > last_line:
                logger.warning(
                    f"Section {i}: no lines left in text after previous section; dropping section"
                )
                continue

            # Assign only on change so untouched sections are never written to.
            if start != section.start_line:
                section.start_line = start
            kept.append(section)

        # The first section is never dropped (its start is clamped into range and
        # there is nothing before it), so at least one section always remains.
        # Rebind rather than edit in place so any other holder of the old list
        # is unaffected.
        self.sections = kept

    def get_section_content(self, index: int) -> str:
        """Get content for a section using implicit end lines.

        Args:
            index: Zero-based position in ``sections``. Negative indices are
                rejected.

        Returns:
            Whatever ``content.get_segment(start, end)`` returns for the
            section's line range.

        Raises:
            IndexError: If ``index`` is outside ``0 .. len(sections) - 1``.
        """
        if index < 0 or index >= len(self.sections):
            raise IndexError("Section index out of range")

        start = self.sections[index].start_line
        # End is one less than start of next section (or end of text).
        # NOTE(review): this assumes get_segment treats `end` as inclusive -- confirm.
        if index < len(self.sections) - 1:
            end = self.sections[index + 1].start_line - 1
        else:
            end = self.content.size

        return self.content.get_segment(start, end)

    @property
    def metadata(self) -> Dict[str, Any]:
        """Shallow copy of the metadata dictionary.

        Adding, removing or rebinding keys on the returned dict does not affect
        this object. Values are not copied.
        """
        return self._metadata.copy()  # Return copy to prevent direct modification