diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py
index 86f3cc3dc..933cb628d 100644
--- a/src/codegen/extensions/langchain/tools.py
+++ b/src/codegen/extensions/langchain/tools.py
@@ -45,21 +45,41 @@ class ViewFileInput(BaseModel):
     """Input for viewing a file."""
 
     filepath: str = Field(..., description="Path to the file relative to workspace root")
+    start_line: Optional[int] = Field(None, description="Starting line number to view (1-indexed, inclusive)")
+    end_line: Optional[int] = Field(None, description="Ending line number to view (1-indexed, inclusive)")
+    max_lines: Optional[int] = Field(None, description="Maximum number of lines to view at once, defaults to 250")
+    line_numbers: Optional[bool] = Field(True, description="If True, add line numbers to the content (1-indexed)")
 
 
 class ViewFileTool(BaseTool):
     """Tool for viewing file contents and metadata."""
 
     name: ClassVar[str] = "view_file"
-    description: ClassVar[str] = "View the contents and metadata of a file in the codebase"
+    description: ClassVar[str] = """View the contents and metadata of a file in the codebase.
+For large files (>250 lines), content will be paginated. Use start_line and end_line to navigate through the file.
+The response will indicate if there are more lines available to view."""
     args_schema: ClassVar[type[BaseModel]] = ViewFileInput
     codebase: Codebase = Field(exclude=True)
 
     def __init__(self, codebase: Codebase) -> None:
         super().__init__(codebase=codebase)
 
-    def _run(self, filepath: str) -> str:
-        result = view_file(self.codebase, filepath)
+    def _run(
+        self,
+        filepath: str,
+        start_line: Optional[int] = None,
+        end_line: Optional[int] = None,
+        max_lines: Optional[int] = None,
+        line_numbers: Optional[bool] = True,
+    ) -> str:
+        result = view_file(
+            self.codebase,
+            filepath,
+            line_numbers=line_numbers if line_numbers is not None else True,
+            start_line=start_line,
+            end_line=end_line,
+            max_lines=max_lines if max_lines is not None else 250,
+        )
         return result.render()
 
 
diff --git a/src/codegen/extensions/tools/view_file.py b/src/codegen/extensions/tools/view_file.py
index a6e0e3c84..88bb4bc28 100644
--- a/src/codegen/extensions/tools/view_file.py
+++ b/src/codegen/extensions/tools/view_file.py
@@ -22,13 +22,40 @@ class ViewFileObservation(Observation):
         default=None,
         description="Number of lines in the file",
     )
+    start_line: Optional[int] = Field(
+        default=None,
+        description="Starting line number of the content (1-indexed)",
+    )
+    end_line: Optional[int] = Field(
+        default=None,
+        description="Ending line number of the content (1-indexed)",
+    )
+    has_more: Optional[bool] = Field(
+        default=None,
+        description="Whether there are more lines after end_line",
+    )
+    max_lines_per_page: Optional[int] = Field(
+        default=None,
+        description="Maximum number of lines that can be viewed at once",
+    )
 
-    str_template: ClassVar[str] = "File {filepath} ({line_count} lines)"
+    str_template: ClassVar[str] = "File {filepath} (showing lines {start_line}-{end_line} of {line_count})"
 
     def render(self) -> str:
-        return f"""[VIEW FILE]: {self.filepath} ({self.line_count} lines)
-{self.content}
-"""
+        """Render the file view with pagination information if applicable."""
+        header = f"[VIEW FILE]: {self.filepath}"
+        if self.line_count is not None:
+            header += f" ({self.line_count} lines total)"
+
+        if self.start_line is not None and self.end_line is not None:
+            header += f"\nShowing lines {self.start_line}-{self.end_line}"
+            if self.has_more:
+                header += f" (more lines available, max {self.max_lines_per_page} lines per page)"
+
+        if not self.content:
+            return f"{header}\n"
+
+        return f"{header}\n\n{self.content}"
 
 
 def add_line_numbers(content: str) -> str:
@@ -45,13 +72,23 @@ def add_line_numbers(content: str) -> str:
     return "\n".join(f"{i + 1:>{width}}|{line}" for i, line in enumerate(lines))
 
 
-def view_file(codebase: Codebase, filepath: str, line_numbers: bool = True) -> ViewFileObservation:
+def view_file(
+    codebase: Codebase,
+    filepath: str,
+    line_numbers: bool = True,
+    start_line: Optional[int] = None,
+    end_line: Optional[int] = None,
+    max_lines: int = 250,
+) -> ViewFileObservation:
     """View the contents and metadata of a file.
 
     Args:
         codebase: The codebase to operate on
         filepath: Path to the file relative to workspace root
         line_numbers: If True, add line numbers to the content (1-indexed)
+        start_line: Starting line number to view (1-indexed, inclusive); clamped to the range 1..line count
+        end_line: Ending line number to view (1-indexed, inclusive); negative values are treated as 0
+        max_lines: Maximum number of lines to view at once, defaults to 250; values below 1 are treated as 1
     """
     try:
         file = codebase.get_file(filepath)
@@ -62,15 +99,60 @@ def view_file(codebase: Codebase, filepath: str, line_numbers: bool = True) -> V
             filepath=filepath,
             content="",
             line_count=0,
+            start_line=start_line,
+            end_line=end_line,
+            has_more=False,
+            max_lines_per_page=max_lines,
         )
 
-    content = file.content
-    if line_numbers:
-        content = add_line_numbers(content)
+    # Split content into lines and get total line count
+    lines = file.content.splitlines()
+    total_lines = len(lines)
+
+    # A page must hold at least one line, otherwise every page would be empty
+    max_lines = max(1, max_lines)
+
+    # If no start_line specified, start from beginning
+    if start_line is None:
+        start_line = 1
+
+    # Ensure start_line is within bounds
+    start_line = max(1, min(start_line, total_lines))
+
+    # If no end_line specified, show up to max_lines from start
+    if end_line is None:
+        end_line = min(start_line + max_lines - 1, total_lines)
+    else:
+        # Ensure end_line is within bounds and doesn't exceed max_lines from start;
+        # a negative value is clamped to 0 so it is never used as a from-the-end slice index
+        end_line = min(max(end_line, 0), total_lines, start_line + max_lines - 1)
 
-    return ViewFileObservation(
+    # Extract the requested lines (convert to 0-based indexing)
+    content_lines = lines[start_line - 1 : end_line]
+    content = "\n".join(content_lines)
+
+    # Add line numbers if requested
+    if line_numbers:
+        # Pass the actual line numbers for proper numbering
+        numbered_lines = []
+        width = len(str(total_lines))  # Use total_lines for consistent width
+        for i, line in enumerate(content_lines, start=start_line):
+            numbered_lines.append(f"{i:>{width}}|{line}")
+        content = "\n".join(numbered_lines)
+
+    # Create base observation with common fields
+    observation = ViewFileObservation(
         status="success",
         filepath=file.filepath,
         content=content,
-        line_count=len(content.splitlines()),
+        line_count=total_lines,
     )
+
+    # Only include pagination fields if file exceeds max_lines
+    if total_lines > max_lines:
+        observation.start_line = start_line
+        observation.end_line = end_line
+        observation.has_more = end_line < total_lines
+        observation.max_lines_per_page = max_lines
+
+    return observation
diff --git a/tests/unit/codegen/extensions/test_tools.py b/tests/unit/codegen/extensions/test_tools.py
index fd0fafb31..9d2b6fdb5 100644
--- a/tests/unit/codegen/extensions/test_tools.py
+++ b/tests/unit/codegen/extensions/test_tools.py
@@ -40,12 +40,172 @@ def greet(self):
         yield codebase
 
 
+@pytest.fixture
+def large_codebase(tmpdir):
+    """Create a codebase with a large file for pagination testing."""
+    # Create a large file with predictable content
+    large_file_lines = []
+    # Add imports at the top
+    large_file_lines.extend(
+        [
+            "from __future__ import annotations",
+            "import sys",
+            "import os",
+            "from typing import List, Optional, Dict",
+            "",
+            "# Constants",
+            "MAX_ITEMS = 100",
+            "DEBUG = False",
+            "",
+            "# Main class definition",
+            "class LargeClass:",
+        ]
+    )
+
+    # Add methods with incrementing numbers
+    for i in range(1, 401):  # 11 header lines + 400 methods x 5 lines each = 2010 lines in total
+        if i % 20 == 0:
+            # Add some class methods periodically
+            large_file_lines.extend(["    @classmethod", f"    def class_method_{i}(cls) -> None:", f"        print('Class method {i}')", "        return None", ""])
+        else:
+            # Add regular methods
+            large_file_lines.extend(
+                [
+                    f"    def method_{i}(self, param_{i}: int) -> str:",
+                    f"        # Method {i} does something interesting",
+                    f"        value = param_{i} * {i}",
+                    f"        return f'Method {i} computed: {{value}}'",
+                    "",
+                ]
+            )
+
+    large_file_content = "\n".join(large_file_lines)
+
+    files = {
+        "src/main.py": """
+def hello():
+    print("Hello, world!")
+""",
+        "src/large_file.py": large_file_content,
+    }
+
+    with get_codebase_session(tmpdir=tmpdir, files=files) as codebase:
+        yield codebase
+
+
 def test_view_file(codebase):
     """Test viewing a file."""
+    # Test basic file viewing
     result = view_file(codebase, "src/main.py")
     assert result.status == "success"
     assert result.filepath == "src/main.py"
     assert "hello()" in result.content
+    # For small files, pagination fields should not be present
+    assert result.start_line is None
+    assert result.end_line is None
+    assert result.has_more is None
+    assert result.max_lines_per_page is None
+
+
+def test_view_file_pagination(large_codebase):
+    """Test viewing a file with pagination."""
+    # Test default pagination (should show first max_lines lines)
+    result = view_file(large_codebase, "src/large_file.py")
+    assert result.status == "success"
+    assert result.start_line == 1
+    assert result.end_line == 250  # Default max_lines
+    assert result.has_more is True
+    assert result.max_lines_per_page == 250
+    assert "from __future__ import annotations" in result.content  # First line
+    assert "def method_1" in result.content  # Early method
+    assert "def method_251" not in result.content  # Method after page 1
+
+    # Test custom pagination range
+    result = view_file(large_codebase, "src/large_file.py", start_line=200, end_line=250)
+    assert result.status == "success"
+    assert result.start_line == 200
+    assert result.end_line == 250
+    assert result.has_more is True
+    assert "def method_39" in result.content  # Regular method before class method
+    assert "def class_method_40" in result.content  # Class method at 40
+    assert "def method_41" in result.content  # Regular method after class method
+    assert "from __future__ import annotations" not in result.content  # Before range
+    assert "def method_251" not in result.content  # After range
+
+    # Test viewing from the middle of the file with only start_line given
+    result = view_file(large_codebase, "src/large_file.py", start_line=350)
+    assert result.status == "success"
+    assert result.start_line == 350
+    assert result.has_more is True  # File has 2010 lines, so there should be more content
+    assert "def method_69" in result.content  # Regular method
+    assert "def class_method_80" in result.content  # Class method at 80
+    assert result.end_line == 599  # Should show 250 lines from start (350 to 599)
+
+    # Test custom max_lines
+    result = view_file(large_codebase, "src/large_file.py", max_lines=100)
+    assert result.status == "success"
+    assert result.start_line == 1
+    assert result.end_line == 100
+    assert result.has_more is True
+    assert result.max_lines_per_page == 100
+    assert "from __future__ import annotations" in result.content
+    assert len(result.content.splitlines()) <= 100
+
+    # Test line numbers display
+    result = view_file(large_codebase, "src/large_file.py", start_line=198, end_line=202, line_numbers=True)
+    assert result.status == "success"
+    assert "198|" in result.content
+    assert "199|" in result.content
+    assert "200|" in result.content
+    assert "201|" in result.content
+    assert "202|" in result.content
+
+    # Test without line numbers
+    result = view_file(large_codebase, "src/large_file.py", start_line=198, end_line=202, line_numbers=False)
+    assert result.status == "success"
+    assert "198|" not in result.content
+    assert "199|" not in result.content
+
+
+def test_view_file_pagination_edge_cases(large_codebase):
+    """Test edge cases for file pagination."""
+    # Test start_line > end_line (should respect provided end_line)
+    result = view_file(large_codebase, "src/large_file.py", start_line=200, end_line=100)
+    assert result.status == "success"
+    assert result.start_line == 200
+    assert result.end_line == 100  # Should respect provided end_line
+    assert result.content == ""  # No content since end_line < start_line
+
+    # Test start_line near the end of the file (window is truncated at the last line)
+    result = view_file(large_codebase, "src/large_file.py", start_line=2000)
+    assert result.status == "success"
+    assert result.start_line == 2000  # Should use provided start_line
+    assert result.end_line == 2010  # Should adjust to total lines
+    assert result.has_more is False
+
+    # Test end_line > file length (should truncate to file length)
+    result = view_file(large_codebase, "src/large_file.py", start_line=200, end_line=2000)
+    assert result.status == "success"
+    assert result.start_line == 200
+    assert result.end_line == min(200 + 250 - 1, 2010)  # Should respect max_lines and file length
+
+    # Test negative start_line (should default to 1)
+    result = view_file(large_codebase, "src/large_file.py", start_line=-10)
+    assert result.status == "success"
+    assert result.start_line == 1
+    assert result.end_line == 250
+
+    # Test negative end_line (clamped to 0, never used as a from-the-end slice index)
+    result = view_file(large_codebase, "src/large_file.py", start_line=1, end_line=-5)
+    assert result.status == "success"
+    assert result.end_line == 0
+    assert result.content == ""
+
+    # Test non-positive max_lines (clamped to one line per page)
+    result = view_file(large_codebase, "src/large_file.py", max_lines=0)
+    assert result.status == "success"
+    assert result.end_line == 1
+    assert result.max_lines_per_page == 1
 
 
 def test_list_directory(codebase):