diff --git a/src/codegen/extensions/tools/link_annotation.py b/src/codegen/extensions/tools/link_annotation.py
new file mode 100644
index 000000000..45e75f134
--- /dev/null
+++ b/src/codegen/extensions/tools/link_annotation.py
@@ -0,0 +1,164 @@
+"""Tool for annotating messages with links to codebase files and symbols."""
+
+import html
+import re
+from enum import StrEnum
+from typing import Callable
+
+from codegen import Codebase
+
+# Fenced code blocks. Non-greedy + DOTALL lets a block span lines and contain backticks;
+# the capturing group makes re.split() keep the blocks in its result.
+_CODE_BLOCK_RE = re.compile(r"(```.*?```)", re.DOTALL)
+# Inline code: text wrapped in single backticks
+_INLINE_CODE_RE = re.compile(r"`([^`]+)`")
+# Credentials embedded in a URL's authority, e.g. https://<token>@github.com/...
+_URL_CREDENTIALS_RE = re.compile(r"https://[^@/]+@")
+# Common file extensions we want to link
+_LINKABLE_EXTENSIONS = (".py", ".ts", ".tsx", ".jsx", ".js", ".json", ".mdx", ".md", ".yaml", ".yml", ".toml")
+
+
+class MessageChannel(StrEnum):
+    """Output formats a link can be rendered for."""
+
+    LINEAR = "linear"
+    MARKDOWN = "markdown"
+    HTML = "html"
+    SLACK = "slack"
+
+
+def format_link_linear(name: str, url: str) -> str:
+    """Render a link as ``[name](url)``."""
+    return f"[{name}]({url})"
+
+
+def format_link_markdown(name: str, url: str) -> str:
+    """Render a link as ``[name](url)``."""
+    return f"[{name}]({url})"
+
+
+def format_link_html(name: str, url: str) -> str:
+    """Render a link as an HTML anchor, escaping both the label and the URL."""
+    return f'<a href="{html.escape(url, quote=True)}">{html.escape(name)}</a>'
+
+
+def format_link_slack(name: str, url: str) -> str:
+    """Render a link as ``<url|name>``."""
+    return f"<{url}|{name}>"
+
+
+LINK_FORMATS: dict[MessageChannel, Callable[[str, str], str]] = {
+    MessageChannel.LINEAR: format_link_linear,
+    MessageChannel.MARKDOWN: format_link_markdown,
+    MessageChannel.HTML: format_link_html,
+    MessageChannel.SLACK: format_link_slack,
+}
+
+
+def clean_github_url(url: str) -> str:
+    """Clean a GitHub URL by removing access tokens and standardizing format.
+
+    Args:
+        url: A URL that contains ``github.com``
+
+    Returns:
+        The URL without embedded credentials, rooted at ``https://github.com``
+    """
+    # Remove access token if present. The match stops at the first "/" so that an "@"
+    # later in the path (e.g. ``@types/``) cannot swallow the host.
+    url = _URL_CREDENTIALS_RE.sub("https://", url)
+
+    # Ensure it starts with standard github.com
+    if not url.startswith("https://github.com"):
+        url = "https://github.com" + url.split("github.com")[-1]
+
+    return url
+
+
+def format_link(name: str, url: str | None, format: MessageChannel = MessageChannel.SLACK) -> str:
+    """Format ``name`` as a link to ``url`` in the syntax of the given channel.
+
+    Args:
+        name: The link text
+        url: The link target; ``None`` is treated as an empty URL
+        format: The message channel format to use
+
+    Returns:
+        The formatted link
+    """
+    if url is None:
+        url = ""
+    # Clean the URL if it's a GitHub URL
+    if "github.com" in url:
+        url = clean_github_url(url)
+    return LINK_FORMATS[format](name, url)
+
+
+def extract_code_snippets(message: str) -> list[str]:
+    """Find all text wrapped in single backticks, excluding content in code blocks.
+
+    Args:
+        message: The message to process
+
+    Returns:
+        List of strings found between single backticks, excluding those in code blocks
+    """
+    # First remove all code blocks (text between ```)
+    message_without_blocks = _CODE_BLOCK_RE.sub("", message)
+
+    # Then find all text wrapped in single backticks
+    return _INLINE_CODE_RE.findall(message_without_blocks)
+
+
+def is_likely_filepath(text: str) -> bool:
+    """Check if a string looks like a filepath."""
+    # Either it contains a slash (path separator) or it ends with a common file extension
+    return "/" in text or text.endswith(_LINKABLE_EXTENSIONS)
+
+
+def add_links_to_message(message: str, codebase: Codebase, channel: MessageChannel = MessageChannel.SLACK) -> str:
+    """Add links to symbols and files in a message.
+
+    This function:
+    1. Links code snippets that match symbol names
+    2. Links anything that looks like a filepath
+
+    Snippets that do not resolve to a file or to exactly one symbol are left as-is,
+    and fenced code blocks are never modified.
+
+    Args:
+        message: The message to process
+        codebase: The codebase to look up symbols and files in
+        channel: The message channel format to use
+
+    Returns:
+        The message with appropriate links added
+    """
+    links: dict[str, str] = {}
+    # dict.fromkeys de-duplicates in order, so each distinct snippet is looked up once
+    for snippet in dict.fromkeys(extract_code_snippets(message)):
+        # Filepaths
+        if is_likely_filepath(snippet):
+            file = codebase.get_file(snippet, optional=True)
+            if file:
+                links[snippet] = format_link(snippet, file.github_url, channel)
+
+        # Symbols
+        else:
+            symbols = codebase.get_symbols(snippet)
+            # Only link if there's exactly one symbol
+            if len(symbols) == 1:
+                links[snippet] = format_link(symbols[0].name, symbols[0].github_url, channel)
+
+    if not links:
+        return message
+
+    def link_snippet(match: re.Match[str]) -> str:
+        # Leave snippets without a resolved link exactly as they were written
+        return links.get(match.group(1), match.group(0))
+
+    # Splitting on the capturing pattern puts code blocks at the odd indices; only the
+    # text between them (even indices) is rewritten
+    segments = _CODE_BLOCK_RE.split(message)
+    segments[::2] = [_INLINE_CODE_RE.sub(link_snippet, segment) for segment in segments[::2]]
+    return "".join(segments)
diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py
index 540800190..e857273f4 100644
--- a/src/codegen/sdk/core/file.py
+++ b/src/codegen/sdk/core/file.py
@@ -223,7 +223,13 @@ def owners(self) -> set[str]:
     @noapidoc
     def github_url(self) -> str | None:
         if self.ctx.base_url:
-            return self.ctx.base_url + "/" + self.file_path
+            base_url = self.ctx.base_url
+            if base_url.endswith(".git"):
+                # Drop only the trailing ".git"; str.replace would also rewrite any
+                # earlier ".git" in the URL (e.g. the ".github" in "www.github.com")
+                # TODO: "develop" is hard-coded; this should be the repo's actual branch
+                return base_url.removesuffix(".git") + "/blob/develop/" + self.file_path
+            return base_url + "/" + self.file_path
 
     @property
     @reader
diff --git a/tests/unit/codegen/extensions/langchain/__init__.py b/tests/unit/codegen/extensions/langchain/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/codegen/extensions/langchain/test_agent.py b/tests/unit/codegen/extensions/langchain/test_agent.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/codegen/extensions/test_message_annotation.py b/tests/unit/codegen/extensions/test_message_annotation.py
new file mode 100644
index 000000000..0063c27e0
--- /dev/null
+++ b/tests/unit/codegen/extensions/test_message_annotation.py
@@ -0,0 +1,218 @@
+"""Tests for message annotation functionality."""
+
+import pytest
+
+from codegen.extensions.tools.link_annotation import (
+    MessageChannel,
+    add_links_to_message,
+    clean_github_url,
+    extract_code_snippets,
+    format_link,
+    is_likely_filepath,
+)
+from codegen.sdk.codebase.factory.get_session import get_codebase_session
+
+
+@pytest.fixture
+def codebase(tmpdir):
+    """Create a simple codebase for testing."""
+    # language=python
+    content = """
+def hello():
+    print("Hello, world!")
+
+class Greeter:
+    def greet(self):
+        hello()
+
+def hello_duplicate():
+    pass
+
+def hello_duplicate():
+    pass
+
+class Greeter_duplicate:
+    pass
+"""
+    # Create multiple files to test file linking
+    files = {
+        "src/main.py": content,
+        "src/utils/helpers.py": "# Helper functions",
+        "docs/README.md": "# Documentation",
+        "tsconfig.json": "{}",
+    }
+    with get_codebase_session(tmpdir=tmpdir, files=files) as codebase:
+        yield codebase
+
+
+def test_format_link_linear():
+    """Test linear link formatting."""
+    assert format_link("test", "http://example.com", MessageChannel.LINEAR) == "[test](http://example.com)"
+
+
+def test_format_link_markdown():
+    """Test markdown link formatting."""
+    assert format_link("test", "http://example.com", MessageChannel.MARKDOWN) == "[test](http://example.com)"
+
+
+def test_format_link_html():
+    """Test HTML link formatting."""
+    assert format_link("test", "http://example.com", MessageChannel.HTML) == '<a href="http://example.com">test</a>'
+
+
+def test_format_link_html_escapes_markup():
+    """Test that HTML link formatting escapes the label and the URL."""
+    assert format_link("a<b", "http://example.com/?x=1&y=2", MessageChannel.HTML) == '<a href="http://example.com/?x=1&amp;y=2">a&lt;b</a>'
+
+
+def test_format_link_slack():
+    """Test Slack link formatting."""
+    assert format_link("test", "http://example.com", MessageChannel.SLACK) == "<http://example.com|test>"
+
+
+def test_clean_github_url_removes_token():
+    """Test that access tokens are stripped from GitHub URLs."""
+    url = "https://x-access-token:secret@github.com/org/repo/blob/develop/src/main.py"
+    assert clean_github_url(url) == "https://github.com/org/repo/blob/develop/src/main.py"
+
+
+def test_clean_github_url_keeps_at_sign_in_path():
+    """Test that an "@" in the path is not mistaken for credentials."""
+    url = "https://github.com/org/repo/blob/develop/@types/index.d.ts"
+    assert clean_github_url(url) == url
+
+
+def test_extract_code_snippets():
+    """Test extracting code snippets from messages."""
+    message = "Here is some `code` and `more code` and ```a code block``` and `final code`"
+    snippets = extract_code_snippets(message)
+    assert snippets == ["code", "more code", "final code"]
+
+
+def test_extract_code_snippets_block_containing_backticks():
+    """Test that code blocks containing backticks are still excluded."""
+    message = "Run `make` but not ```echo `date` ``` here"
+    assert extract_code_snippets(message) == ["make"]
+
+
+def test_is_likely_filepath():
+    """Test filepath detection."""
+    # Should detect paths with slashes
+    assert is_likely_filepath("src/file.py")
+    assert is_likely_filepath("path/to/file")
+
+    # Should detect common extensions
+    assert is_likely_filepath("file.py")
+    assert is_likely_filepath("component.tsx")
+    assert is_likely_filepath("config.json")
+    assert is_likely_filepath("README.md")
+
+    # Should not detect regular words
+    assert not is_likely_filepath("hello")
+    assert not is_likely_filepath("Greeter")
+    assert not is_likely_filepath("function")
+
+
+def test_add_links_single_symbol(codebase):
+    """Test adding links for a single symbol."""
+    message = "Here is the `hello` function"
+    result = add_links_to_message(message, codebase, channel=MessageChannel.SLACK)
+    assert "|hello>" in result
+
+
+def test_add_links_class(codebase):
+    """Test adding links for a class."""
+    message = "The `Greeter` class"
+    result = add_links_to_message(message, codebase)
+    assert "Greeter" in result
+    assert result.count("<") == 1  # One link should be created
+
+
+def test_add_links_filepath(codebase):
+    """Test adding links for filepaths."""
+    message = "Check out `src/main.py` and `src/utils/helpers.py`"
+    result = add_links_to_message(message, codebase)
+    assert "|src/main.py>" in result
+    assert "|src/utils/helpers.py>" in result
+
+
+def test_add_links_filepath_with_extension(codebase):
+    """Test adding links for files with common extensions."""
+    message = "See `tsconfig.json` and `docs/README.md`"
+    result = add_links_to_message(message, codebase)
+    assert "|tsconfig.json>" in result
+    assert "|docs/README.md>" in result
+
+
+def test_nonexistent_filepath(codebase):
+    """Test handling of nonexistent filepaths."""
+    message = "This `src/nonexistent.py` should not be linked"
+    result = add_links_to_message(message, codebase)
+    assert result == message  # Message should remain unchanged
+
+
+def test_ignore_code_blocks(codebase):
+    """Test that code blocks are ignored."""
+    message = """Here's a code block:
+```python
+def hello():
+    print("Hello!")
+```
+And here's an inline `hello` reference."""
+
+    result = add_links_to_message(message, codebase)
+    # The inline reference should be linked
+    assert "<" in result
+    # But the code block should remain unchanged
+    assert "```python" in result
+    assert "def hello():" in result
+
+
+def test_nonexistent_symbol(codebase):
+    """Test handling of nonexistent symbols."""
+    message = "This `nonexistent_function` should not be linked"
+    result = add_links_to_message(message, codebase)
+    assert result == message  # Message should remain unchanged
+
+
+def test_duplicate_symbols(codebase):
+    """Test handling of duplicate symbols."""
+    message = "This `hello_duplicate` should not be linked"
+    result = add_links_to_message(message, codebase)
+    assert result == message  # Message should remain unchanged
+
+
+def test_mixed_content(codebase):
+    """Test message with mixed content types."""
+    message = """Here's a complex message:
+- Valid symbol: `hello`
+- Valid file: `src/main.py`
+- Invalid symbol: `nonexistent`
+- Invalid file: `src/nonexistent.py`
+- Code block:
+```python
+def hello():
+    pass
+```
+- Duplicate symbol: `hello_duplicate`
+- Another valid symbol: `Greeter`
+- Another valid file: `docs/README.md`
+"""
+    result = add_links_to_message(message, codebase)
+
+    # Valid symbols should be linked
+    assert "|hello>" in result
+    assert "|Greeter>" in result
+
+    # Valid files should be linked
+    assert "|src/main.py>" in result
+    assert "|docs/README.md>" in result
+
+    # Invalid symbols and files should remain as-is
+    assert "`nonexistent`" in result
+    assert "`src/nonexistent.py`" in result
+    assert "`hello_duplicate`" in result
+
+    # Code block should be preserved
+    assert "```python" in result
+    assert "def hello():" in result