From 7d3e235d6ddde8e61d625ba5fdf42a457fa0dc74 Mon Sep 17 00:00:00 2001 From: jayhack Date: Tue, 4 Mar 2025 16:26:38 -0800 Subject: [PATCH 1/2] . --- src/codegen/extensions/langchain/tools.py | 41 ++++++++-- .../extensions/tools/github/__init__.py | 2 + src/codegen/extensions/tools/github/search.py | 77 +++++++++++++++++++ src/codegen/git/clients/git_repo_client.py | 11 +++ tests/integration/extension/test_github.py | 51 ++++++++++-- 5 files changed, 168 insertions(+), 14 deletions(-) create mode 100644 src/codegen/extensions/tools/github/search.py diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index acfbdaf59..c63843738 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -7,6 +7,7 @@ from codegen.extensions.linear.linear_client import LinearClient from codegen.extensions.tools.bash import run_bash_command +from codegen.extensions.tools.github.search import search from codegen.extensions.tools.linear.linear import ( linear_comment_on_issue_tool, linear_create_issue_tool, @@ -20,7 +21,6 @@ from codegen.extensions.tools.relace_edit import relace_edit from codegen.extensions.tools.replacement_edit import replacement_edit from codegen.extensions.tools.reveal_symbol import reveal_symbol -from codegen.extensions.tools.search import search from codegen.extensions.tools.semantic_edit import semantic_edit from codegen.extensions.tools.semantic_search import semantic_search from codegen.sdk.core.codebase import Codebase @@ -560,6 +560,28 @@ def _run(self, title: str, body: str) -> str: return result.render() +class GithubSearchIssuesInput(BaseModel): + """Input for searching GitHub issues.""" + + query: str = Field(..., description="Search query string to find issues") + + +class GithubSearchIssuesTool(BaseTool): + """Tool for searching GitHub issues.""" + + name: ClassVar[str] = "search_issues" + description: ClassVar[str] = "Search for GitHub issues/PRs using a query 
string from pygithub, e.g. 'is:pr is:open test_query'" + args_schema: ClassVar[type[BaseModel]] = GithubSearchIssuesInput + codebase: Codebase = Field(exclude=True) + + def __init__(self, codebase: Codebase) -> None: + super().__init__(codebase=codebase) + + def _run(self, query: str) -> str: + result = search(self.codebase, query) + return result.render() + + class GithubViewPRInput(BaseModel): """Input for getting PR contents.""" @@ -856,6 +878,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: GithubCreatePRCommentTool(codebase), GithubCreatePRReviewCommentTool(codebase), GithubViewPRTool(codebase), + GithubSearchIssuesTool(codebase), # Linear LinearGetIssueTool(codebase), LinearGetIssueCommentsTool(codebase), @@ -870,22 +893,28 @@ class ReplacementEditInput(BaseModel): filepath: str = Field(..., description="Path to the file to edit relative to the workspace root. The file must exist and be a text file.") pattern: str = Field( ..., - description="Regular expression pattern to match text that should be replaced. Supports all Python regex syntax including capture groups (\1, \2, etc). The pattern is compiled with re.MULTILINE flag by default.", + description="""Regular expression pattern to match text that should be replaced. +Supports all Python regex syntax including capture groups (\1, \2, etc). The pattern is compiled with re.MULTILINE flag by default.""", ) replacement: str = Field( ..., - description="Text to replace matched patterns with. Can reference regex capture groups using \1, \2, etc. If using regex groups in pattern, make sure to preserve them in replacement if needed.", + description="""Text to replace matched patterns with. +Can reference regex capture groups using \1, \2, etc. If using regex groups in pattern, make sure to preserve them in replacement if needed.""", ) start: int = Field( - default=1, description="Starting line number (1-indexed, inclusive) to begin replacements from. 
Use this with 'end' to limit changes to a specific region. Default is 1 (start of file)." + default=1, + description="""Starting line number (1-indexed, inclusive) to begin replacements from. +Use this with 'end' to limit changes to a specific region. Default is 1 (start of file).""", ) end: int = Field( default=-1, - description="Ending line number (1-indexed, inclusive) to stop replacements at. Use -1 to indicate end of file. Use this with 'start' to limit changes to a specific region. Default is -1 (end of file).", + description="""Ending line number (1-indexed, inclusive) to stop replacements at. +Use -1 to indicate end of file. Use this with 'start' to limit changes to a specific region. Default is -1 (end of file).""", ) count: Optional[int] = Field( default=None, - description="Maximum number of replacements to make. Use None to replace all occurrences (default), or specify a number to limit replacements. Useful when you only want to replace the first N occurrences.", + description="""Maximum number of replacements to make. Use None to replace all occurrences (default), or specify a number to limit replacements. 
+Useful when you only want to replace the first N occurrences.""", ) diff --git a/src/codegen/extensions/tools/github/__init__.py b/src/codegen/extensions/tools/github/__init__.py index a59669dd2..f5f9761f3 100644 --- a/src/codegen/extensions/tools/github/__init__.py +++ b/src/codegen/extensions/tools/github/__init__.py @@ -1,11 +1,13 @@ from .create_pr import create_pr from .create_pr_comment import create_pr_comment from .create_pr_review_comment import create_pr_review_comment +from .search import search from .view_pr import view_pr __all__ = [ "create_pr", "create_pr_comment", "create_pr_review_comment", + "search", "view_pr", ] diff --git a/src/codegen/extensions/tools/github/search.py b/src/codegen/extensions/tools/github/search.py new file mode 100644 index 000000000..b83504937 --- /dev/null +++ b/src/codegen/extensions/tools/github/search.py @@ -0,0 +1,77 @@ +"""Tools for searching GitHub issues and pull requests.""" + +from typing import ClassVar + +from pydantic import Field + +from codegen.sdk.core.codebase import Codebase + +from ..observation import Observation + + +class SearchResultObservation(Observation): + """Response from searching issues and pull requests.""" + + query: str = Field( + description="The search query that was used", + ) + results: list[dict] = Field( + description="List of matching issues/PRs with their details. Use is:pr in query to search for PRs, is:issue for issues.", + ) + + str_template: ClassVar[str] = "Found {total} results matching query: {query}" + + @property + def total(self) -> int: + return len(self.results) + + +def search( + codebase: Codebase, + query: str, + max_results: int = 20, +) -> SearchResultObservation: + """Search for GitHub issues and pull requests using the provided query. + + To search for pull requests specifically, include 'is:pr' in your query. + To search for issues specifically, include 'is:issue' in your query. + If neither is specified, both issues and PRs will be included in results. 
+ + Args: + codebase: The codebase to operate on + query: Search query string (e.g. "is:pr label:bug", "is:issue is:open") + Filter by state inside the query itself (e.g. "is:open", "is:closed") + max_results: Maximum number of results to return + """ + try: + # Get the GitHub repo object + repo = codebase._op.remote_git_repo + + # Search using PyGitHub's search_issues (which searches both issues and PRs) + results = [] + for item in repo.search_issues(query)[:max_results]: + result = { + "title": item.title, + "number": item.number, + "state": item.state, + "labels": [label.name for label in item.labels], + "created_at": item.created_at.isoformat(), + "updated_at": item.updated_at.isoformat(), + "url": item.html_url, + "is_pr": item.pull_request is not None, + } + results.append(result) + + return SearchResultObservation( + status="success", + query=query, + results=results, + ) + + except Exception as e: + return SearchResultObservation( + status="error", + error=f"Failed to search: {e!s}", + query=query, + results=[], + ) diff --git a/src/codegen/git/clients/git_repo_client.py b/src/codegen/git/clients/git_repo_client.py index 79c8df2a4..e90735639 100644 --- a/src/codegen/git/clients/git_repo_client.py +++ b/src/codegen/git/clients/git_repo_client.py @@ -7,6 +7,7 @@ from github.Commit import Commit from github.GithubException import GithubException, UnknownObjectException from github.GithubObject import NotSet, Opt +from github.Issue import Issue from github.IssueComment import IssueComment from github.Label import Label from github.PullRequest import PullRequest @@ -431,3 +432,13 @@ def merge_upstream(self, branch_name: str) -> bool: post_parameters = {"branch": branch_name} status, _, _ = self.repo._requester.requestJson("POST", f"{self.repo.url}/merge-upstream", input=post_parameters) return status == 200 + + #################################################################################################################### + # SEARCH + 
#################################################################################################################### + + def search_issues(self, query: str, **kwargs) -> list[Issue]: + return self.gh_client.client.search_issues(query, **kwargs) + + def search_prs(self, query: str, **kwargs) -> list[PullRequest]: + return self.gh_client.client.search_issues(query, **kwargs) diff --git a/tests/integration/extension/test_github.py b/tests/integration/extension/test_github.py index 7f050bba7..4b6b82693 100644 --- a/tests/integration/extension/test_github.py +++ b/tests/integration/extension/test_github.py @@ -1,17 +1,16 @@ -"""Tests for Linear tools.""" +"""Tests for GitHub tools.""" import os import pytest -from codegen.extensions.linear.linear_client import LinearClient -from codegen.extensions.tools.github import view_pr +from codegen.extensions.tools.github import search, view_pr from codegen.sdk.core.codebase import Codebase @pytest.fixture -def client() -> LinearClient: - """Create a Linear client for testing.""" +def codebase() -> Codebase: + """Create a Codebase instance for testing.""" token = os.getenv("GITHUB_TOKEN") if not token: pytest.skip("GITHUB_TOKEN environment variable not set") @@ -19,8 +18,44 @@ def client() -> LinearClient: return codebase -def test_github_view_pr(client: LinearClient) -> None: - """Test getting an issue from Linear.""" +def test_github_view_pr(codebase: Codebase) -> None: + """Test viewing a PR from GitHub.""" # Link to PR: https://github.com/codegen-sh/Kevin-s-Adventure-Game/pull/419 - pr = view_pr(client, 419) + pr = view_pr(codebase, 419) print(pr) + + +def test_github_search_issues(codebase: Codebase) -> None: + """Test searching GitHub issues.""" + # Search for closed issues (no label filter is applied) + result = search(codebase, query="is:issue is:closed") + assert result.status == "success" + assert len(result.results) > 0 + assert "is:issue is:closed" in result.query + + # Verify issue structure + if result.results: + issue = 
result.results[0] + assert "title" in issue + assert "number" in issue + assert "state" in issue + assert issue["state"] == "closed" + assert not issue["is_pr"] # Should be an issue, not a PR + + +def test_github_search_prs(codebase: Codebase) -> None: + """Test searching GitHub pull requests.""" + # Search for merged PRs + result = search(codebase, query="is:pr is:merged") + assert result.status == "success" + assert len(result.results) > 0 + assert "is:pr is:merged" in result.query + + # Verify PR structure + if result.results: + pr = result.results[0] + assert "title" in pr + assert "number" in pr + assert "state" in pr + assert pr["state"] == "closed" + assert pr["is_pr"] # Should be a PR From f65b020a7d0a22bacc08311fb82e3caf56a48fae Mon Sep 17 00:00:00 2001 From: jayhack Date: Tue, 4 Mar 2025 17:45:03 -0800 Subject: [PATCH 2/2] . --- src/codegen/agents/code_agent.py | 50 +++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/src/codegen/agents/code_agent.py b/src/codegen/agents/code_agent.py index cb7be3ffa..29a26c8d6 100644 --- a/src/codegen/agents/code_agent.py +++ b/src/codegen/agents/code_agent.py @@ -7,7 +7,9 @@ from langsmith import Client from codegen.extensions.langchain.agent import create_codebase_agent -from codegen.extensions.langchain.utils.get_langsmith_url import find_and_print_langsmith_run_url +from codegen.extensions.langchain.utils.get_langsmith_url import ( + find_and_print_langsmith_run_url, +) if TYPE_CHECKING: from codegen import Codebase @@ -16,7 +18,22 @@ class CodeAgent: """Agent for interacting with a codebase.""" - def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-5-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs): + codebase: "Codebase" + agent: any + langsmith_client: Client + project_name: str + thread_id: str | None = None + config: dict = {} + + def __init__( + self, + codebase: "Codebase", + 
model_provider: str = "anthropic", + model_name: str = "claude-3-5-sonnet-latest", + memory: bool = True, + tools: Optional[list[BaseTool]] = None, + **kwargs, + ): """Initialize a CodeAgent. Args: @@ -32,7 +49,14 @@ def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", mode - max_tokens: Maximum number of tokens to generate """ self.codebase = codebase - self.agent = create_codebase_agent(self.codebase, model_provider=model_provider, model_name=model_name, memory=memory, additional_tools=tools, **kwargs) + self.agent = create_codebase_agent( + self.codebase, + model_provider=model_provider, + model_name=model_name, + memory=memory, + additional_tools=tools, + **kwargs, + ) self.langsmith_client = Client() # Get project name from environment variable or use a default @@ -51,13 +75,25 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str: """ if thread_id is None: thread_id = str(uuid4()) + self.thread_id = thread_id + self.config = { + "configurable": { + "thread_id": thread_id, + "metadata": {"project": self.project_name}, + }, + "recursion_limit": 100, + } # this message has a reducer which appends the current message to the existing history # see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers input = {"messages": [("user", prompt)]} # we stream the steps instead of invoke because it allows us to access intermediate nodes - stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id, "metadata": {"project": self.project_name}}, "recursion_limit": 100}, stream_mode="values") + stream = self.agent.stream( + input, + config=self.config, + stream_mode="values", + ) # Keep track of run IDs from the stream run_ids = [] @@ -110,3 +146,9 @@ def get_agent_trace_url(self) -> str | None: print(traceback.format_exc()) print(separator) return None + + def get_tools(self) -> list[BaseTool]: + return list(self.agent.get_graph().nodes["tools"].data.tools_by_name.values()) + + def 
get_state(self) -> dict: + return self.agent.get_state(self.config)