From cbb226e187ca8584b83653f986ef3a6e2f324b75 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 11 Feb 2025 15:11:17 -0800 Subject: [PATCH 1/4] fix: mcp servers for tools, agent, mods --- pyproject.toml | 3 +- src/codegen/cli/mcp/agent/docs_expert.py | 73 +++++++++++++ src/codegen/cli/mcp/server.py | 19 ++++ src/codegen/extensions/langchain/__init__.py | 54 --------- src/codegen/extensions/langchain/agent.py | 72 +++++++++++- src/codegen/extensions/langchain/tools.py | 54 +++++++-- src/codegen/extensions/mcp/codebase_agent.py | 40 +++++++ src/codegen/extensions/mcp/codebase_mods.py | 47 ++++++++ src/codegen/extensions/mcp/codebase_tools.py | 56 ++++++++++ .../extensions/tools/file_operations.py | 10 +- src/codegen/extensions/tools/reveal_symbol.py | 6 +- uv.lock | 103 +++++++++++++++++- 12 files changed, 463 insertions(+), 74 deletions(-) create mode 100644 src/codegen/cli/mcp/agent/docs_expert.py delete mode 100644 src/codegen/extensions/langchain/__init__.py create mode 100644 src/codegen/extensions/mcp/codebase_agent.py create mode 100644 src/codegen/extensions/mcp/codebase_mods.py create mode 100644 src/codegen/extensions/mcp/codebase_tools.py diff --git a/pyproject.toml b/pyproject.toml index 8e4541c33..399bc3ac6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ dependencies = [ "hatch-vcs>=0.4.0", "hatchling>=1.25.0", "pyinstrument>=5.0.0", - "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work! + "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work! 
"rich-click>=1.8.5", "python-dotenv>=1.0.1", "giturlparse", @@ -66,6 +66,7 @@ dependencies = [ "langchain_core", "langchain_openai", "numpy>=2.2.2", + "mcp[cli]", ] license = { text = "Apache-2.0" } diff --git a/src/codegen/cli/mcp/agent/docs_expert.py b/src/codegen/cli/mcp/agent/docs_expert.py new file mode 100644 index 000000000..62941e43c --- /dev/null +++ b/src/codegen/cli/mcp/agent/docs_expert.py @@ -0,0 +1,73 @@ +"""Demo implementation of an agent with Codegen tools.""" + +from langchain import hub +from langchain.agents import AgentExecutor +from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent +from langchain_core.runnables.history import RunnableWithMessageHistory +from langchain_openai import ChatOpenAI +from langchain_core.messages import BaseMessage +from codegen.extensions.langchain.agent import create_codebase_agent +from codegen.sdk.core.codebase import Codebase + + + +AGENT_INSTRUCTIONS = """ +Instruction Set for Codegen SDK Expert Agent + +Overview: +This instruction set is designed for an agent that is an expert on the Codegen SDK, specifically the Python library. The agent will be asked questions about the SDK, including classes, utilities, properties, and how to accomplish tasks using the SDK. The goal is to provide helpful responses that assist users in achieving their tasks with the SDK. + +Key Responsibilities: +1. Expertise in Codegen SDK: + - The agent is an expert on the Codegen SDK, with a deep understanding of its components and functionalities. + - It should be able to provide detailed explanations of classes, utilities, and properties defined in the SDK. + +2. Answering Questions: + - The agent will be asked questions about the Codegen SDK, such as: + - "Find all imports" + - "How do I add an import for a symbol?" + - "What is a statement object?" + - Responses should be clear, concise, and directly address the user's query. + +3. 
Task-Oriented Responses: + - The user is typically accomplishing a task using the Codegen SDK. + - Responses should be helpful toward that goal, providing guidance and solutions that facilitate task completion. + +4. Python Library Focus: + - Assume that questions are related to the Codegen SDK Python library. + - Provide Python-specific examples and explanations when applicable. + +Use the provided agent tools to look up additional information if needed. +By following this instruction set, the agent will be well-equipped to assist users in effectively utilizing the Codegen SDK for their projects. +""" + +def create_sdk_expert_agent( + codebase: Codebase, + model_name: str = "gpt-4o", + temperature: float = 0, + verbose: bool = True, +) -> RunnableWithMessageHistory: + """Create an agent with all codebase tools. + + Args: + codebase: The codebase to operate on + model_name: Name of the model to use (default: gpt-4) + temperature: Model temperature (default: 0) + verbose: Whether to print agent's thought process (default: True) + + Returns: + Initialized agent with message history + """ + # Initialize language model + + system_message: BaseMessage = BaseMessage(content=AGENT_INSTRUCTIONS, type="SYSTEM") + + agent = create_codebase_agent( + chat_history=[system_message], + codebase=codebase, + model_name=model_name, + temperature=temperature, + verbose=verbose + ) + + return agent diff --git a/src/codegen/cli/mcp/server.py b/src/codegen/cli/mcp/server.py index 22d8c62da..0a9000db4 100644 --- a/src/codegen/cli/mcp/server.py +++ b/src/codegen/cli/mcp/server.py @@ -1,5 +1,8 @@ from typing import Annotated, Any +from codegen.cli.mcp.agent.docs_expert import create_sdk_expert_agent +from codegen.extensions.vector_index import VectorIndex +from codegen.sdk.core.codebase import Codebase from mcp.server.fastmcp import Context, FastMCP from codegen.cli.api.client import RestAPI @@ -39,6 +42,22 @@ def get_service_config() -> dict[str, Any]: # ----- TOOLS ----- +@mcp.tool() 
+def ask_codegen_sdk(query: Annotated[str, "Ask a question to an exper agent for details about any aspect of the codegen sdk core set of classes and utilities"]): + codebase = Codebase("../../sdk/core") + agent = create_sdk_expert_agent( + codebase=codebase + ) + + result = agent.invoke({ + "input": query + }, + config={"configurable": {"session_id": "demo"}}, +) + + return result['output'] + + @mcp.tool() def generate_codemod( title: Annotated[str, "The title of the codemod (hyphenated)"], diff --git a/src/codegen/extensions/langchain/__init__.py b/src/codegen/extensions/langchain/__init__.py deleted file mode 100644 index 58cdfa4ea..000000000 --- a/src/codegen/extensions/langchain/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Langchain tools for workspace operations.""" - -from langchain.tools import BaseTool - -from codegen import Codebase - -from .tools import ( - CommitTool, - CreateFileTool, - DeleteFileTool, - EditFileTool, - ListDirectoryTool, - RevealSymbolTool, - SearchTool, - SemanticEditTool, - ViewFileTool, -) - -__all__ = [ - # Tool classes - "CommitTool", - "CreateFileTool", - "DeleteFileTool", - "EditFileTool", - "ListDirectoryTool", - "RevealSymbolTool", - "SearchTool", - "SemanticEditTool", - "ViewFileTool", - # Helper functions - "get_workspace_tools", -] - - -def get_workspace_tools(codebase: Codebase) -> list[BaseTool]: - """Get all workspace tools initialized with a codebase. 
- - Args: - codebase: The codebase to operate on - - Returns: - List of initialized Langchain tools - """ - return [ - ViewFileTool(codebase), - ListDirectoryTool(codebase), - SearchTool(codebase), - EditFileTool(codebase), - CreateFileTool(codebase), - DeleteFileTool(codebase), - CommitTool(codebase), - RevealSymbolTool(codebase), - SemanticEditTool(codebase), - ] diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py index 458903c24..3e3a52fdc 100644 --- a/src/codegen/extensions/langchain/agent.py +++ b/src/codegen/extensions/langchain/agent.py @@ -1,9 +1,10 @@ """Demo implementation of an agent with Codegen tools.""" -from langchain import hub +from langchain.hub import pull from langchain.agents import AgentExecutor from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent -from langchain_core.chat_history import ChatMessageHistory +from langchain_core.chat_history import InMemoryChatMessageHistory +from langchain_core.messages import BaseMessage from langchain_core.runnables.history import RunnableWithMessageHistory from langchain_openai import ChatOpenAI @@ -29,6 +30,7 @@ def create_codebase_agent( model_name: str = "gpt-4o", temperature: float = 0, verbose: bool = True, + chat_history: list[BaseMessage] = [], ) -> RunnableWithMessageHistory: """Create an agent with all codebase tools. 
@@ -63,7 +65,7 @@ def create_codebase_agent( ] # Get the prompt to use - prompt = hub.pull("hwchase17/openai-functions-agent") + prompt = pull("hwchase17/openai-functions-agent") # Create the agent agent = OpenAIFunctionsAgent( @@ -80,7 +82,7 @@ def create_codebase_agent( ) # Create message history handler - message_history = ChatMessageHistory() + message_history = InMemoryChatMessageHistory(messages=chat_history) # Wrap with message history return RunnableWithMessageHistory( @@ -89,3 +91,65 @@ def create_codebase_agent( input_messages_key="input", history_messages_key="chat_history", ) + + +def create_codebase_inspector_agent( + codebase: Codebase, + model_name: str = "gpt-4o", + temperature: float = 0, + verbose: bool = True, + chat_history: list[BaseMessage] = [], +) -> RunnableWithMessageHistory: + """Create an agent with all codebase tools. + + Args: + codebase: The codebase to operate on + model_name: Name of the model to use (default: gpt-4) + temperature: Model temperature (default: 0) + verbose: Whether to print agent's thought process (default: True) + + Returns: + Initialized agent with message history + """ + # Initialize language model + llm = ChatOpenAI( + model_name=model_name, + temperature=temperature, + ) + + # Get all codebase tools + tools = [ + ViewFileTool(codebase), + ListDirectoryTool(codebase), + SearchTool(codebase), + DeleteFileTool(codebase), + RevealSymbolTool(codebase), + ] + + # Get the prompt to use + prompt = pull("codegen-agent/codebase-agent") + + # Create the agent + agent = OpenAIFunctionsAgent( + llm=llm, + tools=tools, + prompt=prompt, + ) + + # Create the agent executor + agent_executor = AgentExecutor( + agent=agent, + tools=tools, + verbose=verbose, + ) + + # Create message history handler + message_history = InMemoryChatMessageHistory(messages=chat_history) + + # Wrap with message history + return RunnableWithMessageHistory( + agent_executor, + lambda session_id: message_history, + input_messages_key="input", + 
history_messages_key="chat_history", + ) \ No newline at end of file diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index fcfcd2997..6b6482282 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -5,7 +5,6 @@ from langchain.tools import BaseTool from pydantic import BaseModel, Field - from codegen import Codebase from ..tools import ( @@ -312,20 +311,20 @@ def _run( return json.dumps(result, indent=2) +class SemanticSearchInput(BaseModel): + """Input for Semant search of a codebase""" + + query: str = Field(..., description="The natural language search query") + k: int = Field(default=5, description="Number of results to return") + preview_length: int = Field(default=200, description="Length of content preview in characters") + + class SemanticSearchTool(BaseTool): """Tool for semantic code search.""" name: ClassVar[str] = "semantic_search" description: ClassVar[str] = "Search the codebase using natural language queries and semantic similarity" - args_schema: ClassVar[type[BaseModel]] = type( - "SemanticSearchInput", - (BaseModel,), - { - "query": (str, Field(..., description="The natural language search query")), - "k": (int, Field(default=5, description="Number of results to return")), - "preview_length": (int, Field(default=200, description="Length of content preview in characters")), - }, - ) + args_schema: ClassVar[type[BaseModel]] = SemanticSearchInput codebase: Codebase = Field(exclude=True) def __init__(self, codebase: Codebase) -> None: @@ -334,3 +333,38 @@ def __init__(self, codebase: Codebase) -> None: def _run(self, query: str, k: int = 5, preview_length: int = 200) -> str: result = semantic_search(self.codebase, query, k=k, preview_length=preview_length) return json.dumps(result, indent=2) + + + + +def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: + """Get all workspace tools initialized with a codebase. 
+ + Args: + codebase: The codebase to operate on + + Returns: + List of initialized Langchain tools + """ + from .tools import ( + CommitTool, + CreateFileTool, + DeleteFileTool, + EditFileTool, + ListDirectoryTool, + RevealSymbolTool, + SearchTool, + SemanticEditTool, + ViewFileTool, + ) + return [ + ViewFileTool(codebase), + ListDirectoryTool(codebase), + SearchTool(codebase), + EditFileTool(codebase), + CreateFileTool(codebase), + DeleteFileTool(codebase), + CommitTool(codebase), + RevealSymbolTool(codebase), + SemanticEditTool(codebase), + ] diff --git a/src/codegen/extensions/mcp/codebase_agent.py b/src/codegen/extensions/mcp/codebase_agent.py new file mode 100644 index 000000000..f36e3c81f --- /dev/null +++ b/src/codegen/extensions/mcp/codebase_agent.py @@ -0,0 +1,40 @@ +import json +from typing import Annotated, Any +from codegen.extensions.langchain.agent import create_codebase_inspector_agent +from codegen.sdk.core.codebase import Codebase +from mcp.server.fastmcp import FastMCP +import os + +from codegen.sdk.enums import ProgrammingLanguage +# Initialize FastMCP server + +mcp = FastMCP("codebase-agent-mcp", instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.") + + +@mcp.tool(name="query_codebase", description="Query your codebase for information about symbols, dependencies, files, anything") +def query_codebase( + query: Annotated[str, "A question or prompt requesting information about or on some aspect of your codebase, for example 'find all usages of the method 'foobar', include as much information as possible"], + codebase_dir: Annotated[str, "Absolute path to the codebase root directory. 
It is highly encouraged to provide the root codebase directory and not a sub directory"], + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"] + ): + + # Check if codebase directory exists + if not os.path.exists(codebase_dir): + return { + "error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path." + } + # Initialize codebase + codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) + + # Create the agent + agent = create_codebase_inspector_agent(codebase=codebase, model_name="gpt-4", verbose=True) + + result = agent.invoke({"input": query}, config={"configurable": {"session_id": "demo"}}) + + return result['output'] + + +if __name__ == "__main__": + # Initialize and run the server + print("Starting codebase agent server...") + mcp.run(transport="stdio") diff --git a/src/codegen/extensions/mcp/codebase_mods.py b/src/codegen/extensions/mcp/codebase_mods.py new file mode 100644 index 000000000..5aa33aad7 --- /dev/null +++ b/src/codegen/extensions/mcp/codebase_mods.py @@ -0,0 +1,47 @@ +import json +from typing import Annotated +from codegen.sdk.core.codebase import Codebase +from mcp.server.fastmcp import FastMCP +from codegen.sdk.enums import ProgrammingLanguage +import os + + +mcp = FastMCP("codebase-mods-mcp", instructions="Use this server to invoke deterministic codemods for your codebase. This implements a variety of codemods to be used to modify your codebase to your satisfaction") + +@mcp.tool(name="split_files_by_function", description="split out the functions in defined in the provided file into new files") +def split_files_by_function( + target_file: Annotated[str, "file path to the target file to split"], + codebase_dir: Annotated[str, "Absolute path to the codebase root directory. 
It is highly encouraged to provide the root codebase directory and not a sub directory"], + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"] +): + + if not os.path.exists(codebase_dir): + return { + "error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path." + } + codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) + new_files = {} + file = codebase.get_file(target_file) + # for each test_function in the file + for function in file.functions: + # Create a new file for each test function using its name + new_file = codebase.create_file(f'{file.directory.path}/{function.name}.py', sync=False) + + print(f'🚠 🚠 Moving `{function.name}` to new file `{new_file.name}`') + # Move the test function to the newly created file + function.move_to_file(new_file) + new_files[new_file.filepath] = [function.name] + + codebase.commit() + + result = { + "description": "the following new files have been created with each with containing the function specified", + 'new_files': new_files + } + + return json.dumps(result, indent=2) + +if __name__ == "__main__": + # Initialize and run the server + print("Starting codebase mods server...") + mcp.run(transport="stdio") \ No newline at end of file diff --git a/src/codegen/extensions/mcp/codebase_tools.py b/src/codegen/extensions/mcp/codebase_tools.py new file mode 100644 index 000000000..25a49a21e --- /dev/null +++ b/src/codegen/extensions/mcp/codebase_tools.py @@ -0,0 +1,56 @@ +from codegen.extensions.mcp.reveal_symbol_tool import mcp +from codegen.extensions.mcp.search_codebase_tool import mcp +import json +from typing import Annotated, Any, Literal, Optional +from codegen.extensions.tools import reveal_symbol +from codegen.extensions.tools.search import search +from codegen.extensions.vector_index import VectorIndex +from codegen.sdk.core.codebase import Codebase +from mcp.server.fastmcp import FastMCP +from 
codegen.sdk.enums import ProgrammingLanguage + +mcp = FastMCP("codebase-tools-mcp", instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.") + +@mcp.tool(name="reveal_symbol", description="Reveal the dependencies and usages of a symbol up to N degrees") +def reveal_symbol_tool( + symbol_name: Annotated[str, "Name of the symbol to inspect"], + target_file: Annotated[Optional[str], "The file path of the file containing the symbol to inspect"], + codebase_dir: Annotated[str, "The root directory of your codebase"], + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], + degree: Annotated[Optional[int], "depth do which symbol information is retrieved"], + collect_dependencies: Annotated[Optional[bool], "includes dependencies of symbol"], + collect_usages: Annotated[Optional[bool], "includes usages of symbol"] +): + codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) + found_symbol = None + if target_file: + file = codebase.get_file(target_file) + found_symbol = file.get_symbol(symbol_name) + else: + found_symbol = codebase.get_symbol(symbol_name) + + result = reveal_symbol( + found_symbol, + degree, + collect_dependencies=collect_dependencies, + collect_usages=collect_usages, + ) + return json.dumps(result, indent=2) + +@mcp.tool(name="search_codebase", description="Search the codebase using text search or regex pattern matching") +def search_codebase_tool( + query: str, + target_directories: Annotated[Optional[list[str]], "list of directories to search within"], + codebase_dir: Annotated[str, "The root directory of your codebase"], + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], + use_regex: Annotated[bool, "use regex for the search query"] +): + codebase = 
Codebase(repo_path=codebase_dir, programming_language=codebase_language) + result = search(codebase, query, target_directories, use_regex=use_regex) + return json.dumps(result, indent=2) + + +if __name__ == "__main__": + # Initialize and run the server + print("Starting codebase toolsen server...") + mcp.run(transport="stdio") diff --git a/src/codegen/extensions/tools/file_operations.py b/src/codegen/extensions/tools/file_operations.py index 8f903b30f..253f30f57 100644 --- a/src/codegen/extensions/tools/file_operations.py +++ b/src/codegen/extensions/tools/file_operations.py @@ -17,10 +17,18 @@ def view_file(codebase: Codebase, filepath: str) -> dict[str, Any]: Returns: Dict containing file contents and metadata, or error information if file not found """ + file = None + try: file = codebase.get_file(filepath) except ValueError: - return {"error": f"File not found: {filepath}"} + pass + + if not file: + for f in codebase.files: + if f.file_path.endswith(filepath): + file = f + break if not file: return {"error": f"File not found: {filepath}"} diff --git a/src/codegen/extensions/tools/reveal_symbol.py b/src/codegen/extensions/tools/reveal_symbol.py index d8eed2121..04eb01746 100644 --- a/src/codegen/extensions/tools/reveal_symbol.py +++ b/src/codegen/extensions/tools/reveal_symbol.py @@ -212,10 +212,10 @@ def under_token_limit() -> bool: def reveal_symbol( symbol: Symbol, - degree: int = 1, + degree: Optional[int] = 1, max_tokens: Optional[int] = None, - collect_dependencies: bool = True, - collect_usages: bool = True, + collect_dependencies: Optional[bool] = True, + collect_usages: Optional[bool] = True, ) -> dict[str, Any]: """Reveal the dependencies and usages of a symbol up to N degrees. 
diff --git a/uv.lock b/uv.lock index f5220bb2d..86b7e82ce 100644 --- a/uv.lock +++ b/uv.lock @@ -628,6 +628,7 @@ dev = [ { name = "jsbeautifier" }, { name = "jupyterlab" }, { name = "loguru" }, + { name = "modal" }, { name = "mypy", extra = ["faster-cache", "mypyc"] }, { name = "pre-commit" }, { name = "pre-commit-uv" }, @@ -671,7 +672,7 @@ requires-dist = [ { name = "numpy", specifier = ">=2.2.2" }, { name = "openai", specifier = "==1.61.1" }, { name = "pip", specifier = ">=24.3.1" }, - { name = "plotly", specifier = ">=5.24.0,<6.0.0" }, + { name = "plotly", specifier = ">=5.24.0,<7.0.0" }, { name = "psutil", specifier = ">=5.8.0" }, { name = "pydantic", specifier = ">=2.9.2,<3.0.0" }, { name = "pydantic-core", specifier = ">=2.23.4" }, @@ -732,6 +733,7 @@ dev = [ { name = "jsbeautifier", specifier = ">=1.15.1,<2.0.0" }, { name = "jupyterlab", specifier = ">=4.3.5" }, { name = "loguru", specifier = ">=0.7.3" }, + { name = "modal", specifier = ">=0.73.25" }, { name = "mypy", extras = ["mypyc", "faster-cache"], specifier = ">=1.13.0" }, { name = "pre-commit", specifier = ">=4.0.1" }, { name = "pre-commit-uv", specifier = ">=4.1.4" }, @@ -1294,6 +1296,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112 }, ] +[[package]] +name = "grpclib" +version = "0.4.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h2" }, + { name = "multidict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/b9/55936e462a5925190d7427e880b3033601d1effd13809b483d13a926061a/grpclib-0.4.7.tar.gz", hash = "sha256:2988ef57c02b22b7a2e8e961792c41ccf97efc2ace91ae7a5b0de03c363823c3", size = 61254 } + [[package]] name = "h11" version = "0.14.0" @@ -1303,6 +1315,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/38/d7f80fd13e6582fb8e0df8c9a653dcc02b03ca34f4d72f34869298c5baf8/h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f", size = 2150682 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957 }, +] + [[package]] name = "hatch-vcs" version = "0.4.0" @@ -1331,6 +1356,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/e7/ae38d7a6dfba0533684e0b2136817d667588ae3ec984c1a4e5df5eb88482/hatchling-1.27.0-py3-none-any.whl", hash = "sha256:d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b", size = 75794 }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -1408,6 +1442,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/92/75/4bc3e242ad13f2e6c12e0b0401ab2c5e5c6f0d7da37ec69bc808e24e0ccb/humanize-4.11.0-py3-none-any.whl", hash = "sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0", size = 128055 }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, +] + [[package]] name = "identify" version = "2.6.7" @@ -2112,6 +2155,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/02/c66bdfdadbb021adb642ca4e8a5ed32ada0b4a3e4b39c5d076d19543452f/mistune-3.1.1-py3-none-any.whl", hash = "sha256:02106ac2aa4f66e769debbfa028509a275069dcffce0dfa578edd7b991ee700a", size = 53696 }, ] +[[package]] +name = "modal" +version = "0.73.31" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "certifi" }, + { name = "click" }, + { name = "fastapi" }, + { name = "grpclib" }, + { name = "protobuf" }, + { name = "rich" }, + { name = "synchronicity" }, + { name = "toml" }, + { name = "typer" }, + { name = "types-certifi" }, + { name = "types-toml" }, + { name = "typing-extensions" }, + { name = "watchfiles" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/54/4102a1dbea8da32537606c2e084001eee30e0c2ad1dc336bdb48310d8500/modal-0.73.31-py3-none-any.whl", hash = "sha256:03d7dce03729976d6d3ef80cd79be90ffb74e28bbfbb20a60878e61e820e92e6", size = 533251 }, +] + [[package]] name = "multidict" version = "6.1.0" @@ -3548,6 
+3615,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, ] +[[package]] +name = "sigtools" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/db/669ca14166814da187b3087b908ca924cf83f5b504fe23b3859a3ef67d4f/sigtools-4.0.1.tar.gz", hash = "sha256:4b8e135a9cd4d2ea00da670c093372d74e672ba3abb87f4c98d8e73dea54445c", size = 71910 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/91/853dbf6ec096197dba9cd5fd0c836c5fc19142038b7db60ebe6332b1bab1/sigtools-4.0.1-py2.py3-none-any.whl", hash = "sha256:d216b4cf920bbab0fce636ddc429ed8463a5b533d9e1492acb45a2a1bc36ac6c", size = 76419 }, +] + [[package]] name = "six" version = "1.17.0" @@ -3662,6 +3741,19 @@ pytest = [ { name = "pytest" }, ] +[[package]] +name = "synchronicity" +version = "0.9.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sigtools" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/52/f34a9ab6d514e0808d0f572affb360411d596b3439107318c00889277dd6/synchronicity-0.9.11.tar.gz", hash = "sha256:cb5dbbcb43d637e516ae50db05a776da51a705d1e1a9c0e301f6049afc3c2cae", size = 50323 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/d5/7675cd9b8e18f05b9ea261acad5d197fcb8027d2a65b1a750427ec084593/synchronicity-0.9.11-py3-none-any.whl", hash = "sha256:231129654d2f56b1aa148e85ebd8545231be135771f6d2196d414175b1594ef6", size = 36827 }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -3921,6 +4013,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/cc/0a838ba5ca64dc832aa43f727bd586309846b0ffb2ce52422543e6075e8a/typer-0.15.1-py3-none-any.whl", hash = 
"sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847", size = 44908 }, ] +[[package]] +name = "types-certifi" +version = "2021.10.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/68/943c3aeaf14624712a0357c4a67814dba5cea36d194f5c764dad7959a00c/types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f", size = 2095 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/63/2463d89481e811f007b0e1cd0a91e52e141b47f9de724d20db7b861dcfec/types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a", size = 2136 }, +] + [[package]] name = "types-networkx" version = "3.4.2.20241227" From 9dca1c86ead2988e574e8f680b877b2eb0f90021 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 11 Feb 2025 15:33:39 -0800 Subject: [PATCH 2/4] add readme for mcp servers --- src/codegen/extensions/mcp/README.md | 43 ++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/codegen/extensions/mcp/README.md diff --git a/src/codegen/extensions/mcp/README.md b/src/codegen/extensions/mcp/README.md new file mode 100644 index 000000000..ba796afba --- /dev/null +++ b/src/codegen/extensions/mcp/README.md @@ -0,0 +1,43 @@ +# Codegen MCP Servers + +This directory contains reference implementations of MCP (Model Context Protocol) servers that extend AI Agent capabilities using the Codegen SDK. These servers enable AI Agents to: + +- Query and analyze your codebase (`codebase_agent.py`) +- Run deterministic codemods (`codebase_mods.py`) +- Invoke tools built with Codegen SDK (`codebase_tools.py`) + +## What is MCP? + +MCP (Model Context Protocol) allows AI Agents to interact with local tools and services through a standardized interface. The servers in this directory demonstrate how you might write an MCP server that leverages Codegen's capabilities. 
+ +## Setup Instructions + +### Cline + +Add this to your `cline_mcp_settings.json` file to get started: + +``` +{ + "mcpServers": { + "codegen-cli": { + "command": "uv", + "args": [ + "--directory", + "/codegen-sdk/src/codegen/extensions/mcp", + "run", + "codebase_agent.py | codebase_mods | codebase_tools" + ] + } + } +} +``` + +### Cursor: +Under the `Settings` > `Feature` > `MCP Servers` section, click "Add New MCP Server" and add the following: + +``` +Name: codegen-mcp +Type: Command +Command: uv --directory /codegen-sdk/src/codegen/cli/mcp run +``` + From 7548178b20012c7f4b7077e22b910d8d57e6aeac Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 11 Feb 2025 17:09:21 -0800 Subject: [PATCH 3/4] fix: add docs for creating an mcp --- docs/introduction/ide-usage.mdx | 6 +- docs/mint.json | 3 +- docs/tutorials/build-mcp.mdx | 75 ++++++++++++++++++++ src/codegen/extensions/mcp/codebase_tools.py | 5 +- 4 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 docs/tutorials/build-mcp.mdx diff --git a/docs/introduction/ide-usage.mdx b/docs/introduction/ide-usage.mdx index e6cae8f07..f5ae5809b 100644 --- a/docs/introduction/ide-usage.mdx +++ b/docs/introduction/ide-usage.mdx @@ -59,8 +59,8 @@ it will allow an agent to: - improve a codemod - get setup instructions -### Configuration -#### Usage with Cline: +### IDE Configuration +#### Cline Add this to your cline_mcp_settings.json: ``` { @@ -79,7 +79,7 @@ Add this to your cline_mcp_settings.json: ``` -#### Usage with Cursor: +#### Cursor: Under the `Settings` > `Feature` > `MCP Servers` section, click "Add New MCP Server" and add the following: ``` diff --git a/docs/mint.json b/docs/mint.json index 4e41df7fb..e67ca33e9 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -99,7 +99,8 @@ "tutorials/sqlalchemy-1.6-to-2.0", "tutorials/fixing-import-loops-in-pytorch", "tutorials/python2-to-python3", - "tutorials/flask-to-fastapi" + "tutorials/flask-to-fastapi", + "tutorials/build-mcp" ] }, { diff --git 
a/docs/tutorials/build-mcp.mdx b/docs/tutorials/build-mcp.mdx new file mode 100644 index 000000000..dae0573bb --- /dev/null +++ b/docs/tutorials/build-mcp.mdx @@ -0,0 +1,75 @@ +--- +title: "Building a Model Context Protocol server with Codegen" +sidebarTitle: "MCP Server" +icon: "boxes-stacked" +iconType: "solid" +--- + +Learn how to build a Model Context Protocol (MCP) server that enables AI models to understand and manipulate code using Codegen's powerful tools. + +This guide will walk you through creating an MCP server that can provide semantic code search. + +View the full code in our [examples repository](https://github.com/codegen-sh/codegen-sdk/tree/develop/src/codegen/extensions/mcp) + + +## Setup: +Install the MCP Python library +``` +uv pip install mcp +``` + +## Step 1: Setting Up Your MCP Server + +First, let's create a basic MCP server using Codegen's MCP tools: + +server.py +```python +from codegen import Codebase +from mcp.server.fastmcp import FastMCP +from typing import Annotated +# Initialize the codebase +codebase = Codebase.from_repo(".") + +# create the MCP server using FastMCP +mcp = FastMCP(name="demo-mcp", instructions="Use this server for semantic search of codebases") + + +if __name__ == "__main__": + # Initialize and run the server + print("Starting demo MCP server...") + mcp.run(transport="stdio") + +``` + +## Step 2: Create the search tool + +Let's implement the semantic search tool. + +server.py +```python +from codegen.extensions.tools.semantic_search import semantic_search + +.... + +@mcp.tool('codebase_semantic_search', "search codebase with the provided query") +def search(query: Annotated[str, "search query to run against codebase"]): + codebase = Codebase("provide location to codebase", programming_language="provide codebase Language") + # use the semantic search tool from codegen.extensions.tools OR write your own + results = semantic_search(codebase=codebase, query=query) + return results + +.... 
+``` + +## Run Your MCP Server + +You can run and inspect your MCP server with: + +``` +mcp dev server.py +``` + +If you'd like to integrate this into an IDE check out this [setup guide](/introduction/ide-usage#mcp-server-setup) + +And that's a wrap, chime in at our [community + Slack](https://community.codegen.com) if you have questions or ideas for additional MCP tools/capabilities \ No newline at end of file diff --git a/src/codegen/extensions/mcp/codebase_tools.py b/src/codegen/extensions/mcp/codebase_tools.py index 25a49a21e..af5abd918 100644 --- a/src/codegen/extensions/mcp/codebase_tools.py +++ b/src/codegen/extensions/mcp/codebase_tools.py @@ -1,10 +1,7 @@ -from codegen.extensions.mcp.reveal_symbol_tool import mcp -from codegen.extensions.mcp.search_codebase_tool import mcp import json from typing import Annotated, Any, Literal, Optional from codegen.extensions.tools import reveal_symbol from codegen.extensions.tools.search import search -from codegen.extensions.vector_index import VectorIndex from codegen.sdk.core.codebase import Codebase from mcp.server.fastmcp import FastMCP from codegen.sdk.enums import ProgrammingLanguage @@ -52,5 +49,5 @@ def search_codebase_tool( if __name__ == "__main__": # Initialize and run the server - print("Starting codebase toolsen server...") + print("Starting codebase tools server...") mcp.run(transport="stdio") From 684c5f6968f6693a3d4384f5310f6b882b5c2828 Mon Sep 17 00:00:00 2001 From: rushilpatel0 <171610820+rushilpatel0@users.noreply.github.com> Date: Wed, 12 Feb 2025 01:12:07 +0000 Subject: [PATCH 4/4] Automated pre-commit update --- pyproject.toml | 2 +- src/codegen/cli/mcp/agent/docs_expert.py | 18 +++-------- src/codegen/cli/mcp/server.py | 20 +++++------- src/codegen/extensions/langchain/agent.py | 4 +-- src/codegen/extensions/langchain/tools.py | 6 ++-- src/codegen/extensions/mcp/README.md | 4 +-- src/codegen/extensions/mcp/codebase_agent.py | 31 ++++++++++--------- src/codegen/extensions/mcp/codebase_mods.py | 
32 ++++++++++---------- src/codegen/extensions/mcp/codebase_tools.py | 21 ++++++++----- 9 files changed, 67 insertions(+), 71 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 51f41b7b1..b2dbd9aed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "hatch-vcs>=0.4.0", "hatchling>=1.25.0", "pyinstrument>=5.0.0", - "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work! + "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work! "rich-click>=1.8.5", "python-dotenv>=1.0.1", "giturlparse", diff --git a/src/codegen/cli/mcp/agent/docs_expert.py b/src/codegen/cli/mcp/agent/docs_expert.py index 62941e43c..fd112062c 100644 --- a/src/codegen/cli/mcp/agent/docs_expert.py +++ b/src/codegen/cli/mcp/agent/docs_expert.py @@ -1,16 +1,11 @@ """Demo implementation of an agent with Codegen tools.""" -from langchain import hub -from langchain.agents import AgentExecutor -from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent -from langchain_core.runnables.history import RunnableWithMessageHistory -from langchain_openai import ChatOpenAI from langchain_core.messages import BaseMessage +from langchain_core.runnables.history import RunnableWithMessageHistory + from codegen.extensions.langchain.agent import create_codebase_agent from codegen.sdk.core.codebase import Codebase - - AGENT_INSTRUCTIONS = """ Instruction Set for Codegen SDK Expert Agent @@ -41,6 +36,7 @@ By following this instruction set, the agent will be well-equipped to assist users in effectively utilizing the Codegen SDK for their projects. 
""" + def create_sdk_expert_agent( codebase: Codebase, model_name: str = "gpt-4o", @@ -62,12 +58,6 @@ def create_sdk_expert_agent( system_message: BaseMessage = BaseMessage(content=AGENT_INSTRUCTIONS, type="SYSTEM") - agent = create_codebase_agent( - chat_history=[system_message], - codebase=codebase, - model_name=model_name, - temperature=temperature, - verbose=verbose - ) + agent = create_codebase_agent(chat_history=[system_message], codebase=codebase, model_name=model_name, temperature=temperature, verbose=verbose) return agent diff --git a/src/codegen/cli/mcp/server.py b/src/codegen/cli/mcp/server.py index 313131f41..e50be77fe 100644 --- a/src/codegen/cli/mcp/server.py +++ b/src/codegen/cli/mcp/server.py @@ -1,13 +1,12 @@ from typing import Annotated, Any -from codegen.cli.mcp.agent.docs_expert import create_sdk_expert_agent -from codegen.extensions.vector_index import VectorIndex -from codegen.sdk.core.codebase import Codebase from mcp.server.fastmcp import Context, FastMCP from codegen.cli.api.client import RestAPI +from codegen.cli.mcp.agent.docs_expert import create_sdk_expert_agent from codegen.cli.mcp.resources.system_prompt import SYSTEM_PROMPT from codegen.cli.mcp.resources.system_setup_instructions import SETUP_INSTRUCTIONS +from codegen.sdk.core.codebase import Codebase from codegen.shared.enums.programming_language import ProgrammingLanguage # Initialize FastMCP server @@ -45,17 +44,14 @@ def get_service_config() -> dict[str, Any]: @mcp.tool() def ask_codegen_sdk(query: Annotated[str, "Ask a question to an exper agent for details about any aspect of the codegen sdk core set of classes and utilities"]): codebase = Codebase("../../sdk/core") - agent = create_sdk_expert_agent( - codebase=codebase - ) + agent = create_sdk_expert_agent(codebase=codebase) - result = agent.invoke({ - "input": query - }, - config={"configurable": {"session_id": "demo"}}, -) + result = agent.invoke( + {"input": query}, + config={"configurable": {"session_id": "demo"}}, + ) - 
return result['output'] + return result["output"] @mcp.tool() diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py index 39dd1a9fc..9931d830e 100644 --- a/src/codegen/extensions/langchain/agent.py +++ b/src/codegen/extensions/langchain/agent.py @@ -1,8 +1,8 @@ """Demo implementation of an agent with Codegen tools.""" -from langchain.hub import pull from langchain.agents import AgentExecutor from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent +from langchain.hub import pull from langchain_core.chat_history import InMemoryChatMessageHistory from langchain_core.messages import BaseMessage from langchain_core.runnables.history import RunnableWithMessageHistory @@ -154,4 +154,4 @@ def create_codebase_inspector_agent( lambda session_id: message_history, input_messages_key="input", history_messages_key="chat_history", - ) \ No newline at end of file + ) diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index 28b901315..ee9929f7a 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -5,6 +5,7 @@ from langchain.tools import BaseTool from pydantic import BaseModel, Field + from codegen import Codebase from ..tools import ( @@ -313,7 +314,7 @@ def _run( class SemanticSearchInput(BaseModel): """Input for Semantic search of a codebase""" - + query: str = Field(..., description="The natural language search query") k: int = Field(default=5, description="Number of results to return") preview_length: int = Field(default=200, description="Length of content preview in characters") @@ -333,8 +334,6 @@ def __init__(self, codebase: Codebase) -> None: def _run(self, query: str, k: int = 5, preview_length: int = 200) -> str: result = semantic_search(self.codebase, query, k=k, preview_length=preview_length) return json.dumps(result, indent=2) - - def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: @@ -357,6 
+356,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: SemanticEditTool, ViewFileTool, ) + return [ ViewFileTool(codebase), ListDirectoryTool(codebase), diff --git a/src/codegen/extensions/mcp/README.md b/src/codegen/extensions/mcp/README.md index ba796afba..0f5d3e2a7 100644 --- a/src/codegen/extensions/mcp/README.md +++ b/src/codegen/extensions/mcp/README.md @@ -3,7 +3,7 @@ This directory contains reference implementations of MCP (Machine Control Protocol) servers that extend AI Agent capabilities using the Codegen SDK. These servers enable AI Agents to: - Query and analyze your codebase (`codebase_agent.py`) -- Run deterministic codemods (`codebase_mods.py`) +- Run deterministic codemods (`codebase_mods.py`) - Invoke tools built with Codegen SDK (`codebase_tools.py`) ## What is MCP? @@ -33,6 +33,7 @@ Add this to your `cline_mcp_settings.json` file to get started: ``` ### Cursor: + Under the `Settings` > `Feature` > `MCP Servers` section, click "Add New MCP Server" and add the following: ``` @@ -40,4 +41,3 @@ Name: codegen-mcp Type: Command Command: uv --directory /codegen-sdk/src/codegen/cli/mcp run ``` - diff --git a/src/codegen/extensions/mcp/codebase_agent.py b/src/codegen/extensions/mcp/codebase_agent.py index f36e3c81f..e458017ca 100644 --- a/src/codegen/extensions/mcp/codebase_agent.py +++ b/src/codegen/extensions/mcp/codebase_agent.py @@ -1,28 +1,31 @@ -import json -from typing import Annotated, Any -from codegen.extensions.langchain.agent import create_codebase_inspector_agent -from codegen.sdk.core.codebase import Codebase -from mcp.server.fastmcp import FastMCP import os +from typing import Annotated + +from mcp.server.fastmcp import FastMCP +from codegen.extensions.langchain.agent import create_codebase_inspector_agent +from codegen.sdk.core.codebase import Codebase from codegen.sdk.enums import ProgrammingLanguage + # Initialize FastMCP server -mcp = FastMCP("codebase-agent-mcp", instructions="Use this server to access any 
information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.") +mcp = FastMCP( + "codebase-agent-mcp", + instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.", +) @mcp.tool(name="query_codebase", description="Query your codebase for information about symbols, dependencies, files, anything") def query_codebase( - query: Annotated[str, "A question or prompt requesting information about or on some aspect of your codebase, for example 'find all usages of the method 'foobar', include as much information as possible"], + query: Annotated[ + str, "A question or prompt requesting information about or on some aspect of your codebase, for example 'find all usages of the method 'foobar', include as much information as possible" + ], codebase_dir: Annotated[str, "Absolute path to the codebase root directory. It is highly encouraged to provide the root codebase directory and not a sub directory"], - codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"] - ): - + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], +): # Check if codebase directory exists if not os.path.exists(codebase_dir): - return { - "error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path." - } + return {"error": f"Codebase directory '{codebase_dir}' does not exist. 
Please provide a valid directory path."} # Initialize codebase codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) @@ -31,7 +34,7 @@ def query_codebase( result = agent.invoke({"input": query}, config={"configurable": {"session_id": "demo"}}) - return result['output'] + return result["output"] if __name__ == "__main__": diff --git a/src/codegen/extensions/mcp/codebase_mods.py b/src/codegen/extensions/mcp/codebase_mods.py index 5aa33aad7..cefb41ac6 100644 --- a/src/codegen/extensions/mcp/codebase_mods.py +++ b/src/codegen/extensions/mcp/codebase_mods.py @@ -1,47 +1,47 @@ import json +import os from typing import Annotated -from codegen.sdk.core.codebase import Codebase + from mcp.server.fastmcp import FastMCP + +from codegen.sdk.core.codebase import Codebase from codegen.sdk.enums import ProgrammingLanguage -import os +mcp = FastMCP( + "codebase-mods-mcp", + instructions="Use this server to invoke deterministic codemods for your codebase. This implements a variety of codemods to be used to modify your codebase to your satisfaction", +) -mcp = FastMCP("codebase-mods-mcp", instructions="Use this server to invoke deterministic codemods for your codebase. This implements a variety of codemods to be used to modify your codebase to your satisfaction") @mcp.tool(name="split_files_by_function", description="split out the functions in defined in the provided file into new files") def split_files_by_function( target_file: Annotated[str, "file path to the target file to split"], codebase_dir: Annotated[str, "Absolute path to the codebase root directory. It is highly encouraged to provide the root codebase directory and not a sub directory"], - codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"] + codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], ): - if not os.path.exists(codebase_dir): - return { - "error": f"Codebase directory '{codebase_dir}' does not exist. 
Please provide a valid directory path." - } + return {"error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path."} codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) new_files = {} file = codebase.get_file(target_file) # for each test_function in the file for function in file.functions: # Create a new file for each test function using its name - new_file = codebase.create_file(f'{file.directory.path}/{function.name}.py', sync=False) - - print(f'🚠 🚠 Moving `{function.name}` to new file `{new_file.name}`') + new_file = codebase.create_file(f"{file.directory.path}/{function.name}.py", sync=False) + + print(f"🚠 🚠 Moving `{function.name}` to new file `{new_file.name}`") # Move the test function to the newly created file function.move_to_file(new_file) new_files[new_file.filepath] = [function.name] codebase.commit() - result = { - "description": "the following new files have been created with each with containing the function specified", - 'new_files': new_files - } + result = {"description": "the following new files have been created with each with containing the function specified", "new_files": new_files} return json.dumps(result, indent=2) + if __name__ == "__main__": # Initialize and run the server print("Starting codebase mods server...") - mcp.run(transport="stdio") \ No newline at end of file + mcp.run(transport="stdio") diff --git a/src/codegen/extensions/mcp/codebase_tools.py b/src/codegen/extensions/mcp/codebase_tools.py index af5abd918..f87282207 100644 --- a/src/codegen/extensions/mcp/codebase_tools.py +++ b/src/codegen/extensions/mcp/codebase_tools.py @@ -1,12 +1,18 @@ import json -from typing import Annotated, Any, Literal, Optional +from typing import Annotated, Optional + +from mcp.server.fastmcp import FastMCP + from codegen.extensions.tools import reveal_symbol from codegen.extensions.tools.search import search from codegen.sdk.core.codebase import Codebase -from 
mcp.server.fastmcp import FastMCP from codegen.sdk.enums import ProgrammingLanguage -mcp = FastMCP("codebase-tools-mcp", instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.") +mcp = FastMCP( + "codebase-tools-mcp", + instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.", +) + @mcp.tool(name="reveal_symbol", description="Reveal the dependencies and usages of a symbol up to N degrees") def reveal_symbol_tool( @@ -16,7 +22,7 @@ def reveal_symbol_tool( codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], degree: Annotated[Optional[int], "depth do which symbol information is retrieved"], collect_dependencies: Annotated[Optional[bool], "includes dependencies of symbol"], - collect_usages: Annotated[Optional[bool], "includes usages of symbol"] + collect_usages: Annotated[Optional[bool], "includes usages of symbol"], ): codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) found_symbol = None @@ -34,13 +40,14 @@ def reveal_symbol_tool( ) return json.dumps(result, indent=2) + @mcp.tool(name="search_codebase", description="Search the codebase using text search or regex pattern matching") def search_codebase_tool( - query: str, - target_directories: Annotated[Optional[list[str]], "list of directories to search within"], + query: str, + target_directories: Annotated[Optional[list[str]], "list of directories to search within"], codebase_dir: Annotated[str, "The root directory of your codebase"], codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"], - use_regex: Annotated[bool, "use 
regex for the search query"] + use_regex: Annotated[bool, "use regex for the search query"], ): codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language) result = search(codebase, query, target_directories, use_regex=use_regex)