ai4curation · cmungall · Aug 16, 2025 · Aug 15, 2025 · Aug 16, 2025 · Aug 16, 2025
diff --git a/README.md b/README.md
@@ -24,10 +24,10 @@ metacoder "Write a Python function to calculate fibonacci numbers" -c claude -w
 ...
 
 # With custom instructions
-metacoder "Refactor this code" -c claude --instructions coding_guidelines.md
+metacoder "Refactor this code" -c claude --instructions coding_guidelines.md -w my-repo
 ...
 
-# Using MCPs
+# Using MCPs (e.g. GitHub MCP)
 metacoder "Fix issue 1234" -w path/to/my-repo --mcp-collection github_mcps.yaml
 ...
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -34,6 +34,7 @@ dev = [
     "mkdocstrings-python>=1.14.0",
     "mypy>=1.17.1",
     "pytest>=8.4.1",
+    "ruff>=0.12.8",
     "types-click>=7.1.8",
     "types-pyyaml>=6.0.12.20250516",
 ]
diff --git a/src/metacoder/coders/base_coder.py b/src/metacoder/coders/base_coder.py
@@ -21,10 +21,15 @@
 
 class ToolUse(BaseModel):
     """Tool use from the coder."""
-    name: str = Field(..., description="Name of the tool; e.g. mcp.pubmed.get_paper_fulltext")
+
+    name: str = Field(
+        ..., description="Name of the tool; e.g. mcp.pubmed.get_paper_fulltext"
+    )
     arguments: dict[str, Any] = Field(..., description="Arguments to the tool")
     success: bool = Field(..., description="Whether the tool call was successful")
-    error: str | None = Field(default=None, description="Error message if the tool call failed")
+    error: str | None = Field(
+        default=None, description="Error message if the tool call failed"
+    )
     result: Any = Field(..., description="Result of the tool")
 
 
@@ -87,6 +92,7 @@ class BaseCoder(BaseModel, ABC):
     Subclasses should implement the following methods:
     - run(self, input_text: str) -> CoderOutput: Run the coder on the input text
     """
+
     workdir: str = Field(default="workdir", description="Working dir ")
     config: CoderConfig | None = Field(default=None, description="Config for the coder")
     params: dict | None = Field(default=None, description="Parameters for the coder")
@@ -115,8 +121,6 @@ def validate_mcp_support(self):
                 )
         return self
 
-
-
     @abstractmethod
     def run(self, input_text: str) -> CoderOutput:
         """Run the coder on the input text.
@@ -129,7 +133,6 @@ def run(self, input_text: str) -> CoderOutput:
         """
         raise NotImplementedError
 
-
     @classmethod
     def default_config_paths(cls) -> dict[Path, ConfigFileRole]:
         """Return config files as a dictionary of filename/dirname to role."""
@@ -220,7 +223,6 @@ def stream_output(pipe, output_lines, stream):
 
         return CoderOutput(stdout=stdout_text, stderr=stderr_text)
 
-
     def expand_env(self, env: dict[str, str] | None = None) -> dict[str, str]:
         """
         Expand environment variables in the coder config.
@@ -257,7 +259,7 @@ def expand_env(self, env: dict[str, str] | None = None) -> dict[str, str]:
 
     def expand_prompt(self, input_text: str) -> str:
         """Expand environment variables in the prompt.
-        
+
         Typically this just returns the prompt as is:
 
         Example:
@@ -278,7 +280,7 @@ def expand_prompt(self, input_text: str) -> str:
     def default_config_objects(self) -> list[CoderConfigObject]:
         """Default config objects for the coder."""
         raise NotImplementedError("default_config_objects is not implemented")
-    
+
     def set_instructions(self, instructions: str):
         """Set the instructions for the coder.
 
@@ -291,7 +293,7 @@ def set_instructions(self, instructions: str):
             >>> coder.set_instructions("you are an awesome coder")
             >>> coder.config_objects
             [CoderConfigObject(file_type=<FileType.TEXT: 'text'>, relative_path='CLAUDE.md', content='you are an awesome coder')]
-        
+
         Args:
             instructions: The instructions to set
         """
@@ -300,16 +302,25 @@ def set_instructions(self, instructions: str):
                 if not self.config_objects:
                     self.config_objects = []
                 for obj in self.config_objects:
-                    if obj.relative_path == str(path) or obj.relative_path == str(path.name):
+                    if obj.relative_path == str(path) or obj.relative_path == str(
+                        path.name
+                    ):
                         obj.content = instructions
                         return
                 else:
-                    self.config_objects.append(CoderConfigObject(relative_path=str(path), content=instructions, file_type=FileType.TEXT))
+                    self.config_objects.append(
+                        CoderConfigObject(
+                            relative_path=str(path),
+                            content=instructions,
+                            file_type=FileType.TEXT,
+                        )
+                    )
                     return
             else:
                 raise ValueError(f"Cannot set instructions for {typ}")
-        raise ValueError(f"No primary instruction file found for {self.__class__.__name__}")
-
+        raise ValueError(
+            f"No primary instruction file found for {self.__class__.__name__}"
+        )
 
     def prepare_workdir(self):
         """Prepare the workdir for the coder.
@@ -330,11 +341,7 @@ def prepare_workdir(self):
         # Check if MCP extensions are configured but not supported
         if self.config and self.config.extensions:
             logger.debug(f"🔧 Checking MCP extensions: {self.config.extensions}")
-            mcp_extensions = [
-                ext
-                for ext in self.config.extensions
-                if ext.enabled
-            ]
+            mcp_extensions = [ext for ext in self.config.extensions if ext.enabled]
             if mcp_extensions and not self.supports_mcp():
                 raise ValueError(
                     f"MCP extensions are configured but {self.__class__.__name__} does not support MCP. "
@@ -353,6 +360,7 @@ def prepare_workdir(self):
                     logger.debug(f" 🗑️ Removing old config object: {path}")
                     if path.is_dir():
                         import shutil
+
                         shutil.rmtree(path)
                     else:
                         path.unlink()

diff --git a/src/metacoder/coders/claude.py b/src/metacoder/coders/claude.py
@@ -147,32 +147,36 @@ def run(self, input_text: str) -> CoderOutput:
             # time the command
             start_time = time.time()
             ao = self.run_process(command, env)
+
             # parse the jsonl output
             def parse_jsonl_line(text: str) -> dict[str, Any]:
                 try:
                     result: dict[str, Any] = json.loads(text)
                     return result
                 except json.JSONDecodeError:
                     return {"original": text, "error": "JSONDecodeError"}
+
             ao.structured_messages = [
                 parse_jsonl_line(line) for line in ao.stdout.split("\n") if line
             ]
-            ao.structured_messages = [m for m in ao.structured_messages if m is not None]
+            ao.structured_messages = [
+                m for m in ao.structured_messages if m is not None
+            ]
             total_cost_usd = None
             is_error = None
-            
+
             # Extract tool uses
             tool_uses = []
             pending_tool_uses = {}  # Map tool_use_id to tool data
-            
+
             for message in ao.structured_messages:
                 if "total_cost_usd" in message:
                     total_cost_usd = message["total_cost_usd"]
                 if "is_error" in message:
                     is_error = message["is_error"]
                 if "result" in message:
                     ao.result_text = message["result"]
-                
+
                 # Check for tool_use in assistant messages
                 if message.get("type") == "assistant" and message.get("message"):
                     msg_content = message["message"].get("content", [])
@@ -182,16 +186,16 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                                 tool_id = content_item.get("id")
                                 tool_name = content_item.get("name", "")
                                 tool_input = content_item.get("input", {})
-                                
+
                                 # Store pending tool use
                                 pending_tool_uses[tool_id] = {
                                     "name": tool_name,
                                     "arguments": tool_input,
                                     "success": False,  # Default to False until we see result
                                     "error": None,
-                                    "result": None
+                                    "result": None,
                                 }
-                
+
                 # Check for tool_result in user messages
                 elif message.get("type") == "user" and message.get("message"):
                     msg_content = message["message"].get("content", [])
@@ -201,31 +205,35 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                                 tool_id = content_item.get("tool_use_id")
                                 if tool_id in pending_tool_uses:
                                     tool_data = pending_tool_uses[tool_id]
-                                    
+
                                     # Update with result
                                     is_tool_error = content_item.get("is_error", False)
                                     tool_data["success"] = not is_tool_error
-                                    tool_data["result"] = content_item.get("content", "")
-
+                                    tool_data["result"] = content_item.get(
+                                        "content", ""
+                                    )
+
                                     if is_tool_error:
-                                        tool_data["error"] = content_item.get("content", "Tool error occurred")
-
+                                        tool_data["error"] = content_item.get(
+                                            "content", "Tool error occurred"
+                                        )
+
                                     # Create ToolUse object
                                     tool_use = ToolUse(**tool_data)
                                     tool_uses.append(tool_use)
-                                    
+
                                     # Remove from pending
                                     del pending_tool_uses[tool_id]
-            
+
             # Add any remaining pending tool uses (shouldn't happen in normal flow)
             for tool_data in pending_tool_uses.values():
                 tool_data["error"] = "No result received for tool call"
                 tool_use = ToolUse(**tool_data)
                 tool_uses.append(tool_use)
-            
+
             if tool_uses:
                 ao.tool_uses = tool_uses
-                
+
             end_time = time.time()
             logger.info(f"🤖 Command took {end_time - start_time} seconds")
             ao.total_cost_usd = total_cost_usd

diff --git a/src/metacoder/coders/codex.py b/src/metacoder/coders/codex.py
@@ -26,7 +26,6 @@ def is_available(cls) -> bool:
         """Check if codex command is available."""
         return shutil.which("codex") is not None
 
-
     @property
     def instructions_path(self) -> Path:
         return Path("AGENTS.md")

diff --git a/src/metacoder/coders/dummy.py b/src/metacoder/coders/dummy.py
@@ -1,17 +1,22 @@
 from pathlib import Path
-from metacoder.coders.base_coder import BaseCoder, CoderConfigObject, CoderOutput, ToolUse
+from metacoder.coders.base_coder import (
+    BaseCoder,
+    CoderConfigObject,
+    CoderOutput,
+    ToolUse,
+)
 from metacoder.configuration import ConfigFileRole
 
 
 class DummyCoder(BaseCoder):
     """
     Dummy coder for testing.
-    
+
     Simulates tool use when input contains keywords:
     - "tool" or "mcp": Adds a generic test tool
     - "search" or "pubmed": Simulates a PubMed search tool
     - "error": Simulates a tool failure
-    
+
     Multiple keywords can trigger multiple tools.
     """
 
@@ -34,58 +39,71 @@ def run(self, input_text: str) -> CoderOutput:
         instructions_content = None
         if self.config_objects:
             for obj in self.config_objects:
-                if obj.relative_path == "INSTRUCTIONS.md" or obj.relative_path == str(Path("INSTRUCTIONS.md")):
+                if obj.relative_path == "INSTRUCTIONS.md" or obj.relative_path == str(
+                    Path("INSTRUCTIONS.md")
+                ):
                     instructions_content = obj.content
                     break
-        
+
         # Create response based on whether instructions exist
         if instructions_content:
-            response = f"Instructions loaded: {instructions_content}\nProcessing: {input_text}"
+            response = (
+                f"Instructions loaded: {instructions_content}\nProcessing: {input_text}"
+            )
         else:
             response = f"you said: {input_text}"
-            
+
         output = CoderOutput(
             stdout=response,
             stderr="",
             result_text=response,
         )
-        
+
         # Add fake tool uses if input mentions tools, MCP, or specific services
-        if any(keyword in input_text.lower() for keyword in ["tool", "mcp", "pubmed", "search"]):
+        if any(
+            keyword in input_text.lower()
+            for keyword in ["tool", "mcp", "pubmed", "search"]
+        ):
             # Create some fake tool uses for testing
             tool_uses = []
-            
+
             # Simulate a successful tool call
             if "search" in input_text.lower() or "pubmed" in input_text.lower():
-                tool_uses.append(ToolUse(
-                    name="mcp__pubmed__search_papers",
-                    arguments={"query": "test query", "limit": 10},
-                    success=True,
-                    error=None,
-                    result={"papers": ["paper1", "paper2"], "count": 2}
-                ))
-
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__pubmed__search_papers",
+                        arguments={"query": "test query", "limit": 10},
+                        success=True,
+                        error=None,
+                        result={"papers": ["paper1", "paper2"], "count": 2},
+                    )
+                )
+
             # Simulate a tool with an error
             if "error" in input_text.lower():
-                tool_uses.append(ToolUse(
-                    name="mcp__test__failing_tool", 
-                    arguments={"param": "value"},
-                    success=False,
-                    error="Simulated tool error for testing",
-                    result=None
-                ))
-
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__test__failing_tool",
+                        arguments={"param": "value"},
+                        success=False,
+                        error="Simulated tool error for testing",
+                        result=None,
+                    )
+                )
+
             # Default tool if no specific keywords but general tool/mcp mentioned
             if not tool_uses:
-                tool_uses.append(ToolUse(
-                    name="mcp__dummy__test_tool",
-                    arguments={"input": input_text},
-                    success=True,
-                    error=None,
-                    result="Test tool executed successfully"
-                ))
-
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__dummy__test_tool",
+                        arguments={"input": input_text},
+                        success=True,
+                        error=None,
+                        result="Test tool executed successfully",
+                    )
+                )
+
             if tool_uses:
                 output.tool_uses = tool_uses
-        
+
         return output