diff --git a/README.md b/README.md
index b591c01..e4e6846 100644
--- a/README.md
+++ b/README.md
@@ -24,10 +24,10 @@ metacoder "Write a Python function to calculate fibonacci numbers" -c claude -w
 ...
 
 # With custom instructions
-metacoder "Refactor this code" -c claude --instructions coding_guidelines.md
+metacoder "Refactor this code" -c claude --instructions coding_guidelines.md -w my-repo
 ...
 
-# Using MCPs
+# Using MCPs (e.g. GitHub MCP)
 metacoder "Fix issue 1234" -w path/to/my-repo --mcp-collection github_mcps.yaml
 ...
diff --git a/pyproject.toml b/pyproject.toml
index 5a09dfa..020908e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dev = [
     "mkdocstrings-python>=1.14.0",
     "mypy>=1.17.1",
     "pytest>=8.4.1",
+    "ruff>=0.12.8",
     "types-click>=7.1.8",
     "types-pyyaml>=6.0.12.20250516",
 ]
diff --git a/src/metacoder/coders/base_coder.py b/src/metacoder/coders/base_coder.py
index a8be80b..b44c6ec 100644
--- a/src/metacoder/coders/base_coder.py
+++ b/src/metacoder/coders/base_coder.py
@@ -21,10 +21,15 @@ class ToolUse(BaseModel):
     """Tool use from the coder."""
 
-    name: str = Field(..., description="Name of the tool; e.g. mcp.pubmed.get_paper_fulltext")
+
+    name: str = Field(
+        ..., description="Name of the tool; e.g. mcp.pubmed.get_paper_fulltext"
+    )
     arguments: dict[str, Any] = Field(..., description="Arguments to the tool")
     success: bool = Field(..., description="Whether the tool call was successful")
-    error: str | None = Field(default=None, description="Error message if the tool call failed")
+    error: str | None = Field(
+        default=None, description="Error message if the tool call failed"
+    )
     result: Any = Field(..., description="Result of the tool")
@@ -87,6 +92,7 @@ class BaseCoder(BaseModel, ABC):
     Subclasses should implement the following methods:
     - run(self, input_text: str) -> CoderOutput: Run the coder on the input text
     """
+
     workdir: str = Field(default="workdir", description="Working dir ")
     config: CoderConfig | None = Field(default=None, description="Config for the coder")
     params: dict | None = Field(default=None, description="Parameters for the coder")
@@ -115,8 +121,6 @@ def validate_mcp_support(self):
             )
         return self
 
-
-
     @abstractmethod
     def run(self, input_text: str) -> CoderOutput:
         """Run the coder on the input text.
@@ -129,7 +133,6 @@ def run(self, input_text: str) -> CoderOutput:
         """
         raise NotImplementedError
 
-
     @classmethod
     def default_config_paths(cls) -> dict[Path, ConfigFileRole]:
         """Return config files as a dictionary of filename/dirname to role."""
@@ -220,7 +223,6 @@ def stream_output(pipe, output_lines, stream):
 
         return CoderOutput(stdout=stdout_text, stderr=stderr_text)
 
-
     def expand_env(self, env: dict[str, str] | None = None) -> dict[str, str]:
         """
         Expand environment variables in the coder config.
@@ -257,7 +259,7 @@ def expand_env(self, env: dict[str, str] | None = None) -> dict[str, str]:
 
     def expand_prompt(self, input_text: str) -> str:
         """Expand environment variables in the prompt.
-        
+
         Typically this just returns the prompt as is:
 
         Example:
@@ -278,7 +280,7 @@ def expand_prompt(self, input_text: str) -> str:
     def default_config_objects(self) -> list[CoderConfigObject]:
         """Default config objects for the coder."""
         raise NotImplementedError("default_config_objects is not implemented")
-    
+
     def set_instructions(self, instructions: str):
         """Set the instructions for the coder.
@@ -291,7 +293,7 @@ def set_instructions(self, instructions: str):
         >>> coder.set_instructions("you are an awesome coder")
         >>> coder.config_objects
         [CoderConfigObject(file_type=<FileType.TEXT: 'text'>, relative_path='CLAUDE.md', content='you are an awesome coder')]
-        
+
         Args:
             instructions: The instructions to set
         """
@@ -300,16 +302,25 @@ def set_instructions(self, instructions: str):
             if not self.config_objects:
                 self.config_objects = []
             for obj in self.config_objects:
-                if obj.relative_path == str(path) or obj.relative_path == str(path.name):
+                if obj.relative_path == str(path) or obj.relative_path == str(
+                    path.name
+                ):
                     obj.content = instructions
                     return
             else:
-                self.config_objects.append(CoderConfigObject(relative_path=str(path), content=instructions, file_type=FileType.TEXT))
+                self.config_objects.append(
+                    CoderConfigObject(
+                        relative_path=str(path),
+                        content=instructions,
+                        file_type=FileType.TEXT,
+                    )
+                )
                 return
         else:
             raise ValueError(f"Cannot set instructions for {typ}")
-    raise ValueError(f"No primary instruction file found for {self.__class__.__name__}")
-
+    raise ValueError(
+        f"No primary instruction file found for {self.__class__.__name__}"
+    )
 
     def prepare_workdir(self):
         """Prepare the workdir for the coder.
@@ -330,11 +341,7 @@ def prepare_workdir(self):
         # Check if MCP extensions are configured but not supported
         if self.config and self.config.extensions:
             logger.debug(f"šŸ”§ Checking MCP extensions: {self.config.extensions}")
-            mcp_extensions = [
-                ext
-                for ext in self.config.extensions
-                if ext.enabled
-            ]
+            mcp_extensions = [ext for ext in self.config.extensions if ext.enabled]
             if mcp_extensions and not self.supports_mcp():
                 raise ValueError(
                     f"MCP extensions are configured but {self.__class__.__name__} does not support MCP. "
@@ -353,6 +360,7 @@ def prepare_workdir(self):
                 logger.debug(f"  šŸ—‘ļø Removing old config object: {path}")
                 if path.is_dir():
                     import shutil
+
                     shutil.rmtree(path)
                 else:
                     path.unlink()
diff --git a/src/metacoder/coders/claude.py b/src/metacoder/coders/claude.py
index 1c67c20..cf1af7c 100644
--- a/src/metacoder/coders/claude.py
+++ b/src/metacoder/coders/claude.py
@@ -147,6 +147,7 @@ def run(self, input_text: str) -> CoderOutput:
         # time the command
         start_time = time.time()
         ao = self.run_process(command, env)
+
         # parse the jsonl output
         def parse_jsonl_line(text: str) -> dict[str, Any]:
             try:
@@ -154,17 +155,20 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                 return result
             except json.JSONDecodeError:
                 return {"original": text, "error": "JSONDecodeError"}
+
         ao.structured_messages = [
             parse_jsonl_line(line) for line in ao.stdout.split("\n") if line
         ]
-        ao.structured_messages = [m for m in ao.structured_messages if m is not None]
+        ao.structured_messages = [
+            m for m in ao.structured_messages if m is not None
+        ]
         total_cost_usd = None
         is_error = None
-        
+
         # Extract tool uses
         tool_uses = []
         pending_tool_uses = {}  # Map tool_use_id to tool data
-        
+
         for message in ao.structured_messages:
             if "total_cost_usd" in message:
                 total_cost_usd = message["total_cost_usd"]
@@ -172,7 +176,7 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                 is_error = message["is_error"]
             if "result" in message:
                 ao.result_text = message["result"]
-            
+
             # Check for tool_use in assistant messages
             if message.get("type") == "assistant" and message.get("message"):
                 msg_content = message["message"].get("content", [])
@@ -182,16 +186,16 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                         tool_id = content_item.get("id")
                         tool_name = content_item.get("name", "")
                         tool_input = content_item.get("input", {})
-                        
+
                         # Store pending tool use
                         pending_tool_uses[tool_id] = {
                             "name": tool_name,
                             "arguments": tool_input,
                             "success": False,  # Default to False until we see result
                             "error": None,
-                            "result": None
+                            "result": None,
                         }
-                        
+
             # Check for tool_result in user messages
             elif message.get("type") == "user" and message.get("message"):
                 msg_content = message["message"].get("content", [])
@@ -201,31 +205,35 @@ def parse_jsonl_line(text: str) -> dict[str, Any]:
                         tool_id = content_item.get("tool_use_id")
                         if tool_id in pending_tool_uses:
                             tool_data = pending_tool_uses[tool_id]
-                            
+
                             # Update with result
                             is_tool_error = content_item.get("is_error", False)
                             tool_data["success"] = not is_tool_error
-                            tool_data["result"] = content_item.get("content", "")
-                            
+                            tool_data["result"] = content_item.get(
+                                "content", ""
+                            )
+
                             if is_tool_error:
-                                tool_data["error"] = content_item.get("content", "Tool error occurred")
-                            
+                                tool_data["error"] = content_item.get(
+                                    "content", "Tool error occurred"
+                                )
+
                             # Create ToolUse object
                             tool_use = ToolUse(**tool_data)
                             tool_uses.append(tool_use)
-                            
+
                             # Remove from pending
                             del pending_tool_uses[tool_id]
-        
+
         # Add any remaining pending tool uses (shouldn't happen in normal flow)
         for tool_data in pending_tool_uses.values():
             tool_data["error"] = "No result received for tool call"
             tool_use = ToolUse(**tool_data)
             tool_uses.append(tool_use)
-        
+
         if tool_uses:
             ao.tool_uses = tool_uses
-        
+
         end_time = time.time()
         logger.info(f"šŸ¤– Command took {end_time - start_time} seconds")
         ao.total_cost_usd = total_cost_usd
diff --git a/src/metacoder/coders/codex.py b/src/metacoder/coders/codex.py
index 2f29483..8e9169e 100644
--- a/src/metacoder/coders/codex.py
+++ b/src/metacoder/coders/codex.py
@@ -26,7 +26,6 @@ def is_available(cls) -> bool:
         """Check if codex command is available."""
         return shutil.which("codex") is not None
 
-
     @property
     def instructions_path(self) -> Path:
         return Path("AGENTS.md")
diff --git a/src/metacoder/coders/dummy.py b/src/metacoder/coders/dummy.py
index bb93159..d55378d 100644
--- a/src/metacoder/coders/dummy.py
+++ b/src/metacoder/coders/dummy.py
@@ -1,17 +1,22 @@
 from pathlib import Path
 
-from metacoder.coders.base_coder import BaseCoder, CoderConfigObject, CoderOutput, ToolUse
+from metacoder.coders.base_coder import (
+    BaseCoder,
+    CoderConfigObject,
+    CoderOutput,
+    ToolUse,
+)
 from metacoder.configuration import ConfigFileRole
 
 
 class DummyCoder(BaseCoder):
     """
     Dummy coder for testing.
-    
+
     Simulates tool use when input contains keywords:
     - "tool" or "mcp": Adds a generic test tool
     - "search" or "pubmed": Simulates a PubMed search tool
     - "error": Simulates a tool failure
-    
+
     Multiple keywords can trigger multiple tools.
     """
@@ -34,58 +39,71 @@ def run(self, input_text: str) -> CoderOutput:
         instructions_content = None
         if self.config_objects:
             for obj in self.config_objects:
-                if obj.relative_path == "INSTRUCTIONS.md" or obj.relative_path == str(Path("INSTRUCTIONS.md")):
+                if obj.relative_path == "INSTRUCTIONS.md" or obj.relative_path == str(
+                    Path("INSTRUCTIONS.md")
+                ):
                     instructions_content = obj.content
                     break
-        
+
         # Create response based on whether instructions exist
         if instructions_content:
-            response = f"Instructions loaded: {instructions_content}\nProcessing: {input_text}"
+            response = (
+                f"Instructions loaded: {instructions_content}\nProcessing: {input_text}"
+            )
         else:
             response = f"you said: {input_text}"
-        
+
         output = CoderOutput(
             stdout=response,
             stderr="",
             result_text=response,
         )
-        
+
         # Add fake tool uses if input mentions tools, MCP, or specific services
-        if any(keyword in input_text.lower() for keyword in ["tool", "mcp", "pubmed", "search"]):
+        if any(
+            keyword in input_text.lower()
+            for keyword in ["tool", "mcp", "pubmed", "search"]
+        ):
             # Create some fake tool uses for testing
             tool_uses = []
-            
+
             # Simulate a successful tool call
             if "search" in input_text.lower() or "pubmed" in input_text.lower():
-                tool_uses.append(ToolUse(
-                    name="mcp__pubmed__search_papers",
-                    arguments={"query": "test query", "limit": 10},
-                    success=True,
-                    error=None,
-                    result={"papers": ["paper1", "paper2"], "count": 2}
-                ))
-            
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__pubmed__search_papers",
+                        arguments={"query": "test query", "limit": 10},
+                        success=True,
+                        error=None,
+                        result={"papers": ["paper1", "paper2"], "count": 2},
+                    )
+                )
+
             # Simulate a tool with an error
             if "error" in input_text.lower():
-                tool_uses.append(ToolUse(
-                    name="mcp__test__failing_tool",
-                    arguments={"param": "value"},
-                    success=False,
-                    error="Simulated tool error for testing",
-                    result=None
-                ))
-            
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__test__failing_tool",
+                        arguments={"param": "value"},
+                        success=False,
+                        error="Simulated tool error for testing",
+                        result=None,
+                    )
+                )
+
             # Default tool if no specific keywords but general tool/mcp mentioned
             if not tool_uses:
-                tool_uses.append(ToolUse(
-                    name="mcp__dummy__test_tool",
-                    arguments={"input": input_text},
-                    success=True,
-                    error=None,
-                    result="Test tool executed successfully"
-                ))
-            
+                tool_uses.append(
+                    ToolUse(
+                        name="mcp__dummy__test_tool",
+                        arguments={"input": input_text},
+                        success=True,
+                        error=None,
+                        result="Test tool executed successfully",
+                    )
+                )
+
             if tool_uses:
                 output.tool_uses = tool_uses
-        
+
         return output
diff --git a/src/metacoder/coders/gemini.py b/src/metacoder/coders/gemini.py
index f81a5c8..20564a9 100644
--- a/src/metacoder/coders/gemini.py
+++ b/src/metacoder/coders/gemini.py
@@ -32,7 +32,7 @@ class GeminiCoder(BaseCoder):
     - `.gemini/commands/` - Custom commands directory
 
     MCP Support:
-    
+
     Gemini CLI supports MCP (Model Context Protocol) servers through the
     mcpServers configuration in .gemini/settings.json. When MCPs are configured
     through Metacoder, they will be automatically added to the settings file.
@@ -86,29 +86,27 @@ def mcp_config_to_gemini_format(self, mcp: MCPConfig) -> dict[str, Any]:
 
         # For HTTP type MCPs
         elif mcp.type == MCPType.HTTP:
-            raise NotImplementedError(
-                "HTTP MCPs are not supported for Gemini CLI yet"
-            )
+            raise NotImplementedError("HTTP MCPs are not supported for Gemini CLI yet")
 
         return server_config
 
     def default_config_objects(self) -> list[CoderConfigObject]:
         """Generate config objects including MCP configuration."""
         config_objects = []
-        
+
         # Create .gemini/settings.json if we have MCP extensions
         settings_content: dict[str, Any] = {}
-        
+
         # Add MCP servers configuration if extensions are present
         if self.config and self.config.extensions:
             mcp_servers = {}
             for mcp in self.config.extensions:
                 if mcp.enabled:
                     mcp_servers[mcp.name] = self.mcp_config_to_gemini_format(mcp)
-            
+
             if mcp_servers:
                 settings_content["mcpServers"] = mcp_servers
-        
+
         # Add settings.json if we have content to write
         if settings_content:
             config_objects.append(
@@ -118,10 +116,10 @@ def default_config_objects(self) -> list[CoderConfigObject]:
                     content=settings_content,
                 )
             )
-        
+
         # Add GEMINI.md if present in config
         # This could contain instructions specific to the task
-        
+
         return config_objects
 
     def run(self, input_text: str) -> CoderOutput:
@@ -136,7 +134,7 @@ def run(self, input_text: str) -> CoderOutput:
             env["HOME"] = "."
 
         text = self.expand_prompt(input_text)
-        
+
         # Build the command
         # The gemini CLI uses conversational interface, so we need to handle it differently
         # For now, we'll use echo to pipe the prompt
diff --git a/src/metacoder/coders/goose.py b/src/metacoder/coders/goose.py
index 9b76f4b..514dc2b 100644
--- a/src/metacoder/coders/goose.py
+++ b/src/metacoder/coders/goose.py
@@ -40,7 +40,6 @@ def supports_mcp(cls) -> bool:
         """GooseCoder supports MCP extensions."""
         return True
 
-
     def mcp_config_to_goose_extension(self, mcp: MCPConfig) -> dict:
         """Convert an MCPConfig to Goose extension format."""
         extension = {
@@ -69,7 +68,7 @@ def mcp_config_to_goose_extension(self, mcp: MCPConfig) -> dict:
             extension["bundled"] = None
 
         return extension
-    
+
     @classmethod
     def default_config_paths(cls) -> dict[Path, ConfigFileRole]:
         return {
@@ -196,38 +195,44 @@ def run(self, input_text: str) -> CoderOutput:
         if ao.structured_messages:
             tool_uses = []
             pending_tool_uses = {}  # Map tool request id to tool data
-            
+
             for message in ao.structured_messages:
                 # Check for tool requests in assistant messages
                 if message.get("role") == "assistant" and "content" in message:
                     for content in message.get("content", []):
-                        if isinstance(content, dict) and content.get("type") == "toolRequest":
+                        if (
+                            isinstance(content, dict)
+                            and content.get("type") == "toolRequest"
+                        ):
                             tool_id = content.get("id")
                             tool_call = content.get("toolCall", {})
-                            
+
                             if tool_call.get("status") == "success":
                                 tool_value = tool_call.get("value", {})
                                 tool_name = tool_value.get("name", "")
                                 tool_args = tool_value.get("arguments", {})
-                                
+
                                 # Store pending tool use
                                 pending_tool_uses[tool_id] = {
                                     "name": tool_name,
                                     "arguments": tool_args,
                                     "success": False,  # Default until we see result
                                     "error": None,
-                                    "result": None
+                                    "result": None,
                                 }
-                
+
                 # Check for tool responses in user messages
                 elif message.get("role") == "user" and "content" in message:
                     for content in message.get("content", []):
-                        if isinstance(content, dict) and content.get("type") == "toolResponse":
+                        if (
+                            isinstance(content, dict)
+                            and content.get("type") == "toolResponse"
+                        ):
                             tool_id = content.get("id")
                             if tool_id in pending_tool_uses:
                                 tool_data = pending_tool_uses[tool_id]
                                 tool_result = content.get("toolResult", {})
-                                
+
                                 # Update with result
                                 if tool_result.get("status") == "success":
                                     tool_data["success"] = True
@@ -236,29 +241,40 @@ def run(self, input_text: str) -> CoderOutput:
                                     result_value = tool_result.get("value", [])
                                     if isinstance(result_value, list):
                                         result_texts = []
                                         for item in result_value:
-                                            if isinstance(item, dict) and item.get("type") == "text":
-                                                result_texts.append(item.get("text", ""))
-                                        tool_data["result"] = "\n".join(result_texts) if result_texts else str(result_value)
+                                            if (
+                                                isinstance(item, dict)
+                                                and item.get("type") == "text"
+                                            ):
+                                                result_texts.append(
+                                                    item.get("text", "")
+                                                )
+                                        tool_data["result"] = (
+                                            "\n".join(result_texts)
+                                            if result_texts
+                                            else str(result_value)
+                                        )
                                     else:
                                         tool_data["result"] = str(result_value)
                                 else:
                                     tool_data["success"] = False
-                                    tool_data["error"] = tool_result.get("error", "Tool execution failed")
+                                    tool_data["error"] = tool_result.get(
+                                        "error", "Tool execution failed"
+                                    )
                                     tool_data["result"] = None
-                                
+
                                 # Create ToolUse object
                                 tool_use = ToolUse(**tool_data)
                                 tool_uses.append(tool_use)
-                                
+
                                 # Remove from pending
                                 del pending_tool_uses[tool_id]
-            
+
             # Add any remaining pending tool uses (shouldn't happen in normal flow)
             for tool_data in pending_tool_uses.values():
                 tool_data["error"] = "No result received for tool call"
                 tool_use = ToolUse(**tool_data)
                 tool_uses.append(tool_use)
-            
+
             if tool_uses:
                 ao.tool_uses = tool_uses
diff --git a/src/metacoder/configuration.py b/src/metacoder/configuration.py
index 03cefca..5110fe8 100644
--- a/src/metacoder/configuration.py
+++ b/src/metacoder/configuration.py
@@ -54,7 +54,10 @@ class AIModelProvider(BaseModel):
     name: str = Field(..., description="Name of the model provider")
    api_key: str | None = Field(None, description="API key for the model provider")
     metadata: dict[str, Any] = Field({}, description="Metadata for the model provider")
-    base_url: str | None = Field(None, description="Base URL for the model provider")
+    base_url: str | None = Field(
+        None,
+        description="Base URL for the model provider, e.g. https://api.cborg.lbl.gov",
+    )
 
 
 class AIModelConfig(BaseModel):
diff --git a/src/metacoder/evals/runner.py b/src/metacoder/evals/runner.py
index a12658a..67a9619 100644
--- a/src/metacoder/evals/runner.py
+++ b/src/metacoder/evals/runner.py
@@ -59,30 +59,27 @@ def is_successful(self) -> bool:
         return self.success
 
 
-correctness_metric = GEval(
-    name="Correctness",
-    criteria="Determine whether the actual output is factually correct based on the expected output.",
-    # NOTE: you can only provide either criteria or evaluation_steps, and not both
-    evaluation_steps=[
-        "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
-        "You should also heavily penalize omission of detail",
-        "Vague language, or contradicting OPINIONS, are OK",
-    ],
-    threshold=0.8,
-    evaluation_params=[
-        LLMTestCaseParams.INPUT,
-        LLMTestCaseParams.ACTUAL_OUTPUT,
-        LLMTestCaseParams.EXPECTED_OUTPUT,
-    ],
-)
-
-# instances
-dummy_metric = DummyMetric(threshold=0.5)
-
-METRICS = {
-    "CorrectnessMetric": correctness_metric,
-    "DummyMetric": dummy_metric,
-}
+def get_default_metrics() -> Dict[str, BaseMetric]:
+    """Get default metrics. Creates instances lazily to avoid network calls during import."""
+    return {
+        "CorrectnessMetric": GEval(
+            name="Correctness",
+            criteria="Determine whether the actual output is factually correct based on the expected output.",
+            # NOTE: you can only provide either criteria or evaluation_steps, and not both
+            evaluation_steps=[
+                "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
+                "You should also heavily penalize omission of detail",
+                "Vague language, or contradicting OPINIONS, are OK",
+            ],
+            threshold=0.8,
+            evaluation_params=[
+                LLMTestCaseParams.INPUT,
+                LLMTestCaseParams.ACTUAL_OUTPUT,
+                LLMTestCaseParams.EXPECTED_OUTPUT,
+            ],
+        ),
+        "DummyMetric": DummyMetric(threshold=0.5),
+    }
 
 
 def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
@@ -103,7 +100,6 @@ def create_coder(coder_name: str, workdir: str, config=None) -> BaseCoder:
 
     return coder
 
-
 class EvalResult(BaseModel):
     """Result of a single evaluation."""
@@ -227,8 +223,9 @@ def run_single_eval(
 
         # Run each metric
         for metric_name in case.metrics:
-            if metric_name in METRICS:
-                metric = METRICS[metric_name]
+            default_metrics = get_default_metrics()
+            if metric_name in default_metrics:
+                metric = default_metrics[metric_name]
             else:
                 # Get metric class and instantiate
                 metric_class = self.get_metric_class(metric_name)
diff --git a/src/metacoder/metacoder.py b/src/metacoder/metacoder.py
index a74035a..f62d3df 100644
--- a/src/metacoder/metacoder.py
+++ b/src/metacoder/metacoder.py
@@ -51,16 +51,16 @@ def load_mcp_collection(collection_path: Path) -> MCPCollectionConfig:
 
 def load_mcp_registry(registry_path: str) -> MCPCollectionConfig:
     """Load MCPs from the registry based on a path pattern.
-    
+
     Args:
         registry_path: Path pattern like 'metacoder' (all) or 'metacoder.basics'
-    
+
     Returns:
         MCPCollectionConfig containing all matched MCPs
     """
     # Base directory for registry
     registry_base = Path(__file__).parent / "mcps" / "registry"
-    
+
     # Convert dot notation to file path
     if registry_path == "metacoder":
         # Load all yaml files in registry
@@ -68,21 +68,21 @@ def load_mcp_registry(registry_path: str) -> MCPCollectionConfig:
     else:
         # Convert metacoder.basics to basics.yaml
         if registry_path.startswith("metacoder."):
-            registry_path = registry_path[len("metacoder."):]
+            registry_path = registry_path[len("metacoder.") :]
         yaml_files = [registry_base / f"{registry_path}.yaml"]
-    
+
     # Collect all MCPs
     all_mcps = []
     for yaml_file in yaml_files:
         if not yaml_file.exists():
             raise click.ClickException(f"Registry file not found: {yaml_file}")
-        
+
         try:
             with open(yaml_file, "r") as f:
                 data = yaml.safe_load(f)
         except yaml.YAMLError as e:
             raise click.ClickException(f"Invalid YAML in {yaml_file}: {e}")
-        
+
         # The registry files contain a list of MCP extensions directly
         if isinstance(data, list):
             for mcp_data in data:
@@ -99,7 +99,7 @@ def load_mcp_registry(registry_path: str) -> MCPCollectionConfig:
                 logger.warning(f"Invalid MCP in {yaml_file}: {e}")
     for mcp in all_mcps:
         mcp.enabled = False
-    
+
     # Create a collection config
     collection_name = f"Registry: {registry_path}"
     return MCPCollectionConfig(name=collection_name, description=None, servers=all_mcps)
@@ -239,7 +239,9 @@ def cli(ctx):
     "--provider", "-p", type=str, help="AI provider (e.g., openai, anthropic, google)"
 )
 @click.option(
-    "--model", type=str, help="AI model name (e.g., gpt-4, claude-3-opus, gemini-pro)"
+    "--model",
+    type=str,
+    help="AI model name (e.g., gpt-4o, claude-4-sonnet, gemini-2.5-pro)",
 )
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
 @click.option("--quiet", "-q", is_flag=True, help="Quiet mode")
@@ -323,7 +325,7 @@ def run(
         raise click.ClickException("Cannot use both verbose and quiet mode")
     if verbose:
         logging.basicConfig(level=logging.DEBUG)
-    elif quiet: # quiet mode is a bit different, it's just no output
+    elif quiet:  # quiet mode is a bit different, it's just no output
         logging.basicConfig(level=logging.WARNING)
     else:
         logging.basicConfig(level=logging.INFO)
@@ -358,26 +360,29 @@ def run(
             click.echo(
                 f"   Enabling MCPs: {', '.join(enabled_list)} (all enabled by default)"
             )
-    
+
     # Load MCPs from registry if provided
     if registry:
         click.echo(f"šŸ“š Loading MCPs from registry: {registry}")
         registry_config = load_mcp_registry(registry)
-        
+
         # Merge with existing MCP collection if any
         if mcp_collection_config:
             # Merge the servers lists
             for mcp in registry_config.servers:
                 # Avoid duplicates by name
-                if not any(existing.name == mcp.name for existing in mcp_collection_config.servers):
+                if not any(
+                    existing.name == mcp.name
+                    for existing in mcp_collection_config.servers
+                ):
                     mcp_collection_config.servers.append(mcp)
         else:
             mcp_collection_config = registry_config
-        
+
         # Show available MCPs from registry
         registry_mcps = [mcp.name for mcp in registry_config.servers]
         click.echo(f"   Registry MCPs: {', '.join(registry_mcps)}")
-        
+
         # Note that registry MCPs are not enabled by default
         if not enable_mcp:
             click.echo("   Use -e/--enable-mcp to enable specific MCPs")
@@ -421,7 +426,7 @@ def run(
     )
 
     if coder_config and coder_config.extensions:
-        for mcp in coder_config.extensions :
+        for mcp in coder_config.extensions:
             # use emoji to indicate enabled/disabled
             if mcp.enabled:
                 click.echo(f"  āœ… MCP: {mcp.name}")
@@ -476,7 +481,9 @@ def run(
             click.echo("\nšŸ“‹ Tool uses:")
             for tool_use in result.tool_uses:
                 success = "āœ…" if tool_use.success else "āŒ"
-                click.echo(f"  {success} {tool_use.name} with arguments: {tool_use.arguments}")
+                click.echo(
+                    f"  {success} {tool_use.name} with arguments: {tool_use.arguments}"
+                )
                 if tool_use.error:
                     click.echo(f"    Error: {tool_use.error}")
@@ -485,7 +492,7 @@ def run(
                 f"\nšŸ“‹ Structured messages ({len(result.structured_messages)} total)"
             )
             for i, msg in enumerate(result.structured_messages):
-                click.echo(f"  {i+1}. {msg}")
+                click.echo(f"  {i + 1}. {msg}")
 
 
 @cli.command("list-coders")
@@ -588,10 +595,10 @@ def eval_command(config: str, output: str, workdir: str, coders: tuple, verbose:
     click.echo("\nšŸ“ˆ Summary:")
     click.echo(f"   Total: {summary['total_evaluations']}")
     click.echo(
-        f"   Passed: {summary['passed']} ({summary['passed']/summary['total_evaluations']*100:.1f}%)"
+        f"   Passed: {summary['passed']} ({summary['passed'] / summary['total_evaluations'] * 100:.1f}%)"
     )
     click.echo(
-        f"   Failed: {summary['failed']} ({summary['failed']/summary['total_evaluations']*100:.1f}%)"
+        f"   Failed: {summary['failed']} ({summary['failed'] / summary['total_evaluations'] * 100:.1f}%)"
     )
     if summary["errors"] > 0:
         click.echo(f"   Errors: {summary['errors']} āš ļø")
@@ -640,22 +647,22 @@ def eval_command(config: str, output: str, workdir: str, coders: tuple, verbose:
 def introspect_mcp(mcp_spec: str, registry: Optional[str], timeout: int, verbose: bool):
     """
     Introspect an MCP server to list its available tools, resources, and prompts.
-    
+
     MCP_SPEC can be:
     - A URL (http://localhost:8080)
     - A command (uvx mcp-server-fetch)
     - An MCP name when used with --registry
-    
+
     Examples:
-    
+
         \b
         # Introspect a running MCP server
        metacoder introspect-mcp http://localhost:8080
-    
+
        \b
        # Introspect an MCP from registry
        metacoder introspect-mcp fetch --registry metacoder.basics
-    
+
        \b
        # Introspect a command-based MCP
        metacoder introspect-mcp "uvx mcp-server-fetch"
@@ -665,18 +672,24 @@ def introspect_mcp(mcp_spec: str, registry: Optional[str], timeout: int, verbose: bool):
         logging.basicConfig(level=logging.DEBUG)
     else:
         logging.basicConfig(level=logging.INFO)
-    
+
     # Run the introspection with proper cleanup
     import os
     import sys
-    
+
     # Suppress the specific asyncio warning by running with -W flag
     env = os.environ.copy()
-    env['PYTHONWARNINGS'] = 'ignore::RuntimeWarning:asyncio.base_subprocess'
-    
+    env["PYTHONWARNINGS"] = "ignore::RuntimeWarning:asyncio.base_subprocess"
+
     # Run in a subprocess to isolate the asyncio event loop
     import subprocess
-    args = [sys.executable, "-W", "ignore::RuntimeWarning:asyncio.base_subprocess", "-c", f"""
+
+    args = [
+        sys.executable,
+        "-W",
+        "ignore::RuntimeWarning:asyncio.base_subprocess",
+        "-c",
+        f"""
 import asyncio
 import sys
 sys.path.insert(0, {repr(str(Path(__file__).parent.parent))})
@@ -688,26 +701,26 @@ def introspect_mcp(mcp_spec: str, registry: Optional[str], timeout: int, verbose: bool):
 except Exception as e:
     print(f"Error: {{e}}", file=sys.stderr)
     sys.exit(1)
-"""]
-
+""",
+    ]
+
     try:
         # Run with stderr captured to filter out asyncio warnings
         result = subprocess.run(
-            args, 
-            env=env, 
-            timeout=timeout + 5,
-            stderr=subprocess.PIPE,
-            text=True
+            args, env=env, timeout=timeout + 5, stderr=subprocess.PIPE, text=True
         )
-        
+
         # Filter out the specific asyncio warning from stderr
         if result.stderr:
             error_lines = []
             skip_next = 0
             lines = result.stderr.splitlines()
-            
+
             for i, line in enumerate(lines):
-                if "Exception ignored in: <function BaseSubprocessTransport.__del__" in line:
+                if (
+                    "Exception ignored in: <function BaseSubprocessTransport.__del__"
+                    in line
+                ):
                     skip_next = 1
                 elif skip_next > 0:
@@ -717,12 +730,12 @@ def introspect_mcp(mcp_spec: str, registry: Optional[str], timeout: int, verbose: bool):
                         skip_next = 0  # Stop skipping after this line
                     else:
                         error_lines.append(line)
-            
+
             # Print any remaining stderr
             if error_lines:
                 for line in error_lines:
                     click.echo(line, err=True)
-        
+
         if result.returncode != 0:
             raise click.ClickException("Failed to introspect MCP server")
     except subprocess.TimeoutExpired:
@@ -736,48 +749,50 @@ async def _introspect_mcp_async(
 ):
     """Async implementation of MCP introspection."""
     from fastmcp import Client
-    
+
     mcp_config = None
     spec_to_use: Union[str, list[str]] = mcp_spec
-    
+
     # If registry is specified, load the MCP config
     if registry:
         click.echo(f"šŸ“š Loading MCP '{mcp_spec}' from registry: {registry}")
         registry_config = load_mcp_registry(registry)
-        
+
         # Find the MCP in the registry
         mcp_config = None
         for mcp in registry_config.servers:
             if mcp.name == mcp_spec:
                 mcp_config = mcp
                 break
-        
+
         if not mcp_config:
             available = [mcp.name for mcp in registry_config.servers]
             raise click.ClickException(
                 f"MCP '{mcp_spec}' not found in registry. Available: {', '.join(available)}"
             )
-        
+
         # Build the command from MCP config
         if mcp_config.command and mcp_config.args:
             spec_to_use = [mcp_config.command] + mcp_config.args
         else:
-            raise click.ClickException(f"MCP '{mcp_spec}' has incomplete command configuration")
-    
+            raise click.ClickException(
+                f"MCP '{mcp_spec}' has incomplete command configuration"
+            )
+
     click.echo(f"šŸ” Introspecting MCP: {spec_to_use}")
-    
+
     # Create client based on the spec type
     if isinstance(spec_to_use, list):
         # Command-based MCP - FastMCP expects a single server config dict
         server_config = {
             "server_name": {
                 "command": spec_to_use[0],
-                "args": spec_to_use[1:] if len(spec_to_use) > 1 else []
+                "args": spec_to_use[1:] if len(spec_to_use) > 1 else [],
             }
         }
         if mcp_config and mcp_config.env:
             server_config["server_name"]["env"] = mcp_config.env  # type: ignore
-        
+
         # FastMCP expects the full config with mcpServers key
         full_config = {"mcpServers": server_config}
         client = Client(full_config)
@@ -787,28 +802,29 @@ async def _introspect_mcp_async(
     else:
         # Try as command
         import shlex
+
         parts = shlex.split(spec_to_use)
         server_config = {
             "server_name": {
                 "command": parts[0],
-                "args": parts[1:] if len(parts) > 1 else []
+                "args": parts[1:] if len(parts) > 1 else [],
             }
         }
         full_config = {"mcpServers": server_config}
         client = Client(full_config)
-    
+
     async with client:
         click.echo("āœ… Connected to MCP server")
-        
+
         # Get server info if available
-        if hasattr(client, 'server_info'):
+        if hasattr(client, "server_info"):
             info = client.server_info
             click.echo("\nšŸ“‹ Server Info:")
             click.echo(f"   Name: {info.name}")
             click.echo(f"   Version: {info.version}")
-            if hasattr(info, 'description') and info.description:
+            if hasattr(info, "description") and info.description:
                 click.echo(f"   Description: {info.description}")
-        
+
         # List tools
         click.echo("\nšŸ”§ Available Tools:")
         try:
@@ -818,13 +834,15 @@ async def _introspect_mcp_async(
                 click.echo(f"\n   šŸ“Œ {tool.name}")
                 if tool.description:
                     click.echo(f"      Description: {tool.description}")
-                if verbose and hasattr(tool, 'inputSchema') and tool.inputSchema:
-                    click.echo(f"      Input Schema: {yaml.dump(tool.inputSchema, default_flow_style=False, indent=8).strip()}")
+                if verbose and hasattr(tool, "inputSchema") and tool.inputSchema:
+                    click.echo(
+                        f"      Input Schema: {yaml.dump(tool.inputSchema, default_flow_style=False, indent=8).strip()}"
+                    )
             else:
                 click.echo("   (No tools available)")
         except Exception as e:
             click.echo(f"   āš ļø Error listing tools: {e}")
-        
+
         # List resources
         click.echo("\nšŸ“ Available Resources:")
         try:
@@ -841,7 +859,7 @@ async def _introspect_mcp_async(
                 click.echo("   (No resources available)")
         except Exception as e:
             click.echo(f"   āš ļø Error listing resources: {e}")
-        
+
         # List prompts
         click.echo("\nšŸ’¬ Available Prompts:")
         try:
@@ -851,16 +869,18 @@ async def _introspect_mcp_async(
                 click.echo(f"\n   šŸ’” {prompt.name}")
                 if prompt.description:
                     click.echo(f"      Description: {prompt.description}")
-                if verbose and hasattr(prompt, 'arguments') and prompt.arguments:
+                if verbose and hasattr(prompt, "arguments") and prompt.arguments:
                     click.echo("      Arguments:")
                     for arg in prompt.arguments:
                         req = "required" if arg.required else "optional"
-                        click.echo(f"        - {arg.name} ({req}): {arg.description}")
+                        click.echo(
+                            f"        - {arg.name} ({req}): {arg.description}"
+                        )
             else:
                 click.echo("   (No prompts available)")
         except Exception as e:
             click.echo(f"   āš ļø Error listing prompts: {e}")
-        
+
         click.echo("\nāœ… Introspection complete!")
diff --git a/tests/test_claude_tool_use.py b/tests/test_claude_tool_use.py
index 817617f..3d74485 100644
--- a/tests/test_claude_tool_use.py
+++ b/tests/test_claude_tool_use.py
@@ -1,10 +1,11 @@
 """Test ClaudeCoder tool use extraction."""
+
 from metacoder.coders.base_coder import CoderOutput, ToolUse
 
 
 def test_claude_tool_use_extraction():
     """Test that ClaudeCoder correctly extracts tool uses from structured messages."""
-    
+
     # Create a mock output with tool use in structured messages
     output = CoderOutput(
         stdout="",
@@ -18,31 +19,31 @@ def test_claude_tool_use_extraction():
                             "type": "tool_use",
                             "id": "toolu_019mJqdgpJSP1Z6UcfsMhx7s",
                             "name": "mcp__pubmed__get_paper_fulltext",
-                            "input": {"pmid": "35743164"}
+                            "input": {"pmid": "35743164"},
                         }
                     ]
-                }
+                },
             },
             {
-                "type": "user", 
+                "type": "user",
                 "message": {
                     "content": [
                         {
                             "type": "tool_result",
                             "content": "Paper content here...",
                             "is_error": False,
-                            "tool_use_id": "toolu_019mJqdgpJSP1Z6UcfsMhx7s"
+                            "tool_use_id": "toolu_019mJqdgpJSP1Z6UcfsMhx7s",
                        }
                     ]
-                }
-            }
-        ]
+                },
+            },
+        ],
     )
-    
+
     # Process structured messages to extract tool uses
     tool_uses = []
     pending_tool_uses = {}
-    
+
     for message in output.structured_messages:
         # Check for tool_use in assistant messages
         if message.get("type") == "assistant" and message.get("message"):
@@ -53,16 +54,16 @@ def test_claude_tool_use_extraction():
                     tool_id = content_item.get("id")
                     tool_name = content_item.get("name", "")
                     tool_input = content_item.get("input", {})
-                    
+
                     # Store pending tool use
                     pending_tool_uses[tool_id] = {
                         "name": tool_name,
                        "arguments": tool_input,
                         "success": False,
                         "error": None,
-                        "result": None
+                        "result": None,
                     }
-                    
+
         # Check for tool_result in user messages
         elif message.get("type") == "user" and message.get("message"):
             msg_content = message["message"].get("content", [])
@@ -72,22 +73,24 @@ def test_claude_tool_use_extraction():
                     tool_id = content_item.get("tool_use_id")
                     if tool_id in pending_tool_uses:
                         tool_data = pending_tool_uses[tool_id]
-                        
+
                         # Update with result
                         is_tool_error = content_item.get("is_error", False)
                         tool_data["success"] = not is_tool_error
                         tool_data["result"] = content_item.get("content", "")
-                        
+
                         if is_tool_error:
-                            tool_data["error"] = content_item.get("content", "Tool error occurred")
-                        
+                            tool_data["error"] = content_item.get(
+                                "content", "Tool error occurred"
+                            )
+
                         # Create ToolUse object
                         tool_use = ToolUse(**tool_data)
                         tool_uses.append(tool_use)
-                        
+
                         # Remove from pending
                         del pending_tool_uses[tool_id]
-    
+
     # Verify extraction
     assert len(tool_uses) == 1
     tool_use = tool_uses[0]
@@ -100,7 +103,7 @@ def test_claude_tool_use_extraction():
 
 def test_claude_tool_use_error():
     """Test that ClaudeCoder correctly handles tool errors."""
-    
+
     # Create a mock output with tool error
     output = CoderOutput(
         stdout="",
@@ -114,31 +117,31 @@ def test_claude_tool_use_error():
                             "type": "tool_use",
                             "id": "toolu_test",
                             "name": "mcp__pubmed__get_paper_fulltext",
-                            "input": {"pmid": "invalid"}
+                            "input": {"pmid": "invalid"},
                         }
                     ]
-                }
+                },
             },
             {
-                "type": "user", 
+                "type": "user",
                 "message": {
                     "content": [
                         {
                             "type": "tool_result",
                             "content": "MCP tool response exceeds maximum allowed tokens",
                             "is_error": True,
-                            "tool_use_id": "toolu_test"
+                            "tool_use_id": "toolu_test",
                         }
                     ]
-                }
-            }
-        ]
+                },
+            },
+        ],
     )
-    
+
     # Process structured messages to extract tool uses
     tool_uses = []
     pending_tool_uses = {}
-    
+
     for message in output.structured_messages:
         # Check for tool_use in assistant messages
         if message.get("type") == "assistant" and message.get("message"):
@@ -149,16 +152,16 @@ def test_claude_tool_use_error():
                     tool_id = content_item.get("id")
                     tool_name = content_item.get("name", "")
                     tool_input = content_item.get("input", {})
-                    
+
                     # Store pending tool use
                     pending_tool_uses[tool_id] = {
                         "name": tool_name,
                         "arguments": tool_input,
                         "success": False,
                         "error": None,
-                        "result": None
+                        "result": None,
                     }
-                    
+
         # Check for tool_result in user messages
         elif message.get("type") == "user" and message.get("message"):
             msg_content = message["message"].get("content", [])
@@ -168,22 +171,24 @@ def test_claude_tool_use_error():
                     tool_id = content_item.get("tool_use_id")
                     if tool_id in pending_tool_uses:
                         tool_data = pending_tool_uses[tool_id]
-                        
+
                         # Update with result
                         is_tool_error = content_item.get("is_error", False)
                         tool_data["success"] = not is_tool_error
                         tool_data["result"] = content_item.get("content", "")
-                        
+
                         if is_tool_error:
-                            tool_data["error"] = content_item.get("content", "Tool error occurred")
-                        
+                            tool_data["error"] = content_item.get(
+                                "content", "Tool error occurred"
+                            )
+
                         # Create ToolUse object
                         tool_use = ToolUse(**tool_data)
                         tool_uses.append(tool_use)
-                        
+
                         # Remove from pending
                         del pending_tool_uses[tool_id]
-    
+
     # Verify error handling
     assert len(tool_uses) == 1
     tool_use = tool_uses[0]
@@ -191,4 +196,4 @@ def test_claude_tool_use_error():
     assert tool_use.arguments == {"pmid": "invalid"}
     assert tool_use.success is False
     assert tool_use.error == "MCP tool response exceeds maximum allowed tokens"
-    assert tool_use.result == "MCP tool response exceeds maximum allowed tokens"
\ No newline at end of file
+    assert tool_use.result == "MCP tool response exceeds maximum allowed tokens"
diff --git a/tests/test_coders/test_coder_availability.py b/tests/test_coders/test_coder_availability.py
index d9a75d3..4b53b63 100644
--- a/tests/test_coders/test_coder_availability.py
+++ b/tests/test_coders/test_coder_availability.py
@@ -59,12 +59,12 @@ def test_all_coders_have_availability_method():
     from metacoder.metacoder import AVAILABLE_CODERS
 
     for coder_name, coder_class in AVAILABLE_CODERS.items():
-        assert hasattr(
-            coder_class, "is_available"
-        ), f"{coder_name} missing is_available method"
-        assert callable(
-            coder_class.is_available
-        ), f"{coder_name}.is_available is not callable"
+        assert hasattr(coder_class, "is_available"), (
+            f"{coder_name} missing is_available method"
+        )
+        assert callable(coder_class.is_available), (
+            f"{coder_name}.is_available is not callable"
+        )
 
 
 @patch("shutil.which")
diff --git a/tests/test_coders/test_coders_basic.py b/tests/test_coders/test_coders_basic.py
index 4180e92..cb73641 100644
--- a/tests/test_coders/test_coders_basic.py
+++ b/tests/test_coders/test_coders_basic.py
@@ -62,17 +62,17 @@ def test_llm_coder_basic_arithmetic(coder_name, coder_class):
 
         # Check result
         assert result is not None
-        assert (
-            result.stdout or result.result_text
-        ), "Coder should produce some output"
+        assert result.stdout or result.result_text, (
+            "Coder should produce some output"
+        )
 
         # Get the actual output text
         output_text = result.result_text or result.stdout
 
         # All LLM coders should include "4" in their answer
-        assert (
-            "4" in output_text
-        ), f"{coder_name} should answer '4' to 'What is 2+2?'"
+        assert "4" in output_text, (
+            f"{coder_name} should answer '4' to 'What is 2+2?'"
+        )
 
     except Exception as e:
         pytest.fail(f"Coder {coder_name} failed with error: {e}")
@@ -137,12 +137,12 @@ def test_llm_coder_code_generation(coder_name, coder_class):
         assert output_text, "Coder should produce some output"
 
         # Verify the output contains Python code elements
-        assert (
-            "def" in output_text
-        ), f"{coder_name} should generate a Python function"
-        assert (
-            "return" in output_text or "print" in output_text
-        ), f"{coder_name} should have return or print"
+        assert "def" in output_text, (
+            f"{coder_name} should generate a Python function"
+        )
+        assert "return" in output_text or "print" in output_text, (
+            f"{coder_name} should have return or print"
+        )
 
     except Exception as e:
         pytest.fail(f"Coder {coder_name} failed with error: {e}")
diff --git a/tests/test_dummy_coder_tool_capture.py b/tests/test_dummy_coder_tool_capture.py
index a3d6364..b92b7ab 100644
--- a/tests/test_dummy_coder_tool_capture.py
+++ b/tests/test_dummy_coder_tool_capture.py
@@ -1,4 +1,5 @@
 """Test that DummyCoder properly captures tool calls in CoderOutput."""
+
 from metacoder.coders.dummy import DummyCoder
 from metacoder.coders.base_coder import CoderOutput, ToolUse
 
@@ -6,23 +7,25 @@
 def test_dummy_coder_captures_tool_calls():
     """Test that DummyCoder captures tool calls in the CoderOutput."""
     coder = DummyCoder(workdir="test")
-    
+
     # Run with input that triggers tool use
     output = coder.run("Use MCP to search PubMed for cancer research")
-    
+
     # Verify output is a CoderOutput instance
     assert isinstance(output, CoderOutput)
-    
+
     # Verify basic output fields
     assert output.stdout == "you said: Use MCP to search PubMed for cancer research"
     assert output.stderr == ""
-    assert output.result_text == "you said: Use MCP to search PubMed for cancer research"
-    
+    assert (
+        output.result_text == "you said: Use MCP to search PubMed for cancer research"
+    )
+
     # Verify tool_uses is populated
     assert output.tool_uses is not None
     assert isinstance(output.tool_uses, list)
     assert len(output.tool_uses) == 1
-    
+
     # Verify the tool use is properly structured
     tool_use = output.tool_uses[0]
     assert isinstance(tool_use, ToolUse)
@@ -36,27 +39,27 @@ def test_dummy_coder_captures_tool_calls():
 def test_dummy_coder_captures_multiple_tools():
     """Test that DummyCoder can capture multiple tool calls."""
     coder = DummyCoder(workdir="test")
-    
+
     # Run with input that triggers multiple tools
     output = coder.run("Search PubMed and then cause an error")
-    
+
     # Verify multiple tools are captured
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 2
-    
+
     # Verify each tool is a proper ToolUse instance
     for tool in output.tool_uses:
         assert isinstance(tool, ToolUse)
-        assert hasattr(tool, 'name')
-        assert hasattr(tool, 'arguments')
-        assert hasattr(tool, 'success')
-        assert hasattr(tool, 'error')
-        assert hasattr(tool, 'result')
-    
+        assert hasattr(tool, "name")
+        assert hasattr(tool, "arguments")
+        assert hasattr(tool, "success")
+        assert hasattr(tool, "error")
+        assert hasattr(tool, "result")
+
     # Check first tool (PubMed search)
     assert output.tool_uses[0].name == "mcp__pubmed__search_papers"
     assert output.tool_uses[0].success is True
-    
+
     # Check second tool (error)
     assert output.tool_uses[1].name == "mcp__test__failing_tool"
     assert output.tool_uses[1].success is False
@@ -66,14 +69,14 @@ def test_dummy_coder_no_tools_when_not_triggered():
     """Test that DummyCoder doesn't add tools when not triggered."""
     coder = DummyCoder(workdir="test")
-    
+
     # Run with input that doesn't trigger tools
     output = coder.run("What is the weather today?")
-    
+
     # Verify output structure
     assert isinstance(output, CoderOutput)
     assert output.stdout == "you said: What is the weather today?"
-    
+
     # Verify no tools are added
     assert output.tool_uses is None
 
@@ -81,14 +84,14 @@ def test_dummy_coder_tool_error_capture():
     """Test that DummyCoder properly captures tool errors."""
     coder = DummyCoder(workdir="test")
-    
+
     # Run with input that triggers an error
     output = coder.run("Use tool with error")
-    
+
     # Verify error tool is captured
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 1
-    
+
     error_tool = output.tool_uses[0]
     assert error_tool.name == "mcp__test__failing_tool"
     assert error_tool.success is False
@@ -100,22 +103,22 @@ def test_dummy_coder_tool_serialization():
     """Test that tool uses can be serialized properly."""
     coder = DummyCoder(workdir="test")
-    
+
     # Run with tool trigger
     output = coder.run("Use MCP tool")
-    
+
     # Verify tool uses can be converted to dict (for serialization)
     assert output.tool_uses is not None
     tool_dict = output.tool_uses[0].model_dump()
-    
+
     assert isinstance(tool_dict, dict)
     assert "name" in tool_dict
     assert "arguments" in tool_dict
     assert "success" in tool_dict
     assert "error" in tool_dict
     assert "result" in tool_dict
-    
+
     # Verify values
     assert tool_dict["name"] == "mcp__dummy__test_tool"
     assert tool_dict["success"] is True
-    assert tool_dict["error"] is None
\ No newline at end of file
+    assert tool_dict["error"] is None
diff --git a/tests/test_dummy_tool_use.py b/tests/test_dummy_tool_use.py
index ca3f2b4..f9f675e 100644
--- a/tests/test_dummy_tool_use.py
+++ b/tests/test_dummy_tool_use.py
@@ -1,4 +1,5 @@
 """Test DummyCoder fake tool use generation."""
+
 from metacoder.coders.dummy import DummyCoder
 
 
@@ -6,7 +7,7 @@ def test_dummy_no_tools():
     """Test that dummy coder doesn't add tools when not mentioned."""
     coder = DummyCoder(workdir="test")
     output = coder.run("What is 2 + 2?")
-    
+
     assert output.stdout == "you said: What is 2 + 2?"
     assert output.tool_uses is None
 
@@ -15,10 +16,10 @@ def test_dummy_default_tool():
     """Test that dummy coder adds default tool when mentioned."""
     coder = DummyCoder(workdir="test")
     output = coder.run("Use a tool to help me")
-    
+
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 1
-    
+
     tool = output.tool_uses[0]
     assert tool.name == "mcp__dummy__test_tool"
     assert tool.arguments == {"input": "Use a tool to help me"}
@@ -31,10 +32,10 @@ def test_dummy_pubmed_search():
     """Test that dummy coder simulates PubMed search."""
     coder = DummyCoder(workdir="test")
     output = coder.run("Search PubMed for papers about cancer")
-    
+
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 1
-    
+
     tool = output.tool_uses[0]
     assert tool.name == "mcp__pubmed__search_papers"
     assert tool.arguments == {"query": "test query", "limit": 10}
@@ -47,10 +48,10 @@ def test_dummy_tool_error():
     """Test that dummy coder simulates tool errors."""
     coder = DummyCoder(workdir="test")
     output = coder.run("Use MCP tool but simulate an error")
-    
+
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 1
-    
+
     tool = output.tool_uses[0]
     assert tool.name == "mcp__test__failing_tool"
     assert tool.arguments == {"param": "value"}
@@ -63,15 +64,15 @@ def test_dummy_multiple_tools():
     """Test that dummy coder can simulate multiple tools."""
     coder = DummyCoder(workdir="test")
     output = coder.run("Search PubMed and then simulate an error with MCP")
-    
+
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 2
-    
+
     # First tool - PubMed search
     tool1 = output.tool_uses[0]
     assert tool1.name == "mcp__pubmed__search_papers"
     assert tool1.success is True
-    
+
     # Second tool - error simulation
     tool2 = output.tool_uses[1]
     assert tool2.name == "mcp__test__failing_tool"
@@ -82,10 +83,10 @@ def test_dummy_mcp_keyword():
     """Test that MCP keyword triggers tool use."""
     coder = DummyCoder(workdir="test")
     output = coder.run("Test MCP functionality")
-    
+
     assert output.tool_uses is not None
     assert len(output.tool_uses) == 1
-    
+
     tool = output.tool_uses[0]
     assert tool.name == "mcp__dummy__test_tool"
-    assert tool.success is True
\ No newline at end of file
+    assert tool.success is True
diff --git a/tests/test_evals/test_deep_eval.py b/tests/test_evals/test_deep_eval.py
deleted file mode 100644
index c55dec2..0000000
--- a/tests/test_evals/test_deep_eval.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-Test the deepeval library.
-
-https://github.com/metacoder-ai/deepeval
-
-Note this doesn't actually test any metacoder functonality, it is more to explore
-deepeval metrics, it can probably be removed in the future.
-"""
-
-from deepeval.metrics import GEval
-from deepeval.test_case import LLMTestCaseParams
-from deepeval import evaluate
-from deepeval.metrics import (
-    FaithfulnessMetric,
-    HallucinationMetric,
-)
-from deepeval.test_case import LLMTestCase
-import pytest
-
-
-@pytest.mark.llm
-@pytest.mark.parametrize("metric_cls", [FaithfulnessMetric])
-def test_generic_eval(metric_cls):
-    """Test FaithfulnessMetric with correct output matching context."""
-    metric = metric_cls(threshold=0.7)
-    test_case = LLMTestCase(
-        input="What is the title of PMID:28027860?",
-        expected_output="The answer to the question 'what is the title of PMID:28027860?' is 'From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy.'",
-        actual_output='The answer to the question "what is the title of PMID:28027860?" is "From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy."',
-        context=[
-            "Title: From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy."
-        ],
-        retrieval_context=[
-            "PMID:28027860? Title: From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy."
-        ],
-    )
-    results = evaluate([test_case], [metric])
-    import yaml
-
-    print(results)
-    print(yaml.dump(results.model_dump()))
-
-
-@pytest.mark.llm
-@pytest.mark.parametrize("metric_cls", [HallucinationMetric])
-def test_hallucination_eval(metric_cls):
-    """Test HallucinationMetric detects incorrect information not supported by context."""
-    metric = metric_cls(threshold=0.7)
-    test_case = LLMTestCase(
-        input="What is the title of PMID:28027860?",
-        expected_output="From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy.",
-        actual_output='The title of the article with PMID:28027860 is "Predictors of acute and persisting fatigue in people with relapsing and remitting multiple sclerosis: A cohort study."',
-        context=[
-            "Title of PMID:28027860: From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy."
-        ],
-    )
-    results = evaluate([test_case], [metric])
-    import yaml
-
-    print(results)
-    print(yaml.dump(results.model_dump()))
-
-
-
-
-correctness_metric = GEval(
-    name="Correctness",
-    criteria="Determine whether the actual output is factually correct based on the expected output.",
-    # NOTE: you can only provide either criteria or evaluation_steps, and not both
-    evaluation_steps=[
-        "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
-        "You should also heavily penalize omission of detail",
-        "Vague language, or contradicting OPINIONS, are OK",
-    ],
-    threshold=0.8,
-    evaluation_params=[
-        LLMTestCaseParams.INPUT,
-        LLMTestCaseParams.ACTUAL_OUTPUT,
-        LLMTestCaseParams.EXPECTED_OUTPUT,
-    ],
-)
-
-
-@pytest.mark.llm
-def test_geval_eval():
-    """Test GEval correctness metric catches factual errors in output."""
-    metric = correctness_metric
-    test_case = LLMTestCase(
-        input="What is the title of PMID:28027860?",
-        expected_output="From nocturnal frontal lobe epilepsy to Sleep-Related Hypermotor Epilepsy.",
-        actual_output='The title of the article with PMID:28027860 is "Predictors of acute and persisting fatigue in people with relapsing and remitting multiple sclerosis: A cohort study."',
-    )
-    results = evaluate([test_case], [metric])
-    import yaml
-
-    print(results)
-    print(yaml.dump(results.model_dump()))
diff --git a/tests/test_evals/test_runner.py b/tests/test_evals/test_runner.py
index 0515237..d1f0c3e 100644
--- a/tests/test_evals/test_runner.py
+++ b/tests/test_evals/test_runner.py
@@ -1,6 +1,9 @@
 """Tests for the evaluation runner.
 
 This uses only dummy coders, so can be used in non-integration contexts.
+
+TODO: some of these are marked llm because they use an LLM in the eval
+phase, even if they use a dummy coder - figure out a way to have a dummy LLM Eval too
 """
 
 import pytest
@@ -137,6 +140,7 @@ def test_create_test_case_with_list_context(self):
         test_case = runner.create_test_case(eval_case, "4")
         assert test_case.retrieval_context == ["Math fact 1", "Math fact 2"]
 
+    @pytest.mark.llm
     def test_run_single_eval_with_dummy(self, simple_config, tmp_path):
         """Test running a single evaluation with dummy coder."""
         runner = EvalRunner()
@@ -244,6 +248,7 @@ def test_save_and_load_results(self, tmp_path):
         assert data["results"][0]["model"] == "model1"
         assert data["results"][0]["score"] == 0.9
 
+    @pytest.mark.llm
     def test_run_all_evals_with_dummy(self, simple_config, tmp_path):
         """Test running all evaluations with dummy coder."""
         runner = EvalRunner()
diff --git a/tests/test_goose_tool_use.py b/tests/test_goose_tool_use.py
index 94d83aa..3b643ae 100644
--- a/tests/test_goose_tool_use.py
+++ b/tests/test_goose_tool_use.py
@@ -1,4 +1,5 @@
 """Test GooseCoder tool use extraction."""
+
 from metacoder.coders.base_coder import ToolUse
 
 
@@ -13,7 +14,7 @@ def test_goose_tool_use_extraction():
             "content": [
                 {
                     "type": "text",
-                    "text": "I'll help you find information about diseases associated with ITPR1 mutations."
+                    "text": "I'll help you find information about diseases associated with ITPR1 mutations.",
                 },
                 {
                     "type": "toolRequest",
@@ -22,11 +23,11 @@ def test_goose_tool_use_extraction():
                         "status": "success",
                         "value": {
                             "name": "pubmed__get_paper_fulltext",
-                            "arguments": {"pmid": "35743164"}
-                        }
-                    }
-                }
-            ]
+                            "arguments": {"pmid": "35743164"},
+                        },
+                    },
+                },
+            ],
         },
         {
             "id": None,
@@ -38,22 +39,17 @@ def test_goose_tool_use_extraction():
                     "id": "toolu_01RbESTBH9tyWu9Q9uAVRjja",
                     "toolResult": {
                         "status": "success",
-                        "value": [
-                            {
-                                "type": "text",
-                                "text": "Paper content here..."
-                            }
-                        ]
-                    }
+                        "value": [{"type": "text", "text": "Paper content here..."}],
+                    },
                 }
-            ]
-        }
+            ],
+        },
     ]
-    
+
     # Process structured messages to extract tool uses (mimicking goose logic)
     tool_uses = []
     pending_tool_uses = {}
-    
+
     for message in structured_messages:
         # Check for tool requests in assistant messages
         if message.get("role") == "assistant" and "content" in message:
@@ -61,21 +57,21 @@ def test_goose_tool_use_extraction():
                 if isinstance(content, dict) and content.get("type") == "toolRequest":
                     tool_id = content.get("id")
                     tool_call = content.get("toolCall", {})
-                    
+
                     if tool_call.get("status") == "success":
                         tool_value = tool_call.get("value", {})
                         tool_name = tool_value.get("name", "")
                         tool_args = tool_value.get("arguments", {})
-                        
+
                         # Store pending tool use
                         pending_tool_uses[tool_id] = {
                             "name": tool_name,
                             "arguments": tool_args,
                             "success": False,
                             "error": None,
-                            "result": None
+                            "result": None,
                        }
-                        
+
         # Check for tool responses in user messages
         elif message.get("role") == "user" and "content" in message:
             for content in message.get("content", []):
@@ -84,7 +80,7 @@ def test_goose_tool_use_extraction():
                    if tool_id in pending_tool_uses:
                         tool_data = pending_tool_uses[tool_id]
                         tool_result = content.get("toolResult", {})
-                        
+
                         # Update with result
                         if tool_result.get("status") == "success":
                             tool_data["success"] = True
@@ -93,23 +89,32 @@ def test_goose_tool_use_extraction():
                             if isinstance(result_value, list):
                                 result_texts = []
                                 for item in result_value:
-                                    if isinstance(item, dict) and item.get("type") == "text":
+                                    if (
+                                        isinstance(item, dict)
+                                        and item.get("type") == "text"
+                                    ):
                                         result_texts.append(item.get("text", ""))
-                                tool_data["result"] = "\n".join(result_texts) if result_texts else str(result_value)
+                                tool_data["result"] = (
+                                    "\n".join(result_texts)
+                                    if result_texts
+                                    else str(result_value)
+                                )
                             else:
                                 tool_data["result"] = str(result_value)
                         else:
                             tool_data["success"] = False
-                            tool_data["error"] = tool_result.get("error", "Tool execution failed")
+                            tool_data["error"] = tool_result.get(
+                                "error", "Tool execution failed"
+                            )
                             tool_data["result"] = None
-                        
+
                         # Create ToolUse object
                         tool_use = ToolUse(**tool_data)
                         tool_uses.append(tool_use)
-                        
+
                         # Remove from pending
                         del pending_tool_uses[tool_id]
-    
+
     # Verify extraction
     assert len(tool_uses) == 1
     tool_use = tool_uses[0]
@@ -132,13 +137,10 @@ def test_goose_tool_use_error():
                     "id": "toolu_test",
                     "toolCall": {
                         "status": "success",
-                        "value": {
-                            "name": "test_tool",
-                            "arguments": {"param": "value"}
-                        }
-                    }
+                        "value": {"name": "test_tool", "arguments": {"param": "value"}},
+                    },
                 }
-            ]
+            ],
         },
         {
             "role": "user",
@@ -148,37 +150,37 @@ def test_goose_tool_use_error():
                     "id": "toolu_test",
                     "toolResult": {
                         "status": "error",
-                        "error": "Tool failed to execute"
-                    }
+                        "error": "Tool failed to execute",
+                    },
                 }
-            ]
-        }
+            ],
+        },
     ]
-    
+
     # Process structured messages to extract tool uses
     tool_uses = []
     pending_tool_uses = {}
-    
+
     for message in structured_messages:
         if message.get("role") == "assistant" and "content" in message:
             for content in message.get("content", []):
                 if isinstance(content, dict) and content.get("type") == "toolRequest":
                     tool_id = content.get("id")
                     tool_call = content.get("toolCall", {})
-                    
+
                     if tool_call.get("status") == "success":
                         tool_value = tool_call.get("value", {})
                         tool_name = tool_value.get("name", "")
                         tool_args = tool_value.get("arguments", {})
-                        
+
                         pending_tool_uses[tool_id] = {
                             "name": tool_name,
                             "arguments": tool_args,
                             "success": False,
                             "error": None,
-                            "result": None
+                            "result": None,
                         }
-                    
+
         elif message.get("role") == "user" and "content" in message:
             for content in message.get("content", []):
                 if isinstance(content, dict) and content.get("type") == "toolResponse":
@@ -186,27 +188,36 @@ def test_goose_tool_use_error():
                     if tool_id in pending_tool_uses:
                         tool_data = pending_tool_uses[tool_id]
                         tool_result = content.get("toolResult", {})
-                        
+
                         if tool_result.get("status") == "success":
                             tool_data["success"] = True
                             result_value = tool_result.get("value", [])
                             if isinstance(result_value, list):
                                 result_texts = []
                                 for item in result_value:
-                                    if isinstance(item, dict) and item.get("type") == "text":
+                                    if (
+                                        isinstance(item, dict)
+                                        and item.get("type") == "text"
+                                    ):
                                         result_texts.append(item.get("text", ""))
-                                tool_data["result"] = "\n".join(result_texts) if result_texts else str(result_value)
+                                tool_data["result"] = (
+                                    "\n".join(result_texts)
+                                    if result_texts
+                                    else str(result_value)
+                                )
                             else:
                                 tool_data["result"] = str(result_value)
                         else:
                             tool_data["success"] = False
-                            tool_data["error"] = tool_result.get("error", "Tool execution failed")
+                            tool_data["error"] = tool_result.get(
+                                "error", "Tool execution failed"
+                            )
                             tool_data["result"] = None
-                        
+
                         tool_use = ToolUse(**tool_data)
                         tool_uses.append(tool_use)
                         del pending_tool_uses[tool_id]
-    
+
     # Verify error handling
     assert len(tool_uses) == 1
     tool_use = tool_uses[0]
@@ -230,11 +241,11 @@ def test_goose_multiple_tools():
                         "status": "success",
                         "value": {
                             "name": "search_tool",
-                            "arguments": {"query": "test"}
-                        }
-                    }
+                            "arguments": {"query": "test"},
+                        },
+                    },
                 }
-            ]
+            ],
         },
         {
             "role": "user",
@@ -244,10 +255,10 @@ def test_goose_multiple_tools():
                     "id": "tool1",
                     "toolResult": {
                         "status": "success",
-                        "value": [{"type": "text", "text": "Search results"}]
-                    }
+                        "value": [{"type": "text", "text": "Search results"}],
+                    },
                 }
-            ]
+            ],
         },
         {
             "role": "assistant",
@@ -259,11 +270,11 @@ def test_goose_multiple_tools():
                         "status": "success",
                         "value": {
                             "name": "fetch_tool",
-                            "arguments": {"url": "http://example.com"}
-                        }
-                    }
+                            "arguments": {"url": "http://example.com"},
+                        },
+                    },
                 }
-            ]
+            ],
         },
         {
             "role": "user",
@@ -273,24 +284,24 @@ def test_goose_multiple_tools():
                    "id": "tool2",
                     "toolResult": {
                         "status": "success",
-                        "value": [{"type": "text", "text": "Fetched content"}]
-                    }
+                        "value": [{"type": "text", "text": "Fetched content"}],
+                    },
                 }
-            ]
-        }
+            ],
+        },
     ]
-    
+
     # Process structured messages
     tool_uses = []
     pending_tool_uses = {}
-    
+
     for message in structured_messages:
         if message.get("role") == "assistant" and "content" in message:
             for content in message.get("content", []):
                 if isinstance(content, dict) and content.get("type") == "toolRequest":
                     tool_id = content.get("id")
                     tool_call = content.get("toolCall", {})
-                    
+
                     if tool_call.get("status") == "success":
                         tool_value = tool_call.get("value", {})
                         pending_tool_uses[tool_id] = {
@@ -298,9 +309,9 @@ def test_goose_multiple_tools():
                             "arguments": tool_value.get("arguments", {}),
                             "success": False,
                             "error": None,
-                            "result": None
+                            "result": None,
                         }
-                    
+
         elif message.get("role") == "user" and "content" in message:
             for content in message.get("content", []):
                 if isinstance(content, dict) and content.get("type") == "toolResponse":
@@ -308,22 +319,29 @@ def test_goose_multiple_tools():
                     if tool_id in pending_tool_uses:
                         tool_data = pending_tool_uses[tool_id]
                         tool_result = content.get("toolResult", {})
-                        
+
                         if tool_result.get("status") == "success":
                             tool_data["success"] = True
                             result_value = tool_result.get("value", [])
                             if isinstance(result_value, list):
                                 result_texts = []
                                 for item in result_value:
-                                    if isinstance(item, dict) and item.get("type") == "text":
+                                    if (
+                                        isinstance(item, dict)
+                                        and item.get("type") == "text"
+                                    ):
                                         result_texts.append(item.get("text", ""))
-                                tool_data["result"] = "\n".join(result_texts) if result_texts else str(result_value)
+                                tool_data["result"] = (
+                                    "\n".join(result_texts)
+                                    if result_texts
+                                    else str(result_value)
+                                )
                             else:
                                 tool_data["result"] = str(result_value)
-                        
+
                         tool_uses.append(ToolUse(**tool_data))
                         del pending_tool_uses[tool_id]
-    
+
     # Verify multiple tools
     assert len(tool_uses) == 2
     assert tool_uses[0].name == "search_tool"
@@ -331,4 +349,4 @@ def test_goose_multiple_tools():
     assert tool_uses[0].result == "Search results"
     assert tool_uses[1].name == "fetch_tool"
     assert tool_uses[1].success is True
-    assert tool_uses[1].result == "Fetched content"
\ No newline at end of file
+    assert tool_uses[1].result == "Fetched content"
diff --git a/tests/test_instructions_option.py b/tests/test_instructions_option.py
index 681e275..ff9cc4e 100644
--- a/tests/test_instructions_option.py
+++ b/tests/test_instructions_option.py
@@ -21,7 +21,7 @@ def test_instructions_option_with_dummy_coder(runner):
         instructions_file = Path(temp_dir) / "test_instructions.md"
         instructions_content = "# Test Instructions\n\nBe helpful and concise."
         instructions_file.write_text(instructions_content)
-        
+
         # Run with instructions
         result = runner.invoke(
             main,
@@ -36,7 +36,7 @@ def test_instructions_option_with_dummy_coder(runner):
                 temp_dir,
             ],
         )
-        
+
         # Check that instructions were loaded
         assert result.exit_code == 0
         assert "Loaded instructions from:" in result.output
@@ -58,7 +58,7 @@ def test_no_instructions_still_works(runner):
                 temp_dir,
             ],
         )
-        
+
         assert result.exit_code == 0
         assert "you said: Hello" in result.output
         assert "Instructions loaded:" not in result.output
@@ -80,7 +80,7 @@ def test_instructions_file_not_found(runner):
                 temp_dir,
             ],
         )
-        
+
         # Should fail with appropriate error
         assert result.exit_code != 0
         assert "does not exist" in result.output
@@ -98,11 +98,11 @@ def test_instructions_with_config(runner):
   extensions: []
 """
         config_file.write_text(config_content)
-        
+
         # Create instructions file
         instructions_file = Path(temp_dir) / "instructions.md"
         instructions_file.write_text("Custom instructions")
-        
+
         result = runner.invoke(
             main,
             [
@@ -118,6 +118,6 @@ def test_instructions_with_config(runner):
                 temp_dir,
             ],
         )
-        
+
         assert result.exit_code == 0
-        assert "Loaded instructions from:" in result.output
\ No newline at end of file
+        assert "Loaded instructions from:" in result.output
diff --git a/tests/test_introspect_mcp.py b/tests/test_introspect_mcp.py
index 2f51ae0..4e5fd59 100644
--- a/tests/test_introspect_mcp.py
+++ b/tests/test_introspect_mcp.py
@@ -6,7 +6,7 @@ def test_introspect_mcp_help():
     """Test introspect-mcp help command."""
     runner = CliRunner()
     result = runner.invoke(cli, ["introspect-mcp", "--help"])
-    
+
     assert result.exit_code == 0
     assert "Introspect an MCP server" in result.output
     assert "MCP_SPEC" in result.output
@@ -17,12 +17,10 @@ def test_introspect_mcp_with_invalid_registry():
     """Test introspect-mcp with non-existent registry MCP."""
     runner = CliRunner()
-    result = runner.invoke(cli, [
-        "introspect-mcp",
-        "nonexistent",
-        "--registry", "metacoder.basics"
-    ])
-    
+    result = runner.invoke(
+        cli, ["introspect-mcp", "nonexistent", "--registry", "metacoder.basics"]
+    )
+
     assert result.exit_code != 0
     assert "not found in registry" in result.output
 
 
@@ -30,12 +28,9 @@ def test_introspect_mcp_with_registry_no_mcp():
     """Test introspect-mcp with invalid registry."""
     runner = CliRunner()
-    result = runner.invoke(cli, [
-        "introspect-mcp",
-        "fetch",
-        "--registry", "metacoder.nonexistent"
-    ])
-    
+    result = runner.invoke(
+        cli, ["introspect-mcp", "fetch", "--registry", "metacoder.nonexistent"]
+    )
+
     assert result.exit_code != 0
     assert "Registry file not found" in result.output
-
diff --git a/tests/test_mcps/test_gemini_mcp.py b/tests/test_mcps/test_gemini_mcp.py
index 25b9c24..288f61c 100644
--- a/tests/test_mcps/test_gemini_mcp.py
+++ b/tests/test_mcps/test_gemini_mcp.py
@@ -13,7 +13,7 @@ def test_gemini_supports_mcp():
 def test_gemini_mcp_config_conversion():
     """Test conversion of MCPConfig to Gemini format."""
     coder = GeminiCoder(workdir="/tmp/test")
-    
+
     # Test stdio MCP
     mcp = MCPConfig(
         name="test_server",
@@ -21,11 +21,11 @@ def test_gemini_mcp_config_conversion():
         args=["-y", "@modelcontextprotocol/server-test"],
         env={"API_KEY": "${TEST_KEY}"},
         enabled=True,
-        type=MCPType.STDIO
+        type=MCPType.STDIO,
     )
-    
+
     result = coder.mcp_config_to_gemini_format(mcp)
-    
+
     assert result["command"] == "npx"
     assert result["args"] == ["-y", "@modelcontextprotocol/server-test"]
     assert result["env"] == {"API_KEY": "${TEST_KEY}"}
@@ -35,13 +35,9 @@ def test_gemini_http_mcp_not_supported():
     """Test that HTTP MCPs raise NotImplementedError."""
     coder = GeminiCoder(workdir="/tmp/test")
-    
-    mcp = MCPConfig(
-        name="http_server",
-        enabled=True,
-        type=MCPType.HTTP
-    )
-    
+
+    mcp = MCPConfig(name="http_server", enabled=True, type=MCPType.HTTP)
+
     with pytest.raises(NotImplementedError, match="HTTP MCPs are not supported"):
         coder.mcp_config_to_gemini_format(mcp)
 
@@ -56,7 +52,7 @@ def test_gemini_mcp_settings_generation():
             MCPConfig(
                 name="filesystem",
                 command="npx",
                 args=["-y", "@modelcontextprotocol/server-filesystem"],
                 enabled=True,
-                type=MCPType.STDIO
+                type=MCPType.STDIO,
             ),
             MCPConfig(
                 name="github",
@@ -64,41 +60,41 @@ def test_gemini_mcp_settings_generation():
                 args=["mcp-github"],
                 env={"GITHUB_TOKEN": "${GITHUB_TOKEN}"},
                 enabled=True,
-                type=MCPType.STDIO
+                type=MCPType.STDIO,
             ),
             MCPConfig(
                 name="disabled_server",
                 command="uvx",
                 args=["mcp-disabled"],
                 enabled=False,
-                type=MCPType.STDIO
+                type=MCPType.STDIO,
             ),
-        ]
+        ],
     )
-    
+
     coder = GeminiCoder(workdir="/tmp/test", config=config)
     config_objects = coder.default_config_objects()
-    
+
     # Should have created settings.json
     assert len(config_objects) == 1
     settings_obj = config_objects[0]
-    
+
     assert settings_obj.relative_path == ".gemini/settings.json"
     assert "mcpServers" in settings_obj.content
-    
+
     mcp_servers = settings_obj.content["mcpServers"]
-    
+
     # Should only include enabled servers
     assert "filesystem" in mcp_servers
     assert "github" in mcp_servers
     assert "disabled_server" not in mcp_servers
-    
+
     # Check filesystem server config
     fs_config = mcp_servers["filesystem"]
     assert fs_config["command"] == "npx"
     assert fs_config["args"] == ["-y", "@modelcontextprotocol/server-filesystem"]
     assert fs_config["timeout"] == 30000
-    
+
     # Check github server config
     gh_config = mcp_servers["github"]
     assert gh_config["command"] == "uvx"
@@ -111,6 +107,6 @@ def test_gemini_no_mcp_no_settings():
     """Test that no settings.json is created when no MCPs are configured."""
     coder = GeminiCoder(workdir="/tmp/test")
     config_objects = coder.default_config_objects()
-    
+
     # Should not create any config files when no MCPs
-    assert len(config_objects) == 0
\ No newline at end of file
+    assert len(config_objects) == 0
diff --git a/tests/test_registry_loading.py b/tests/test_registry_loading.py
index c96781e..c2f67a2 100644
---
+++ b/tests/test_registry_loading.py
@@ -7,15 +7,15 @@
 def test_load_mcp_registry_basics():
     """Test loading basics registry."""
     collection = load_mcp_registry("metacoder.basics")
-    
+
     assert isinstance(collection, MCPCollectionConfig)
     assert len(collection.servers) > 0
-    
+
     # Check that fetch is in basics
     mcp_names = [mcp.name for mcp in collection.servers]
     assert "fetch" in mcp_names
     assert "taskmasterai" in mcp_names
-    
+
     # Check that all are disabled by default
     for mcp in collection.servers:
         assert not mcp.enabled and mcp.enabled is not None
@@ -24,10 +24,10 @@
 def test_load_mcp_registry_scilit():
     """Test loading scilit registry."""
     collection = load_mcp_registry("metacoder.scilit")
-    
+
     assert isinstance(collection, MCPCollectionConfig)
     assert len(collection.servers) > 0
-    
+
     # Check that scilit MCPs are present
     mcp_names = [mcp.name for mcp in collection.servers]
     assert "pdfreader" in mcp_names
@@ -38,9 +38,9 @@
 def test_load_mcp_registry_all():
     """Test loading all registries with 'metacoder'."""
     collection = load_mcp_registry("metacoder")
-    
+
     assert isinstance(collection, MCPCollectionConfig)
-    
+
     # Should have MCPs from both basics and scilit
     mcp_names = [mcp.name for mcp in collection.servers]
     assert "fetch" in mcp_names  # from basics
@@ -50,7 +50,7 @@
 def test_load_mcp_registry_without_prefix():
     """Test loading registry without metacoder prefix."""
     collection = load_mcp_registry("basics")
-    
+
     # Should work the same as with prefix
     mcp_names = [mcp.name for mcp in collection.servers]
     assert "fetch" in mcp_names
@@ -59,17 +59,24 @@
 def test_cli_with_registry():
     """Test CLI with registry option."""
     runner = CliRunner()
-    
+
     # Test with registry and enable specific MCP
-    result = runner.invoke(cli, [
-        "run",
-        "test prompt",
-        "--coder", "dummy",
-        "--registry", "metacoder.basics",
-        "--enable-mcp", "fetch",
-        "--workdir", "test_workdir"
-    ])
-    
+    result = runner.invoke(
+        cli,
+        [
+            "run",
+            "test prompt",
+            "--coder",
+            "dummy",
+            "--registry",
+            "metacoder.basics",
+            "--enable-mcp",
+            "fetch",
+            "--workdir",
+            "test_workdir",
+        ],
+    )
+
     assert result.exit_code == 0
     assert "Loading MCPs from registry: metacoder.basics" in result.output
     assert "Registry MCPs:" in result.output
@@ -79,7 +86,7 @@
 def test_cli_registry_with_mcp_collection():
     """Test CLI with both registry and MCP collection."""
     runner = CliRunner()
-    
+
     # Create a temporary MCP collection file
     with runner.isolated_filesystem():
         with open("test_mcps.yaml", "w") as f:
@@ -91,18 +98,27 @@
                 args: ["test"]
                 enabled: true
             """)
-        
-        result = runner.invoke(cli, [
-            "run",
-            "test prompt",
-            "--coder", "dummy",
-            "--mcp-collection", "test_mcps.yaml",
-            "--registry", "metacoder.basics",
-            "--enable-mcp", "fetch",
-            "--enable-mcp", "custom_mcp",
-            "--workdir", "test_workdir"
-        ])
-        
+
+        result = runner.invoke(
+            cli,
+            [
+                "run",
+                "test prompt",
+                "--coder",
+                "dummy",
+                "--mcp-collection",
+                "test_mcps.yaml",
+                "--registry",
+                "metacoder.basics",
+                "--enable-mcp",
+                "fetch",
+                "--enable-mcp",
+                "custom_mcp",
+                "--workdir",
+                "test_workdir",
+            ],
+        )
+
         assert result.exit_code == 0
         assert "Loading MCP collection from: test_mcps.yaml" in result.output
         assert "Loading MCPs from registry: metacoder.basics" in result.output
@@ -113,5 +129,5 @@ def test_registry_nonexistent():
     """Test loading nonexistent registry."""
     with pytest.raises(Exception) as exc_info:
         load_mcp_registry("metacoder.nonexistent")
-    
-    assert "Registry file not found" in str(exc_info.value)
\ No newline at end of file
+
+    assert "Registry file not found" in str(exc_info.value)
diff --git a/uv.lock b/uv.lock
index 875df9f..b68d2f3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1515,6 +1515,7 @@ dev = [
     { name = "mkdocstrings-python" },
     { name = "mypy" },
     { name = "pytest" },
+    { name = "ruff" },
     { name = "types-click" },
     { name = "types-pyyaml" },
 ]
@@ -1539,6 +1540,7 @@ dev = [
     { name = "mkdocstrings-python", specifier = ">=1.14.0" },
     { name = "mypy", specifier = ">=1.17.1" },
     { name = "pytest", specifier = ">=8.4.1" },
+    { name = "ruff", specifier = ">=0.12.8" },
     { name = "types-click", specifier = ">=7.1.8" },
     { name = "types-pyyaml", specifier = ">=6.0.12.20250516" },
 ]
@@ -3271,6 +3273,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 },
 ]
 
+[[package]]
+name = "ruff"
+version = "0.12.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4b/da/5bd7565be729e86e1442dad2c9a364ceeff82227c2dece7c29697a9795eb/ruff-0.12.8.tar.gz", hash = "sha256:4cb3a45525176e1009b2b64126acf5f9444ea59066262791febf55e40493a033", size = 5242373 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c9/1e/c843bfa8ad1114fab3eb2b78235dda76acd66384c663a4e0415ecc13aa1e/ruff-0.12.8-py3-none-linux_armv6l.whl", hash = "sha256:63cb5a5e933fc913e5823a0dfdc3c99add73f52d139d6cd5cc8639d0e0465513", size = 11675315 },
+    { url = "https://files.pythonhosted.org/packages/24/ee/af6e5c2a8ca3a81676d5480a1025494fd104b8896266502bb4de2a0e8388/ruff-0.12.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9a9bbe28f9f551accf84a24c366c1aa8774d6748438b47174f8e8565ab9dedbc", size = 12456653 },
+    { url = "https://files.pythonhosted.org/packages/99/9d/e91f84dfe3866fa648c10512904991ecc326fd0b66578b324ee6ecb8f725/ruff-0.12.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2fae54e752a3150f7ee0e09bce2e133caf10ce9d971510a9b925392dc98d2fec", size = 11659690 },
+    { url = "https://files.pythonhosted.org/packages/fe/ac/a363d25ec53040408ebdd4efcee929d48547665858ede0505d1d8041b2e5/ruff-0.12.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0acbcf01206df963d9331b5838fb31f3b44fa979ee7fa368b9b9057d89f4a53", size = 11896923 },
+    { url = "https://files.pythonhosted.org/packages/58/9f/ea356cd87c395f6ade9bb81365bd909ff60860975ca1bc39f0e59de3da37/ruff-0.12.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae3e7504666ad4c62f9ac8eedb52a93f9ebdeb34742b8b71cd3cccd24912719f", size = 11477612 },
+    { url = "https://files.pythonhosted.org/packages/1a/46/92e8fa3c9dcfd49175225c09053916cb97bb7204f9f899c2f2baca69e450/ruff-0.12.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb82efb5d35d07497813a1c5647867390a7d83304562607f3579602fa3d7d46f", size = 13182745 },
+    { url = "https://files.pythonhosted.org/packages/5e/c4/f2176a310f26e6160deaf661ef60db6c3bb62b7a35e57ae28f27a09a7d63/ruff-0.12.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:dbea798fc0065ad0b84a2947b0aff4233f0cb30f226f00a2c5850ca4393de609", size = 14206885 },
"https://files.pythonhosted.org/packages/87/9d/98e162f3eeeb6689acbedbae5050b4b3220754554526c50c292b611d3a63/ruff-0.12.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49ebcaccc2bdad86fd51b7864e3d808aad404aab8df33d469b6e65584656263a", size = 13639381 }, + { url = "https://files.pythonhosted.org/packages/81/4e/1b7478b072fcde5161b48f64774d6edd59d6d198e4ba8918d9f4702b8043/ruff-0.12.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ac9c570634b98c71c88cb17badd90f13fc076a472ba6ef1d113d8ed3df109fb", size = 12613271 }, + { url = "https://files.pythonhosted.org/packages/e8/67/0c3c9179a3ad19791ef1b8f7138aa27d4578c78700551c60d9260b2c660d/ruff-0.12.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:560e0cd641e45591a3e42cb50ef61ce07162b9c233786663fdce2d8557d99818", size = 12847783 }, + { url = "https://files.pythonhosted.org/packages/4e/2a/0b6ac3dd045acf8aa229b12c9c17bb35508191b71a14904baf99573a21bd/ruff-0.12.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:71c83121512e7743fba5a8848c261dcc454cafb3ef2934a43f1b7a4eb5a447ea", size = 11702672 }, + { url = "https://files.pythonhosted.org/packages/9d/ee/f9fdc9f341b0430110de8b39a6ee5fa68c5706dc7c0aa940817947d6937e/ruff-0.12.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:de4429ef2ba091ecddedd300f4c3f24bca875d3d8b23340728c3cb0da81072c3", size = 11440626 }, + { url = "https://files.pythonhosted.org/packages/89/fb/b3aa2d482d05f44e4d197d1de5e3863feb13067b22c571b9561085c999dc/ruff-0.12.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a2cab5f60d5b65b50fba39a8950c8746df1627d54ba1197f970763917184b161", size = 12462162 }, + { url = "https://files.pythonhosted.org/packages/18/9f/5c5d93e1d00d854d5013c96e1a92c33b703a0332707a7cdbd0a4880a84fb/ruff-0.12.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:45c32487e14f60b88aad6be9fd5da5093dbefb0e3e1224131cb1d441d7cb7d46", size = 12913212 }, + { url = "https://files.pythonhosted.org/packages/71/13/ab9120add1c0e4604c71bfc2e4ef7d63bebece0cfe617013da289539cef8/ruff-0.12.8-py3-none-win32.whl", hash = "sha256:daf3475060a617fd5bc80638aeaf2f5937f10af3ec44464e280a9d2218e720d3", size = 11694382 }, + { url = "https://files.pythonhosted.org/packages/f6/dc/a2873b7c5001c62f46266685863bee2888caf469d1edac84bf3242074be2/ruff-0.12.8-py3-none-win_amd64.whl", hash = "sha256:7209531f1a1fcfbe8e46bcd7ab30e2f43604d8ba1c49029bb420b103d0b5f76e", size = 12740482 }, + { url = "https://files.pythonhosted.org/packages/cb/5c/799a1efb8b5abab56e8a9f2a0b72d12bd64bb55815e9476c7d0a2887d2f7/ruff-0.12.8-py3-none-win_arm64.whl", hash = "sha256:c90e1a334683ce41b0e7a04f41790c429bf5073b62c1ae701c9dc5b3d14f0749", size = 11884718 }, +] + [[package]] name = "sentry-sdk" version = "2.34.1"