ambient-code · jeremyeder · Feb 6, 2026 · Jan 29, 2026 · Jan 29, 2026 · Feb 5, 2026
diff --git a/src/agentready/assessors/documentation.py b/src/agentready/assessors/documentation.py
@@ -453,8 +453,7 @@ def _create_remediation(self) -> Remediation:
             ],
             tools=[],
             commands=[],
-            examples=[
-                """# Project Name
+            examples=["""# Project Name
 
 ## Overview
 What this project does and why it exists.
@@ -477,8 +476,7 @@ def _create_remediation(self) -> Remediation:
 # Format code
 black .
 ```
-"""
-            ],
+"""],
             citations=[
                 Citation(
                     source="GitHub",

diff --git a/src/agentready/assessors/testing.py b/src/agentready/assessors/testing.py
@@ -286,8 +286,7 @@ def _create_remediation(self) -> Remediation:
                 "pre-commit install",
                 "pre-commit run --all-files",
             ],
-            examples=[
-                """# .pre-commit-config.yaml
+            examples=["""# .pre-commit-config.yaml
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0
@@ -306,8 +305,7 @@ def _create_remediation(self) -> Remediation:
     rev: 5.12.0
     hooks:
       - id: isort
-"""
-            ],
+"""],
             citations=[
                 Citation(
                     source="pre-commit.com",

diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
@@ -7,7 +7,7 @@
 
 import click
 
-from ..services.eval_harness.harbor_config import HarborConfig
+from ..services.eval_harness.harbor_config import ALLOWED_MODELS, HarborConfig
 from ..services.eval_harness.tbench_runner import _real_tbench_result
 from ..services.harbor.agent_toggler import AssessorStateToggler
 from ..services.harbor.comparer import compare_assessor_impact
@@ -27,10 +27,16 @@
     default=None,
     help="Benchmark subset (tbench: smoketest/full)",
 )
+@click.option(
+    "--agent",
+    type=click.Choice(["claude-code", "cursor-cli"]),
+    default="claude-code",
+    help="Agent for evaluation",
+)
 @click.option(
     "--model",
-    type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]),
-    default="claude-haiku-4-5",
+    type=click.Choice(sorted(ALLOWED_MODELS)),  # sorted: a set has no stable order
+    default="anthropic/claude-haiku-4-5",
     help="Model for evaluation",
 )
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
@@ -53,7 +59,15 @@
     help="Skip dependency checks (for advanced users)",
 )
 def benchmark(
-    repository, harness, subset, model, verbose, timeout, output_dir, skip_preflight
+    repository,
+    harness,
+    subset,
+    agent,
+    model,
+    verbose,
+    timeout,
+    output_dir,
+    skip_preflight,
 ):
     """Run agent coding benchmarks.
 
@@ -81,14 +95,23 @@ def benchmark(
     # Route to appropriate harness
     if harness == "tbench":
         _run_tbench(
-            repo_path, subset, model, verbose, timeout, output_dir, skip_preflight
+            repo_path,
+            subset,
+            agent,
+            model,
+            verbose,
+            timeout,
+            output_dir,
+            skip_preflight,
         )
     else:
         click.echo(f"Unknown harness: {harness}", err=True)
         raise click.Abort()
 
 
-def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_preflight):
+def _run_tbench(
+    repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
+):
     """Run Terminal-Bench evaluation."""
     # Default subset to 'full' if not specified
     if subset is None:
@@ -107,6 +130,7 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
         click.echo("AgentReady Terminal-Bench Benchmark")
         click.echo(f"{'=' * 50}\n")
         click.echo(f"Repository: {repo_path}")
+        click.echo(f"Agent: {agent}")
         click.echo(f"Model: {model}")
         click.echo(f"Subset: {subset} ({'1-2 tasks' if smoketest else '89 tasks'})")
         click.echo(f"Timeout: {timeout}s\n")
@@ -135,19 +159,24 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
             raise click.Abort()
 
     # Validate API key BEFORE creating HarborConfig
-    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    # Each agent authenticates through its own environment variable; one
+    # lookup table keeps the key read and the error hint below in sync.
+    key_names = {"claude-code": "ANTHROPIC_API_KEY", "cursor-cli": "CURSOR_API_KEY"}
+    key_name = key_names[agent]
+    api_key = os.environ.get(key_name, "")
     if not api_key:
+        # Name the exact variable this agent needs so the hint is actionable.
         click.echo(
-            "Error: ANTHROPIC_API_KEY environment variable not set.\n"
-            "Set it with: export ANTHROPIC_API_KEY=your-key-here",
+            f"Error: {key_name} environment variable not set.\n"
+            f"Set it with: export {key_name}=your-key-here",
             err=True,
         )
         raise click.Abort()
 
     # Create HarborConfig (will not raise ValueError now)
     harbor_config = HarborConfig(
-        model=f"anthropic/{model}",
-        agent="claude-code",
+        model=model,
+        agent=agent,
         jobs_dir=Path(tempfile.mkdtemp()),
         api_key=api_key,
         timeout=timeout,

diff --git a/src/agentready/services/assessment_cache.py b/src/agentready/services/assessment_cache.py
@@ -33,8 +33,7 @@ def _initialize_db(self) -> None:
         """Initialize database schema."""
         try:
             with sqlite3.connect(self.db_path) as conn:
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE TABLE IF NOT EXISTS assessments (
                         id INTEGER PRIMARY KEY AUTOINCREMENT,
                         repository_url TEXT NOT NULL,
@@ -45,23 +44,18 @@ def _initialize_db(self) -> None:
                         expires_at TIMESTAMP,
                         UNIQUE(repository_url, commit_hash)
                     )
-                    """
-                )
+                    """)
 
                 # Create index for faster queries
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE INDEX IF NOT EXISTS idx_repo_commit
                     ON assessments(repository_url, commit_hash)
-                    """
-                )
+                    """)
 
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE INDEX IF NOT EXISTS idx_expires_at
                     ON assessments(expires_at)
-                    """
-                )
+                    """)
 
                 conn.commit()
         except sqlite3.Error as e:

diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py
@@ -9,14 +9,27 @@
 from typing import Optional
 
 # Allowed models (excludes opus due to cost)
+# Anthropic models: https://platform.claude.com/docs/en/about-claude/models/overview
+# Cursor models: https://cursor.com/docs/models
 ALLOWED_MODELS = {
     "anthropic/claude-haiku-4-5",
     "anthropic/claude-sonnet-4-5",
+    "cursor/composer-1",
+    "cursor/gpt-5.2-codex",
+    "cursor/gpt-5.2-codex-fast",
+    "cursor/gemini-3-pro",
+    "cursor/opus-4.5",
+    "cursor/sonnet-4.5",
+    "cursor/sonnet-4.5-thinking",
+    "cursor/gpt-5.1-high",
+    "cursor/gemini-3-flash",
 }
 
 # Allowed agents (excludes oracle as it's not relevant for real-world assessment)
+# Harbor supported agents: https://github.com/laude-institute/harbor/blob/main/src/harbor/agents/factory.py
 ALLOWED_AGENTS = {
     "claude-code",
+    "cursor-cli",
 }
 
 

diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py
@@ -125,31 +125,48 @@
     # Pass through current environment but ensure API key is set
     # Harbor's claude-code agent has MiniMax API hardcoded - override it
     clean_env = os.environ.copy()
-    clean_env["ANTHROPIC_API_KEY"] = config.api_key
-    clean_env["ANTHROPIC_AUTH_TOKEN"] = config.api_key  # Harbor uses this
-    clean_env["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com"  # Override MiniMax
-    clean_env["ANTHROPIC_API_BASE"] = "https://api.anthropic.com"  # Alternative var
+
+    # Define agent-specific environment variable configurations
+    # Structure: (Env Key, Env Value, Is Sensitive)
+    agent_env_configs = {
+        "claude-code": [
+            ("ANTHROPIC_API_KEY", config.api_key, True),
+            ("ANTHROPIC_AUTH_TOKEN", config.api_key, True),
+            ("ANTHROPIC_BASE_URL", "https://api.anthropic.com", False),
+            ("ANTHROPIC_API_BASE", "https://api.anthropic.com", False),
+        ],
+        "cursor-cli": [
+            ("CURSOR_API_KEY", config.api_key, True),
+        ],
+    }
+
+    if config.agent not in agent_env_configs:
+        raise ValueError(f"Invalid agent: {config.agent}")
+
+    # Set environment variables and build display/copyable lists
+    env_vars_display = []
+    env_vars_copyable = []
+
+    for var_name, var_value, is_sensitive in agent_env_configs[config.agent]:
+        clean_env[var_name] = var_value
+
+        # Display string: show only a 20-char prefix of secrets, never the
+        # full value; non-sensitive values are printed verbatim.
+        display_value = f"{var_value[:20]}..." if is_sensitive else var_value
+        env_vars_display.append(f"{var_name}={display_value}")
+
+        # Build copyable string (use variable reference for sensitive values)
+        if is_sensitive:
+            copyable_value = f"${var_name}"
+        else:
+            copyable_value = var_value
+        env_vars_copyable.append(f"{var_name}={copyable_value}")
+
     # Clear MiniMax settings if present
     clean_env.pop("MINIMAX_API_KEY", None)
 
     # Print Harbor command for debugging and manual execution
     shell_cmd = " ".join(shlex.quote(arg) for arg in cmd)
-
-    # Prepare environment variable strings (truncate API key for security in display)
-    env_vars_display = [
-        f"ANTHROPIC_API_KEY={config.api_key[:20]}...",  # Truncated for display
-        f"ANTHROPIC_AUTH_TOKEN={config.api_key[:20]}...",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
-
-    # Full command for copy/paste (use $ANTHROPIC_API_KEY to avoid exposing key)
-    env_vars_copyable = [
-        "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY",
-        "ANTHROPIC_AUTH_TOKEN=$ANTHROPIC_API_KEY",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
     full_cmd_copyable = " ".join(env_vars_copyable) + " " + shell_cmd
 
     print(f"\n{'=' * 70}")
@@ -157,7 +174,7 @@
    print(f"{'=' * 70}")
    print(f"\n{full_cmd_copyable}\n")
    print(f"{'=' * 70}")
    print("Command Breakdown:")
    print(f"{'=' * 70}")
    print(f"\nCommand: {shell_cmd}\n")
    print("Environment Variables:")

diff --git a/tests/e2e/test_critical_paths.py b/tests/e2e/test_critical_paths.py
@@ -276,14 +276,12 @@ def test_assess_with_valid_config(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create valid config file
             config_file = Path(tmp_dir) / "config.yaml"
-            config_file.write_text(
-                """
+            config_file.write_text("""
 weights:
   claude_md: 2.0
 excluded_attributes:
   - repomix_config
-"""
-            )
+""")
 
             output_dir = Path(tmp_dir) / "output"
 

diff --git a/tests/e2e/test_critical_paths_simplified.py b/tests/e2e/test_critical_paths_simplified.py
@@ -219,14 +219,12 @@ def test_valid_config_application(self, temp_output_dir):
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create valid config
             config_file = Path(tmp_dir) / "config.yaml"
-            config_file.write_text(
-                """
+            config_file.write_text("""
 weights:
   claude_md: 2.0
 excluded_attributes:
   - repomix_config
-"""
-            )
+""")
 
             # Run assessment with config
             result = helper.run_assessment(

diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py
@@ -355,14 +355,12 @@ class TestConfigLoading:
     def test_load_config_valid_yaml(self, tmp_path):
         """Test loading valid config file."""
         config_file = tmp_path / "config.yaml"
-        config_file.write_text(
-            """
+        config_file.write_text("""
 weights:
   claude_md_file: 2.0
 excluded_attributes:
   - test_attribute
-"""
-        )
+""")
 
         config = load_config(config_file)