diff --git a/.github/workflows/system.yml b/.github/workflows/system.yml index 0a78ee7..c90c343 100644 --- a/.github/workflows/system.yml +++ b/.github/workflows/system.yml @@ -21,17 +21,17 @@ jobs: - name: Set up commit0 run: uv run commit0 setup simpy - name: Build docker images - run: uv run commit0 build simpy + run: uv run commit0 build - name: Get tests run: uv run commit0 get-tests simpy - name: Test run: uv run commit0 test simpy tests/test_event.py::test_succeed --reference - name: Evaluate - run: uv run commit0 evaluate simpy --reference + run: uv run commit0 evaluate --reference - name: Lint run: uv run commit0 lint commit0/harness/lint.py - name: Save env: GITHUB_TOKEN: ${{ secrets.MY_GITHUB_TOKEN }} run: | - uv run commit0 save simpy test-save-commit0 master + uv run commit0 save test-save-commit0 master diff --git a/.gitignore b/.gitignore index 6807968..85e703a 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,5 @@ cython_debug/ logs/ repos/ config.yml -hydra_outputs/ \ No newline at end of file +hydra_outputs/ +.commit0* \ No newline at end of file diff --git a/baselines/class_types.py b/baselines/class_types.py index af76738..03debfa 100644 --- a/baselines/class_types.py +++ b/baselines/class_types.py @@ -1,15 +1,6 @@ from dataclasses import dataclass -@dataclass -class Commit0Config: - base_dir: str - dataset_name: str - dataset_split: str - repo_split: str - num_workers: int - - @dataclass class AgentConfig: agent_name: str diff --git a/baselines/configs/agent.yaml b/baselines/configs/agent.yaml index 909134e..96f2bb7 100644 --- a/baselines/configs/agent.yaml +++ b/baselines/configs/agent.yaml @@ -3,9 +3,6 @@ defaults: - base - _self_ -commit0_config: - repo_split: minitorch - agent_config: use_user_prompt: false use_repo_info: false diff --git a/baselines/configs/base.yaml b/baselines/configs/base.yaml index f275643..2c7c976 100644 --- a/baselines/configs/base.yaml +++ b/baselines/configs/base.yaml @@ -1,15 +1,6 @@ defaults: - _self_ - - -commit0_config: - base_dir: repos - dataset_name: "wentingzhao/commit0_docstring" - dataset_split: "test" - repo_split: "simpy" - num_workers: 10 - agent_config: agent_name: "aider" model_name: "claude-3-5-sonnet-20240620" diff --git a/baselines/run_agent.py b/baselines/run_agent.py index 9142ae7..8738a9a 100644 --- a/baselines/run_agent.py +++ b/baselines/run_agent.py @@ -15,11 +15,12 @@ from typing import Optional, Type from types import TracebackType from hydra.core.config_store import ConfigStore -from baselines.class_types import AgentConfig, Commit0Config +from baselines.class_types import AgentConfig from commit0.harness.constants import SPLIT from commit0.harness.get_pytest_ids import main as get_tests from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance from tqdm import tqdm +from commit0.cli import read_commit0_dot_file class DirContext: @@ -40,7 +41,7 @@ def __exit__( def run_agent_for_repo( - commit0_config: Commit0Config, + repo_base_dir: str, agent_config: AgentConfig, example: RepoInstance, ) -> None: @@ -55,7 +56,7 @@ def run_agent_for_repo( test_files_str = get_tests(repo_name, verbose=0) test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) - repo_path = os.path.join(commit0_config.base_dir, repo_name) + repo_path = os.path.join(repo_base_dir, repo_name) repo_path = os.path.abspath(repo_path) try: local_repo = Repo(repo_path) @@ -82,13 +83,15 @@ def run_agent_for_repo( local_repo.git.reset("--hard", example["base_commit"]) target_edit_files = get_target_edit_files(repo_path) with DirContext(repo_path): - if commit0_config is None or agent_config is None: + if agent_config is None: raise ValueError("Invalid input") if agent_config.run_tests: # when unit test feedback is available, iterate over test files for test_file in test_files: - test_cmd = f"python -m commit0 test {repo_path} {run_id} {test_file}" + test_cmd = ( + f"python -m commit0 test {repo_path} {test_file} --branch {run_id}" + ) test_file_name = test_file.replace(".py", "").replace("/", "__") log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name lint_cmd = get_lint_cmd(local_repo, agent_config.use_lint_info) @@ -119,26 +122,26 @@ def main() -> None: Will run in parallel for each repo. """ cs = ConfigStore.instance() - cs.store(name="user", node=Commit0Config) cs.store(name="user", node=AgentConfig) hydra.initialize(version_base=None, config_path="configs") config = hydra.compose(config_name="agent") - commit0_config = Commit0Config(**config.commit0_config) agent_config = AgentConfig(**config.agent_config) + commit0_config = read_commit0_dot_file(".commit0.yaml") + dataset = load_dataset( - commit0_config.dataset_name, split=commit0_config.dataset_split + commit0_config["dataset_name"], split=commit0_config["dataset_split"] ) filtered_dataset = [ example for example in dataset - if commit0_config.repo_split == "all" + if commit0_config["repo_split"] == "all" or ( isinstance(example, dict) and "repo" in example and isinstance(example["repo"], str) and example["repo"].split("/")[-1] - in SPLIT.get(commit0_config.repo_split, []) + in SPLIT.get(commit0_config["repo_split"], []) ) ] assert len(filtered_dataset) > 0, "No examples available" @@ -149,14 +152,14 @@ def main() -> None: with tqdm( total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos" ) as pbar: - with multiprocessing.Pool(processes=commit0_config.num_workers) as pool: + with multiprocessing.Pool(processes=10) as pool: results = [] # Use apply_async to submit jobs and add progress bar updates for example in filtered_dataset: result = pool.apply_async( run_agent_for_repo, - args=(commit0_config, agent_config, example), + args=(commit0_config["base_dir"], agent_config, example), callback=lambda _: pbar.update( 1 ), # Update progress bar on task completion diff --git a/commit0/__main__.py b/commit0/__main__.py index 9122a45..2bbfbb7 100644 --- a/commit0/__main__.py +++ b/commit0/__main__.py @@ -1,4 +1,4 @@ -from commit0.cli import app as commit0_app +from commit0.cli import commit0_app def main() -> None: diff --git a/commit0/cli.py b/commit0/cli.py index b0b0b94..7d0ade5 100644 --- a/commit0/cli.py +++ b/commit0/cli.py @@ -10,8 +10,20 @@ import commit0.harness.lint import commit0.harness.save from commit0.harness.constants import SPLIT, SPLIT_ALL +import subprocess +import yaml +import os -app = typer.Typer(add_completion=False) +commit0_app = typer.Typer( + no_args_is_help=True, + add_completion=False, + context_settings={"help_option_names": ["-h", "--help"]}, + help=""" + Commit-0 is a real-world AI coding challenge. Can your agent generate a working library from commit 0? + + See the website at https://commit-0.github.io/ for documentation and more information about Commit-0. + """, +) class Colors: @@ -22,6 +34,40 @@ class Colors: ORANGE = "\033[95m" +def check_commit0_path() -> None: + """Code adapted from https://github.com/modal-labs/modal-client/blob/a8ddd418f8c65b7e168a9125451eeb70da2b6203/modal/cli/entry_point.py#L55 + + Checks whether the `commit0` executable is on the path and usable. + """ + url = "https://commit-0.github.io/setup/" + try: + subprocess.run(["commit0", "--help"], capture_output=True) + # TODO(erikbern): check returncode? + return + except FileNotFoundError: + typer.echo( + typer.style( + "The `commit0` command was not found on your path!", fg=typer.colors.RED + ) + + "\n" + + typer.style( + "You may need to add it to your path or use `python -m commit0` as a workaround.", + fg=typer.colors.RED, + ) + ) + except PermissionError: + typer.echo( + typer.style("The `commit0` command is not executable!", fg=typer.colors.RED) + + "\n" + + typer.style( + "You may need to give it permissions or use `python -m commit0` as a workaround.", + fg=typer.colors.RED, + ) + ) + typer.echo(f"See more information here:\n\n{url}") + typer.echo("─" * 80) # Simple rule to separate content + + def highlight(text: str, color: str) -> str: """Highlight text with a color.""" return f"{color}{text}{Colors.RESET}" @@ -38,7 +84,22 @@ def check_valid(one: str, total: Union[list[str], dict[str, list[str]]]) -> None ) -@app.command() +def write_commit0_dot_file(dot_file_path: str, config: dict) -> None: + with open(dot_file_path, "w") as f: + yaml.dump(config, f, default_flow_style=False) + + +def read_commit0_dot_file(dot_file_path: str) -> dict: + # Check if the file exists before attempting to read it + if not os.path.exists(dot_file_path): + raise FileNotFoundError( + f"The commit0 dot file '{dot_file_path}' does not exist." + ) + with open(dot_file_path, "r") as f: + return yaml.load(f, Loader=yaml.FullLoader) + + +@commit0_app.command() def setup( repo_split: str = typer.Argument( ..., @@ -49,14 +110,23 @@ def setup( ), dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), base_dir: str = typer.Option("repos/", help="Base directory to clone repos to"), + commit0_dot_file_path: str = typer.Option( + ".commit0.yaml", help="Storing path for stateful commit0 configs" + ), ) -> None: """Commit0 clone a repo split.""" + check_commit0_path() check_valid(repo_split, SPLIT) - typer.echo(f"Cloning repository for split: {repo_split}") - typer.echo(f"Dataset name: {dataset_name}") - typer.echo(f"Dataset split: {dataset_split}") - typer.echo(f"Base directory: {base_dir}") + base_dir = str(Path(base_dir).resolve()) + + typer.echo(f"Cloning repository for split: {highlight(repo_split, Colors.ORANGE)}") + typer.echo(f"Dataset name: {highlight(dataset_name, Colors.ORANGE)}") + typer.echo(f"Dataset split: {highlight(dataset_split, Colors.ORANGE)}") + typer.echo(f"Base directory: {highlight(base_dir, Colors.ORANGE)}") + typer.echo( + f"Commit0 dot file path: {highlight(commit0_dot_file_path, Colors.ORANGE)}" + ) commit0.harness.setup.main( dataset_name, @@ -65,18 +135,25 @@ def setup( base_dir, ) + # after successfully setup, write the commit0 dot file + write_commit0_dot_file( + commit0_dot_file_path, + { + "dataset_name": dataset_name, + "dataset_split": dataset_split, + "repo_split": repo_split, + "base_dir": base_dir, + }, + ) + -@app.command() +@commit0_app.command() def build( - repo_split: str = typer.Argument( - ..., - help=f"Split of repositories, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT.keys())}", - ), - dataset_name: str = typer.Option( - "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset" - ), - dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), num_workers: int = typer.Option(8, help="Number of workers"), + commit0_dot_file_path: str = typer.Option( + ".commit0.yaml", + help="Path to the commit0 dot file, where the setup config is stored", + ), verbose: int = typer.Option( 1, "--verbose", @@ -85,24 +162,33 @@ def build( count=True, ), ) -> None: - """Commit0 build a repository.""" - check_valid(repo_split, SPLIT) + """Build Commit0 split you choose in Setup Stage.""" + check_commit0_path() - typer.echo(f"Building repository for split: {repo_split}") - typer.echo(f"Dataset name: {dataset_name}") - typer.echo(f"Dataset split: {dataset_split}") - typer.echo(f"Number of workers: {num_workers}") + commit0_config = read_commit0_dot_file(commit0_dot_file_path) + check_valid(commit0_config["repo_split"], SPLIT) + + typer.echo( + f"Building repository for split: {highlight(commit0_config['repo_split'], Colors.ORANGE)}" + ) + typer.echo( + f"Dataset name: {highlight(commit0_config['dataset_name'], Colors.ORANGE)}" + ) + typer.echo( + f"Dataset split: {highlight(commit0_config['dataset_split'], Colors.ORANGE)}" + ) + typer.echo(f"Number of workers: {highlight(str(num_workers), Colors.ORANGE)}") commit0.harness.build.main( - dataset_name, - dataset_split, - repo_split, + commit0_config["dataset_name"], + commit0_config["dataset_split"], + commit0_config["repo_split"], num_workers, verbose, ) -@app.command() +@commit0_app.command() def get_tests( repo_name: str = typer.Argument( ..., @@ -110,6 +196,7 @@ def get_tests( ), ) -> None: """Get tests for a Commit0 repository.""" + check_commit0_path() check_valid(repo_name, SPLIT_ALL) typer.echo(f"Getting tests for repository: {repo_name}") @@ -117,7 +204,7 @@ def get_tests( commit0.harness.get_pytest_ids.main(repo_name, verbose=1) -@app.command() +@commit0_app.command() def test( repo_or_repo_path: str = typer.Argument( ..., help="Directory of the repository to test" @@ -129,17 +216,16 @@ def test( branch: Union[str, None] = typer.Option( None, help="Branch to test (branch MUST be provided or use --reference)" ), - dataset_name: str = typer.Option( - "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset" - ), - dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), - base_dir: str = typer.Option("repos/", help="Base directory of repos"), backend: str = typer.Option("local", help="Backend to use for testing"), timeout: int = typer.Option(1800, help="Timeout for tests in seconds"), num_cpus: int = typer.Option(1, help="Number of CPUs to use"), reference: Annotated[ bool, typer.Option("--reference", help="Test the reference commit.") ] = False, + commit0_dot_file_path: str = typer.Option( + ".commit0.yaml", + help="Path to the commit0 dot file, where the setup config is stored", + ), verbose: int = typer.Option( 1, "--verbose", @@ -149,9 +235,13 @@ def test( ), ) -> None: """Run tests on a Commit0 repository.""" + check_commit0_path() if repo_or_repo_path.endswith("/"): repo_or_repo_path = repo_or_repo_path[:-1] check_valid(repo_or_repo_path.split("/")[-1], SPLIT_ALL) + + commit0_config = read_commit0_dot_file(commit0_dot_file_path) + if not branch and not reference: raise typer.BadParameter( f"Invalid {highlight('BRANCH', Colors.RED)}. Either --reference or provide a branch name.", @@ -166,9 +256,9 @@ def test( typer.echo(f"Test IDs: {test_ids}") commit0.harness.run_pytest_ids.main( - dataset_name, - dataset_split, - base_dir, + commit0_config["dataset_name"], + commit0_config["dataset_split"], + commit0_config["base_dir"], repo_or_repo_path, branch, test_ids, @@ -179,20 +269,11 @@ def test( ) -@app.command() +@commit0_app.command() def evaluate( - repo_split: str = typer.Argument( - ..., - help=f"Split of repositories, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT.keys())}", - ), branch: Union[str, None] = typer.Option( None, help="Branch to evaluate (branch MUST be provided or use --reference)" ), - dataset_name: str = typer.Option( - "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset" - ), - dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), - base_dir: str = typer.Option("repos/", help="Base directory of repos"), backend: str = typer.Option("local", help="Backend to use for evaluation"), timeout: int = typer.Option(1800, help="Timeout for evaluation in seconds"), num_cpus: int = typer.Option(1, help="Number of CPUs to use"), @@ -200,8 +281,13 @@ def evaluate( reference: Annotated[ bool, typer.Option("--reference", help="Evaluate the reference commit.") ] = False, + commit0_dot_file_path: str = typer.Option( + ".commit0.yaml", + help="Path to the commit0 dot file, where the setup config is stored", + ), ) -> None: - """Evaluate a Commit0 repository.""" + """Evaluate Commit0 split you choose in Setup Stage.""" + check_commit0_path() if not branch and not reference: raise typer.BadParameter( f"Invalid {highlight('BRANCH', Colors.RED)}. Either --reference or provide a branch name", @@ -211,16 +297,17 @@ def evaluate( branch = "reference" assert branch is not None, "branch is not specified" - check_valid(repo_split, SPLIT) + commit0_config = read_commit0_dot_file(commit0_dot_file_path) + check_valid(commit0_config["repo_split"], SPLIT) - typer.echo(f"Evaluating repository split: {repo_split}") + typer.echo(f"Evaluating repository split: {commit0_config['repo_split']}") typer.echo(f"Branch: {branch}") commit0.harness.evaluate.main( - dataset_name, - dataset_split, - repo_split, - base_dir, + commit0_config["dataset_name"], + commit0_config["dataset_split"], + commit0_config["repo_split"], + commit0_config["base_dir"], branch, backend, timeout, @@ -229,14 +316,12 @@ def evaluate( ) -@app.command() +@commit0_app.command() def lint( - files: List[Path] = typer.Argument( - ..., help="Files to lint. If not provided, all files will be linted." - ), + files: List[Path] = typer.Argument(..., help="Files to lint."), ) -> None: """Lint given files if provided, otherwise lint all files in the base directory.""" - assert len(files) > 0, "No files to lint." + check_commit0_path() for path in files: if not path.is_file(): raise FileNotFoundError(f"File not found: {str(path)}") @@ -246,33 +331,30 @@ def lint( commit0.harness.lint.main(files) -@app.command() +@commit0_app.command() def save( - repo_split: str = typer.Argument( - ..., - help=f"Split of the repository, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT.keys())}", - ), owner: str = typer.Argument(..., help="Owner of the repository"), branch: str = typer.Argument(..., help="Branch to save"), - dataset_name: str = typer.Option( - "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset" - ), - dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), - base_dir: str = typer.Option("repos/", help="Base directory of repos"), github_token: str = typer.Option(None, help="GitHub token for authentication"), + commit0_dot_file_path: str = typer.Option( + ".commit0.yaml", + help="Path to the commit0 dot file, where the setup config is stored", + ), ) -> None: - """Save a Commit0 repository to GitHub.""" - check_valid(repo_split, SPLIT) + """Save Commit0 split you choose in Setup Stage to GitHub.""" + check_commit0_path() + commit0_config = read_commit0_dot_file(commit0_dot_file_path) + check_valid(commit0_config["repo_split"], SPLIT) - typer.echo(f"Saving repository split: {repo_split}") + typer.echo(f"Saving repository split: {commit0_config['repo_split']}") typer.echo(f"Owner: {owner}") typer.echo(f"Branch: {branch}") commit0.harness.save.main( - dataset_name, - dataset_split, - repo_split, - base_dir, + commit0_config["dataset_name"], + commit0_config["dataset_split"], + commit0_config["repo_split"], + commit0_config["base_dir"], owner, branch, github_token,