diff --git a/agent/README.md b/agent/README.md index 053e49d..5d7a587 100644 --- a/agent/README.md +++ b/agent/README.md @@ -38,6 +38,8 @@ Available options include: `--max-lint-info-length: int`: Maximum length of the lint information to use. [Default: `10000`] `--pre-commit-config-path: str`: Path to the pre-commit config file. This is needed for running `lint`. [Default: `.pre-commit-config.yaml`] `--agent-config-file: str`: Path to write the agent config. [Default: `.agent.yaml`] +`--add-import-module-to-context: bool`: Add import module to context. [Default: `False`] +`--record-test-for-each-commit: bool`: Record test results for each commit. If set to `True`, the test results are saved in `experiment_log_dir/eval_results.json`. [Default: `False`] ## Running Agent Use `agent run [OPTIONS] BRANCH` to execute an agent on a specific branch. diff --git a/agent/class_types.py b/agent/class_types.py index c06e5f6..12c74d4 100644 --- a/agent/class_types.py +++ b/agent/class_types.py @@ -21,3 +21,4 @@ class AgentConfig: pre_commit_config_path: str run_tests: bool max_iteration: int + record_test_for_each_commit: bool diff --git a/agent/cli.py b/agent/cli.py index 1b1c371..b02bf7a 100644 --- a/agent/cli.py +++ b/agent/cli.py @@ -135,6 +135,10 @@ def config( False, help="Run the lint on the entire directory", ), + record_test_for_each_commit: bool = typer.Option( + False, + help="Record the test for each commit", + ), pre_commit_config_path: str = typer.Option( ".pre-commit-config.yaml", help="Path to the pre-commit config file", @@ -170,6 +174,7 @@ def config( "max_lint_info_length": max_lint_info_length, "run_entire_dir_lint": run_entire_dir_lint, "pre_commit_config_path": pre_commit_config_path, + "record_test_for_each_commit": record_test_for_each_commit, } write_agent_config(agent_config_file, agent_config) diff --git a/agent/display.py b/agent/display.py index c321908..b5605d1 100644 --- a/agent/display.py +++ b/agent/display.py @@ -443,4 +443,6 @@ def 
__exit__( ) as json_file: json.dump(summary_data, json_file, indent=4) - print("\nSummary has been written to processing_summary.json") + print( + f"\nSummary has been written to processing_summary_{self.branch_name}.json" + ) diff --git a/agent/run_agent.py b/agent/run_agent.py index a140a5a..a1324cd 100644 --- a/agent/run_agent.py +++ b/agent/run_agent.py @@ -12,6 +12,7 @@ get_lint_cmd, read_yaml_config, ) +import json import subprocess from agent.agents import AiderAgents from typing import Optional, Type, cast @@ -20,7 +21,7 @@ from commit0.harness.constants import SPLIT from commit0.harness.get_pytest_ids import main as get_tests from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance -from commit0.cli import read_commit0_dot_file +from commit0.cli import read_commit0_config_file from pathlib import Path from datetime import datetime from agent.display import TerminalDisplay @@ -45,6 +46,21 @@ def __exit__( os.chdir(self.cwd) +def run_eval_after_each_commit( + branch: str, backend: str, commit0_config_file: str +) -> str: + """Run the eval command after each commit.""" + eval_cmd = f"python -m commit0 evaluate --branch {branch} --backend {backend} --commit0-config-file {commit0_config_file} --timeout 100" + try: + result = subprocess.run( + eval_cmd, shell=True, capture_output=True, text=True, check=True + ) + return result.stdout + except subprocess.CalledProcessError as e: + print(f"Error running eval command: {e}") + return e.stdout if e.stdout else str(e) + + def run_agent_for_repo( repo_base_dir: str, agent_config: AgentConfig, @@ -58,7 +74,7 @@ def run_agent_for_repo( ) -> None: """Run Aider for a given repository.""" # get repo info - commit0_config = read_commit0_dot_file(commit0_config_file) + commit0_config = read_commit0_config_file(commit0_config_file) assert "commit0" in commit0_config["dataset_name"] _, repo_name = example["repo"].split("/") @@ -130,6 +146,7 @@ def run_agent_for_repo( ) experiment_log_dir.mkdir(parents=True, 
exist_ok=True) + eval_results = {} # write agent_config to .agent.yaml in the log_dir for record agent_config_log_file = experiment_log_dir / ".agent.yaml" with open(agent_config_log_file, "w") as agent_config_file: @@ -161,6 +178,12 @@ def run_agent_for_repo( test_log_dir, test_first=True, ) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) + # after running the agent, update the money display update_queue.put( ( @@ -188,6 +211,12 @@ def run_agent_for_repo( lint_log_dir, lint_first=True, ) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) + # after running the agent, update the money display update_queue.put( ( @@ -211,12 +240,22 @@ def run_agent_for_repo( repo_name, agent_config.use_lint_info, commit0_config_file ) agent_return = agent.run(message, "", lint_cmd, [f], file_log_dir) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) + update_queue.put( ( "update_money_display", (repo_name, file_name, agent_return.last_cost), ) ) + if agent_config.record_test_for_each_commit: + with open(experiment_log_dir / "eval_results.json", "w") as f: + json.dump(eval_results, f) + update_queue.put(("finish_repo", repo_name)) @@ -236,7 +275,7 @@ def run_agent( agent_config = AgentConfig(**config) commit0_config_file = os.path.abspath(commit0_config_file) - commit0_config = read_commit0_dot_file(commit0_config_file) + commit0_config = read_commit0_config_file(commit0_config_file) dataset = load_dataset( commit0_config["dataset_name"], split=commit0_config["dataset_split"] diff --git a/agent/run_agent_no_rich.py 
b/agent/run_agent_no_rich.py index ceb4fb2..5822063 100644 --- a/agent/run_agent_no_rich.py +++ b/agent/run_agent_no_rich.py @@ -14,33 +14,17 @@ read_yaml_config, ) import subprocess +import json from agent.agents import AiderAgents -from typing import Optional, Type, cast -from types import TracebackType +from typing import cast from agent.class_types import AgentConfig from commit0.harness.constants import SPLIT from commit0.harness.get_pytest_ids import main as get_tests from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance -from commit0.cli import read_commit0_dot_file +from commit0.cli import read_commit0_config_file from pathlib import Path from datetime import datetime - - -class DirContext: - def __init__(self, d: str): - self.dir = d - self.cwd = os.getcwd() - - def __enter__(self): - os.chdir(self.dir) - - def __exit__( - self, - exctype: Optional[Type[BaseException]], - excinst: Optional[BaseException], - exctb: Optional[TracebackType], - ) -> None: - os.chdir(self.cwd) +from agent.run_agent import DirContext, run_eval_after_each_commit def run_agent_for_repo( @@ -55,7 +39,7 @@ def run_agent_for_repo( ) -> None: """Run Aider for a given repository.""" # get repo info - commit0_config = read_commit0_dot_file(commit0_config_file) + commit0_config = read_commit0_config_file(commit0_config_file) assert "commit0" in commit0_config["dataset_name"] _, repo_name = example["repo"].split("/") @@ -123,6 +107,7 @@ def run_agent_for_repo( / datetime.now().strftime("%Y-%m-%d_%H-%M-%S") ) experiment_log_dir.mkdir(parents=True, exist_ok=True) + eval_results = {} # write agent_config to .agent.yaml in the log_dir for record agent_config_log_file = experiment_log_dir / ".agent.yaml" @@ -153,6 +138,11 @@ def run_agent_for_repo( test_log_dir, test_first=True, ) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) 
elif agent_config.run_entire_dir_lint: # when unit test feedback is available, iterate over test files for lint_file in lint_files: @@ -171,6 +161,11 @@ def run_agent_for_repo( lint_log_dir, lint_first=True, ) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) else: # when unit test feedback is not available, iterate over target files to edit message = get_message(agent_config, repo_path, test_files=test_files) @@ -185,6 +180,14 @@ def run_agent_for_repo( repo_name, agent_config.use_lint_info, commit0_config_file ) _ = agent.run(message, "", lint_cmd, [f], file_log_dir) + if agent_config.record_test_for_each_commit: + current_commit = local_repo.head.commit.hexsha + eval_results[current_commit] = run_eval_after_each_commit( + branch, backend, commit0_config_file + ) + if agent_config.record_test_for_each_commit: + with open(experiment_log_dir / "eval_results.json", "w") as f: + json.dump(eval_results, f) def run_agent( @@ -205,7 +208,7 @@ def run_agent( agent_config = AgentConfig(**config) commit0_config_file = os.path.abspath(commit0_config_file) - commit0_config = read_commit0_dot_file(commit0_config_file) + commit0_config = read_commit0_config_file(commit0_config_file) dataset = load_dataset( commit0_config["dataset_name"], split=commit0_config["dataset_split"] diff --git a/commit0/cli.py b/commit0/cli.py index f0afb54..badb4a0 100644 --- a/commit0/cli.py +++ b/commit0/cli.py @@ -86,12 +86,12 @@ def check_valid(one: str, total: Union[list[str], dict[str, list[str]]]) -> None ) -def write_commit0_dot_file(dot_file_path: str, config: dict) -> None: +def write_commit0_config_file(dot_file_path: str, config: dict) -> None: with open(dot_file_path, "w") as f: yaml.dump(config, f, default_flow_style=False) -def read_commit0_dot_file(dot_file_path: str) -> dict: +def read_commit0_config_file(dot_file_path: str) -> dict: # 
Check if the file exists before attempting to read it if not os.path.exists(dot_file_path): raise FileNotFoundError( @@ -112,7 +112,7 @@ def setup( ), dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), base_dir: str = typer.Option("repos/", help="Base directory to clone repos to"), - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Storing path for stateful commit0 configs" ), ) -> None: @@ -127,7 +127,7 @@ def setup( typer.echo(f"Dataset split: {highlight(dataset_split, Colors.ORANGE)}") typer.echo(f"Base directory: {highlight(base_dir, Colors.ORANGE)}") typer.echo( - f"Commit0 dot file path: {highlight(commit0_dot_file_path, Colors.ORANGE)}" + f"Commit0 dot file path: {highlight(commit0_config_file, Colors.ORANGE)}" ) commit0.harness.setup.main( @@ -138,8 +138,8 @@ def setup( ) # after successfully setup, write the commit0 dot file - write_commit0_dot_file( - commit0_dot_file_path, + write_commit0_config_file( + commit0_config_file, { "dataset_name": dataset_name, "dataset_split": dataset_split, @@ -152,7 +152,7 @@ def setup( @commit0_app.command() def build( num_workers: int = typer.Option(8, help="Number of workers"), - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Path to the commit0 dot file, where the setup config is stored", ), @@ -167,7 +167,7 @@ def build( """Build Commit0 split you choose in Setup Stage.""" check_commit0_path() - commit0_config = read_commit0_dot_file(commit0_dot_file_path) + commit0_config = read_commit0_config_file(commit0_config_file) check_valid(commit0_config["repo_split"], SPLIT) typer.echo( @@ -228,7 +228,7 @@ def test( rebuild: bool = typer.Option( False, "--rebuild", help="Whether to rebuild an image" ), - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Path to the commit0 dot file, where the setup config is 
stored", ), @@ -251,7 +251,7 @@ def test( repo_or_repo_path = repo_or_repo_path[:-1] check_valid(repo_or_repo_path.split("/")[-1], SPLIT_ALL) - commit0_config = read_commit0_dot_file(commit0_dot_file_path) + commit0_config = read_commit0_config_file(commit0_config_file) if reference: branch = "reference" @@ -304,7 +304,7 @@ def evaluate( coverage: Annotated[ bool, typer.Option("--coverage", help="Whether to get coverage information") ] = False, - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Path to the commit0 dot file, where the setup config is stored", ), @@ -315,7 +315,7 @@ def evaluate( if reference: branch = "reference" - commit0_config = read_commit0_dot_file(commit0_dot_file_path) + commit0_config = read_commit0_config_file(commit0_config_file) check_valid(commit0_config["repo_split"], SPLIT) typer.echo(f"Evaluating repository split: {commit0_config['repo_split']}") @@ -344,7 +344,7 @@ def lint( files: Union[List[Path], None] = typer.Option( None, help="Files to lint. If not provided, all files will be linted." 
), - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Path to the commit0 dot file, where the setup config is stored", ), @@ -358,7 +358,7 @@ def lint( ) -> None: """Lint given files if provided, otherwise lint all files in the base directory.""" check_commit0_path() - commit0_config = read_commit0_dot_file(commit0_dot_file_path) + commit0_config = read_commit0_config_file(commit0_config_file) appended_files = None if files is not None: appended_files = [] @@ -383,14 +383,14 @@ def save( owner: str = typer.Argument(..., help="Owner of the repository"), branch: str = typer.Argument(..., help="Branch to save"), github_token: str = typer.Option(None, help="GitHub token for authentication"), - commit0_dot_file_path: str = typer.Option( + commit0_config_file: str = typer.Option( ".commit0.yaml", help="Path to the commit0 dot file, where the setup config is stored", ), ) -> None: """Save Commit0 split you choose in Setup Stage to GitHub.""" check_commit0_path() - commit0_config = read_commit0_dot_file(commit0_dot_file_path) + commit0_config = read_commit0_config_file(commit0_config_file) check_valid(commit0_config["repo_split"], SPLIT) typer.echo(f"Saving repository split: {commit0_config['repo_split']}") diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 3fe45a0..d07a7b2 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -14,7 +14,7 @@ from commit0.harness.constants import SPLIT from commit0.harness.utils import clone_repo -from commit0.cli import write_commit0_dot_file +from commit0.cli import write_commit0_config_file import logging @@ -420,7 +420,7 @@ def main(args): if args.get_reference_details: branch_name = "reference" org_name = f"commit0_{args.split}" - commit0_dot_file_path = os.path.join( + commit0_config_file = os.path.join( analysis_files_path, "repos", org_name, branch_name, ".commit0.yaml" ) submission_repos_path = os.path.join( @@ -429,7 +429,7 @@ 
def main(args): if args.do_setup: os.system( f"commit0 setup {args.split} --base-dir {submission_repos_path} " - f"--commit0-config-file {commit0_dot_file_path}" + f"--commit0-config-file {commit0_config_file}" ) submission_metrics_output_file = os.path.join( analysis_files_path, org_name, f"{branch_name}.json" @@ -456,7 +456,7 @@ def main(args): if args.overwrite_previous_eval or need_re_eval: os.system( "commit0 evaluate --reference " - f"--commit0-config-file {commit0_dot_file_path}" + f"--commit0-config-file {commit0_config_file}" ) # get coverage and pytest info for each repo for example in dataset: @@ -494,7 +494,7 @@ def main(args): if os.path.exists(submission_repos_path): shutil.rmtree(submission_repos_path) os.makedirs(os.path.join(analysis_files_path, org_name), exist_ok=True) - commit0_dot_file_path = os.path.join( + commit0_config_file = os.path.join( analysis_files_path, "submission_repos", org_name, @@ -519,8 +519,8 @@ def main(args): if os.path.exists(clone_dir): shutil.rmtree(clone_dir) # after successfully setup, write the commit0 dot file - write_commit0_dot_file( - commit0_dot_file_path, + write_commit0_config_file( + commit0_config_file, { "dataset_name": commit0_dataset_name, "dataset_split": "test", @@ -531,7 +531,7 @@ def main(args): # run pytests os.system( f"commit0 evaluate --branch {branch_name} " - f"--commit0-config-file {commit0_dot_file_path}" + f"--commit0-config-file {commit0_config_file}" ) for example in dataset: repo_name = example["repo"].split("/")[-1]