From 8656eb0e218048524ea37c5b7d0862ca6c0d7ed3 Mon Sep 17 00:00:00 2001 From: Wenting Zhao Date: Fri, 20 Sep 2024 16:44:00 +0000 Subject: [PATCH 1/3] handled log levels --- commit0/cli.py | 7 +++++-- commit0/harness/build.py | 3 ++- commit0/harness/docker_build.py | 1 + commit0/harness/evaluate.py | 4 ++-- commit0/harness/get_pytest_ids.py | 4 ++-- commit0/harness/run_pytest_ids.py | 9 +++++++-- commit0/harness/utils.py | 8 +++++--- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/commit0/cli.py b/commit0/cli.py index 31434c6..1caae1b 100644 --- a/commit0/cli.py +++ b/commit0/cli.py @@ -77,6 +77,7 @@ def build( ), dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), num_workers: int = typer.Option(8, help="Number of workers"), + verbose: Annotated[int, typer.Option("--verbose", "-v", help="Set this to 2 for more logging information", count=True)] = 1, ) -> None: """Commit0 build a repository.""" check_valid(repo_split, SPLIT) @@ -91,6 +92,7 @@ def build( dataset_split, repo_split, num_workers, + verbose, ) @@ -106,7 +108,7 @@ def get_tests( typer.echo(f"Getting tests for repository: {repo_name}") - commit0.harness.get_pytest_ids.main(repo_name, stdout=True) + commit0.harness.get_pytest_ids.main(repo_name, verbose=1) @app.command() @@ -132,6 +134,7 @@ def test( reference: Annotated[ bool, typer.Option("--reference", help="Test the reference commit.") ] = False, + verbose: Annotated[int, typer.Option("--verbose", "-v", count=True)] = 1 ) -> None: """Run tests on a Commit0 repository.""" if repo_or_repo_path.endswith("/"): @@ -160,7 +163,7 @@ def test( backend, timeout, num_cpus, - stdout=True, + verbose, ) diff --git a/commit0/harness/build.py b/commit0/harness/build.py index 4cbc6e3..6789f85 100644 --- a/commit0/harness/build.py +++ b/commit0/harness/build.py @@ -19,6 +19,7 @@ def main( dataset_split: str, repo_split: str, num_workers: int, + verbose: int, ) -> None: dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore specs = [] @@ -30,7 +31,7 @@ def main( specs.append(spec) client = docker.from_env() - build_repo_images(client, specs, num_workers) + build_repo_images(client, specs, num_workers, verbose) for spec in specs: image = client.images.get(spec.repo_image_key) repository, tag = spec.repo_image_tag.split(":") diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py index c3db38d..affd707 100644 --- a/commit0/harness/docker_build.py +++ b/commit0/harness/docker_build.py @@ -196,6 +196,7 @@ def build_repo_images( client: docker.DockerClient, dataset: list, max_workers: int = 4, + verbose: int = 1, ) -> tuple[list[str], list[str]]: """Builds the repo images required for the dataset if they do not already exist. diff --git a/commit0/harness/evaluate.py b/commit0/harness/evaluate.py index a30e731..b532af1 100644 --- a/commit0/harness/evaluate.py +++ b/commit0/harness/evaluate.py @@ -57,7 +57,7 @@ def main( backend, timeout, num_cpus, - stdout=False, + verbose=0, ): None for repo, test_dir in pairs } @@ -70,7 +70,7 @@ def main( for name in tqdm(log_dirs): report_file = os.path.join(name, "report.json") name = name.split("/")[2] - test_ids = get_tests(name, stdout=False) + test_ids = get_tests(name, verbose=0) if not os.path.exists(report_file): out.append( { diff --git a/commit0/harness/get_pytest_ids.py b/commit0/harness/get_pytest_ids.py index 601b134..36ce65a 100644 --- a/commit0/harness/get_pytest_ids.py +++ b/commit0/harness/get_pytest_ids.py @@ -2,7 +2,7 @@ from typing import List -def main(repo: str, stdout: bool) -> List[str]: +def main(repo: str, verbose: int) -> List[str]: repo = repo.lower() repo = repo.replace(".", "-") out = "" @@ -13,7 +13,7 @@ def main(repo: str, stdout: bool) -> List[str]: if file: content = file.read().decode("utf-8") out += content - if stdout: + if verbose: print(content) out = out.split("\n") return out diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py index 0882a22..5308af7 100644 --- a/commit0/harness/run_pytest_ids.py +++ b/commit0/harness/run_pytest_ids.py @@ -37,7 +37,7 @@ def main( backend: str, timeout: int, num_cpus: int, - stdout: bool, + verbose: int, ) -> None: """Runs the pytests for repos in a dataset. @@ -64,15 +64,17 @@ def main( log_dir = RUN_PYTEST_LOG_DIR / repo_name / branch / hashed_test_ids log_dir.mkdir(parents=True, exist_ok=True) log_file = log_dir / "run_pytest.log" - logger = setup_logger(repo_name, log_file) + logger = setup_logger(repo_name, log_file, verbose=verbose) try: local_repo = git.Repo(repo_or_repo_dir) + logger.info(f"Loaded a git repo from {repo_or_repo_dir}") except git.exc.NoSuchPathError: # type: ignore repo_dir = os.path.join(base_dir, repo_name) logger.error(f"{repo_or_repo_dir} is not a git dir, trying {repo_dir} again") try: local_repo = git.Repo(repo_dir) + logger.info(f"Retried succeeded. Loaded a git repo from {repo_dir}") except git.exc.NoSuchPathError: # type: ignore raise Exception( f"{repo_dir} and {repo_or_repo_dir} are not git directories.\nUsage: commit0 test {{repo_dir}} {{branch}} {{test_ids}}" @@ -133,6 +135,9 @@ def main( logger, ) close_logger(logger) + if verbose > 0: + test_output = Path(log_dir / "test_output.txt") + print(test_output.read_text()) pytest_exit_code = Path(log_dir / "pytest_exit_code.txt").read_text().strip() sys.exit(int(pytest_exit_code)) except EvaluationError as e: diff --git a/commit0/harness/utils.py b/commit0/harness/utils.py index 6279932..e074fbe 100644 --- a/commit0/harness/utils.py +++ b/commit0/harness/utils.py @@ -27,18 +27,20 @@ def __str__(self): ) -def setup_logger(repo: str, log_file: Path, mode: str = "w") -> logging.Logger: +def setup_logger(repo: str, log_file: Path, mode: str = "w", verbose: int = 1) -> logging.Logger: """Used for logging the build process of images and running containers. It writes logs to the log file. """ log_file.parent.mkdir(parents=True, exist_ok=True) logger = logging.getLogger(f"{repo}.{log_file.name}") handler = logging.FileHandler(log_file, mode=mode) - stdout_handler = logging.StreamHandler(sys.stdout) - logger.addHandler(stdout_handler) formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) + if verbose == 2: + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setFormatter(formatter) + logger.addHandler(stdout_handler) logger.setLevel(logging.INFO) logger.propagate = False setattr(logger, "log_file", log_file) From f0e9e9393d24c6f12befdb6c90d0180e29f40e8f Mon Sep 17 00:00:00 2001 From: Wenting Zhao Date: Fri, 20 Sep 2024 16:47:04 +0000 Subject: [PATCH 2/3] added help messages --- commit0/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit0/cli.py b/commit0/cli.py index 1caae1b..8714f1d 100644 --- a/commit0/cli.py +++ b/commit0/cli.py @@ -77,7 +77,7 @@ def build( ), dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), num_workers: int = typer.Option(8, help="Number of workers"), - verbose: Annotated[int, typer.Option("--verbose", "-v", help="Set this to 2 for more logging information", count=True)] = 1, + verbose: int = typer.Option(1, "--verbose", "-v", help="Set this to 2 for more logging information", count=True), ) -> None: """Commit0 build a repository.""" check_valid(repo_split, SPLIT) @@ -134,7 +134,7 @@ def test( reference: Annotated[ bool, typer.Option("--reference", help="Test the reference commit.") ] = False, - verbose: Annotated[int, typer.Option("--verbose", "-v", count=True)] = 1 + verbose: int = typer.Option(1, "--verbose", "-v", help="Set this to 2 for more logging information", count=True), ) -> None: """Run tests on a Commit0 repository.""" if repo_or_repo_path.endswith("/"): From e02e7435073474e57d08e4f60b29a450c15fc3cf Mon Sep 17 00:00:00 2001 From: Wenting Zhao Date: Fri, 20 Sep 2024 16:49:24 +0000 Subject: [PATCH 3/3] pre-commit --- baselines/run_agent.py | 2 +- commit0/cli.py | 16 ++++++++++++++-- commit0/harness/docker_build.py | 1 + commit0/harness/utils.py | 4 +++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/baselines/run_agent.py b/baselines/run_agent.py index 0535ba3..9142ae7 100644 --- a/baselines/run_agent.py +++ b/baselines/run_agent.py @@ -52,7 +52,7 @@ def run_agent_for_repo( repo_name = repo_name.replace(".", "-") # Call the commit0 get-tests command to retrieve test files - test_files_str = get_tests(repo_name, stdout=False) + test_files_str = get_tests(repo_name, verbose=0) test_files = sorted(list(set([i.split(":")[0] for i in test_files_str]))) repo_path = os.path.join(commit0_config.base_dir, repo_name) diff --git a/commit0/cli.py b/commit0/cli.py index 8714f1d..b0b0b94 100644 --- a/commit0/cli.py +++ b/commit0/cli.py @@ -77,7 +77,13 @@ def build( ), dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"), num_workers: int = typer.Option(8, help="Number of workers"), - verbose: int = typer.Option(1, "--verbose", "-v", help="Set this to 2 for more logging information", count=True), + verbose: int = typer.Option( + 1, + "--verbose", + "-v", + help="Set this to 2 for more logging information", + count=True, + ), ) -> None: """Commit0 build a repository.""" check_valid(repo_split, SPLIT) @@ -134,7 +140,13 @@ def test( reference: Annotated[ bool, typer.Option("--reference", help="Test the reference commit.") ] = False, - verbose: int = typer.Option(1, "--verbose", "-v", help="Set this to 2 for more logging information", count=True), + verbose: int = typer.Option( + 1, + "--verbose", + "-v", + help="Set this to 2 for more logging information", + count=True, + ), ) -> None: """Run tests on a Commit0 repository.""" if repo_or_repo_path.endswith("/"): diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py index affd707..2f7aeea 100644 --- a/commit0/harness/docker_build.py +++ b/commit0/harness/docker_build.py @@ -205,6 +205,7 @@ def build_repo_images( client (docker.DockerClient): Docker client to use for building the images dataset (list): List of test specs or dataset to build images for max_workers (int): Maximum number of workers to use for building images + verbose (int): Level of verbosity Return: ------ diff --git a/commit0/harness/utils.py b/commit0/harness/utils.py index e074fbe..8836415 100644 --- a/commit0/harness/utils.py +++ b/commit0/harness/utils.py @@ -27,7 +27,9 @@ def __str__(self): ) -def setup_logger(repo: str, log_file: Path, mode: str = "w", verbose: int = 1) -> logging.Logger: +def setup_logger( + repo: str, log_file: Path, mode: str = "w", verbose: int = 1 +) -> logging.Logger: """Used for logging the build process of images and running containers. It writes logs to the log file. """