From fd795ba3c0aee8b38b37a86a6b812e736c83c82c Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 16:32:48 -0400 Subject: [PATCH 1/7] initial changes --- .gitignore | 3 +- docs/analysis.md | 6 + docs/render_submissions.py | 334 +++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 4 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 docs/analysis.md create mode 100644 docs/render_submissions.py diff --git a/.gitignore b/.gitignore index 0459882..e0582af 100644 --- a/.gitignore +++ b/.gitignore @@ -166,4 +166,5 @@ repos/ config.yml hydra_outputs/ .commit0* -.agent* \ No newline at end of file +.agent* +docs/analysis_*.md \ No newline at end of file diff --git a/docs/analysis.md b/docs/analysis.md new file mode 100644 index 0000000..bc2a8f9 --- /dev/null +++ b/docs/analysis.md @@ -0,0 +1,6 @@ + +| | Name | Summary | | +|--|--------|----------|--| +||[reference](/analysis_reference)|3628 / 33 ; duration: 18.66s|| +||[test-save-commit0](/analysis_test-save-commit0)|0 / 0 ; duration: 0.00s|| +||[model_name-claude-3-5-sonnet-20240620__run_tests-0__use_lint_info-0__use_spec_info-0](/analysis_model_name-claude-3-5-sonnet-20240620__run_tests-0__use_lint_info-0__use_spec_info-0)|0 / 0 ; duration: 0.00s|| \ No newline at end of file diff --git a/docs/render_submissions.py b/docs/render_submissions.py new file mode 100644 index 0000000..8ab9be2 --- /dev/null +++ b/docs/render_submissions.py @@ -0,0 +1,334 @@ +import re +import os +import glob +import ast +from datasets import load_dataset +import subprocess +import json +import shutil +import sys +import argparse +from transformers import AutoTokenizer +import commit0.harness.setup +from commit0.harness.constants import SPLIT, SPLIT_ALL +from commit0.harness.utils import clone_repo +from commit0.cli import write_commit0_dot_file +import pypdf +# from render_utils import _find_files_to_edit + +import logging + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +analysis_files_path = "/share/rush/commit0_analysis_temp" + +def get_pytest_info(path_to_logs, repo_name, branch_name): + pytest_info = {} + for pytest_hash in os.listdir(path_to_logs): + eval_script = open(os.path.join(path_to_logs, pytest_hash, "eval.sh")).read() + testname = re.search(r'([\S]+) > test_output', eval_script).group(1) + patch_diff = open(os.path.join(path_to_logs, pytest_hash, "patch.diff")).read() + pytest_info[testname] = {'hash': pytest_hash, 'patch_diff': patch_diff, 'summary': pytest_summary, 'failures': {}, 'duration': pytest_report['duration']} + report_file_path = os.path.join(path_to_logs, pytest_hash, "report.json") + if not os.path.exists(report_file_path): + reason_for_failure = open(os.path.join(path_to_logs, pytest_hash, "test_output.txt")).read() + pytest_info[testname]["failed_to_run"] = reason_for_failure + return pytest_info + pytest_report = json.load(report_file_path) + pytest_summary = pytest_report['summary'] + if 'passed' not in pytest_summary: pytest_summary['passed'] = 0 + for test in pytest_report["tests"]: + if test['outcome'] == "passed": continue + if 'longrepr' in test: + failure_string = test['longrepr'] + elif '???' 
in test: + failure_string = test['???']['longrepr'] + elif test['outcome'] == 'error': + failure_string = test['setup']['longrepr'] + elif 'setup' in test and 'longrepr' in test['setup']: + failure_string = test['setup']['longrepr'] + elif 'call' in test and 'longrepr' in test['call']: + failure_string = test['call']['longrepr'] + # could use test['call']['traceback'] information and test['call']['crash'] for more info + else: + import pdb; pdb.set_trace() + duration = 0. + for action_key in ["setup", "call", "teardown"]: + if action_key not in test: continue + if "duration" in test: duration += test["duration"] + pytest_info[testname]['failures'][test['nodeid']] = {"failure_string": failure_string, "duration": duration} + return pytest_info + +def get_coverage_info(path_to_logs, repo_name, branch_name): + # for filename, file_coverage in json.load(open(os.path.join(path_to_logs, pytest_hash, "coverage.json")))["files"].items(): + # if not any(relevant_function.startswith(filename) for relevant_function in relevant_functions): continue + # for funcname, func_coverage in file_coverage["functions"].items(): + # if f"{filename}::{funcname}" not in relevant_functions: continue + # pycov_info[testname][f"{filename}::{funcname}"] = { + # "implementation": submission_info["function_impls"][f"{filename}::{funcname}"], + # "executed_lines": func_coverage["executed_lines"], + # "executed_branches": func_coverage["executed_branches"] + # } + raise NotImplementedError + +def get_blank_repo_metrics(blank_source_code_folder, spec_filename, tokenizer, code_file_filter=lambda filename:filename): + blank_repo_metrics = { + "functions_to_edit": [], + } + + for subdir, _, files in os.walk(blank_source_code_folder): + for file in files: + if not code_file_filter(file): continue + filename = os.path.join(subdir, file) + splitted = filename.split('/') + hidden = False + for one in splitted: + if one.startswith('.'): + hidden = True + break + if hidden: + continue + try: + code = open(filename, encoding='utf-8').read() + except: + print(f"Trouble opening {filename}") + continue + + filename = filename[len(blank_source_code_folder):].lstrip(" /") + try: + code_tree = ast.parse(code) + except: + print(f"Trouble parsing {os.path.join(blank_source_code_folder, filename)}") + continue + for node in ast.walk(code_tree): + if isinstance(node, ast.ClassDef): + for child in node.body: + child.parent_class = node.name + elif isinstance(node, ast.FunctionDef) and len(node.body) > 0: + classname = "" + if hasattr(node, "parent_class"): + classname = f"{node.parent_class}." 
+ for child in node.body: + child.parent_function = f"{classname}{node.name}" + elif isinstance(node, ast.Pass): + if hasattr(node, "parent_function"): + blank_repo_metrics["functions_to_edit"].append(f"{filename}::{node.parent_function}") + elif hasattr(node, "parent_class"): + blank_repo_metrics["functions_to_edit"].append(f"{filename}::{node.parent_class}") + + # Get spec metrics + concatted_spec = "" + reader = pypdf.PdfReader(spec_filename) + for p_idx, page in enumerate(reader.pages): + try: + concatted_spec += page.extract_text() + except pypdf.errors.PdfReadError as e: + print(f"Could not load page {p_idx} of {spec_filename}, excluding") + blank_repo_metrics["no_tokens_in_spec"] = tokenizer(concatted_spec, return_tensors='pt').input_ids.shape[-1] + + return blank_repo_metrics + +def render_mds(subfolder="docs"): + all_submissions = {} + + method_repo_pytests = {} + for branch_name in glob.glob(os.path.join(analysis_files_path, '*')): + branch_name = os.path.basename(branch_name) + if branch_name in {"blank", "repos", "submission_repos"}: continue + all_submissions[branch_name] = {} + for repo_file in glob.glob(os.path.join(analysis_files_path, branch_name, '*.json')): + + repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, repo_file) + repo_metrics = json.load(open(repo_metrics_output_file)) + repo_name = os.path.basename(repo_file[:-len(".json")]) + + all_submissions[branch_name][repo_name] = {} + + method_repo_pytests[f"{branch_name}_{repo_name}"] = f"# Submission Name: {branch_name}\n# Repository: {repo_name}" + if 'pytest_results' in repo_metrics: repo_metrics = repo_metrics['pytest_results'] + for pytest_group, pytest_info in repo_metrics.items(): + pytest_group = os.path.basename(pytest_group.strip("/")) + patch_diff = f"""\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```""" + if 'reason_for_failure' in pytest_info: + all_submissions[branch_name][repo_name][pytest_group] = {"reason_for_failure": pytest_info["failed_to_run"]} + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""\n## Failed to run pytests\n{ pytest_info['failed_to_run']}""" + else: + all_submissions[branch_name][repo_name][pytest_group] = {"summary": pytest_info['summary'], "duration": pytest_info["duration"]} + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""\n## Pytest Summary: {pytest_group} + | status | count | + |:---------|:-----:| + """ + for category, count in pytest_info['summary'].items(): + if category not in {'duration'}: + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""| {category} | {count} |\n""" + else: + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""| {category} | {float(count):.2f}s |\n""" + + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"\n## Failed pytest outputs: {pytest_group}\n\n" + for testname, failure in pytest_info['failures'].items(): + shortened_testname = os.path.basename(testname) + method_repo_pytests[f"{branch_name}_{repo_name}"] += f"### {shortened_testname}\n\n
<details><summary> <pre>{shortened_testname}</pre></summary><pre>\n{failure['failure_string']}\n</pre>\n</details>
\n" + + back_button = f"[back to {branch_name} summary]({os.path.join('/', f'analysis_{branch_name}')})\n\n" + with open(os.path.join(subfolder, f"analysis_{branch_name}_{repo_name}.md"), 'w') as wf: + wf.write(back_button + method_repo_pytests[f"{branch_name}_{repo_name}"] + patch_diff) + + + # Render general page. Has buttons to all methods + leaderboard = """ +| | Name | Summary | | +|--|--------|----------|--|""" + # Render method page. Per method, buttons to all repos. + method_to_repos = {} + # Render method & repo page. Has "back" button. + for branch_name, branch_info in all_submissions.items(): + cum_pytests = {'passed': 0} + method_to_repos[branch_name] = """ +| | Repository | Summary | | +|-|------------|---------|-|""" + total_tests = 0 # better info is probably broken down by split lol TODO + total_duration = 0. + for repo_name, repo_test_info in branch_info.items(): + for testname, test_info in repo_test_info.items(): + total_duration += test_info['duration'] + if "reason_for_failure" in test_info: + summary_pytests_string = "failure" + else: + summary_pytests_string = f"{testname}: {test_info['summary']['passed']} / {test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" + for category, count in test_info["summary"].items(): + if category not in cum_pytests: + cum_pytests[category] = 0 + if isinstance(count, int): cum_pytests[category] += int(count) + elif isinstance(count, float): cum_pytests[category] += float(count) + total_tests += 1 + method_to_repos[branch_name] += f"\n||[{repo_name}]({os.path.join('/', f'analysis_{branch_name}_{repo_name}')})|{summary_pytests_string}||" + break # assume we ran all tests. will add functionality for checking diff tests later, as we need it. + summary_pytests_string = f"{cum_pytests['passed']} / {total_tests} ; duration: {total_duration:.2f}s" + leaderboard += f"\n||[{branch_name}]({os.path.join('/', f'analysis_{branch_name}')})|{summary_pytests_string}||" + with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), 'w') as wf: + wf.write( method_to_repos[branch_name]) + with open(os.path.join(subfolder, "analysis.md"), 'w') as wf: + wf.write(leaderboard) + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--do_setup", action="store_true") + parser.add_argument("--get_blank_details", action="store_true") + parser.add_argument("--get_reference_details", action="store_true") + parser.add_argument("--keep_previous_eval", action="store_true") + parser.add_argument("--analyze_submissions", action="store_true") + parser.add_argument("--render_webpages", action="store_true") + + parser.add_argument("--split", type=str, default='lite') + + parser.add_argument("--tokenizer_name", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct") + + return parser.parse_args() + +def main(args): + global analysis_files_path + + commit0_dataset_name = "wentingzhao/commit0_combined" + submissions_dataset_name = "celinelee/commit0_submissions" + dataset = load_dataset(commit0_dataset_name, split="test") # type: ignore + submission_dataset = load_dataset(submissions_dataset_name, split="train") + + + if args.get_blank_details: + if args.do_setup: + os.system(f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + branch_name = "blank" + if not args.keep_previous_eval: + if os.path.exists(os.path.join(analysis_files_path, branch_name)): + shutil.rmtree(os.path.join(analysis_files_path, branch_name)) + 
os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) + tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name) + for example in dataset: + repo_name = example["repo"].split('/')[-1] + if args.split != "all" and repo_name not in SPLIT[args.split]: + continue + + repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") + blank_source_code_folder = os.path.join(analysis_files_path, "repos", repo_name, example["src_dir"]) + spec_filepath = os.path.join(analysis_files_path, "repos", repo_name, "spec.pdf") + + repo_metrics = get_blank_repo_metrics( + blank_source_code_folder, + spec_filepath, + tokenizer, + code_file_filter=lambda filename: re.fullmatch(r'.*\.py', filename) is not None, + ) + json.dump(repo_metrics, open(repo_metrics_output_file, "w"), indent=4) + + if args.get_reference_details: + if args.do_setup: + os.system(f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + branch_name = "reference" + os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) + if not args.keep_previous_eval: + for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"): + if os.path.exists(os.path.join(repo_log_path, branch_name)): + shutil.rmtree(os.path.join(repo_log_path, branch_name)) + os.system(f"commit0 evaluate --reference --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + + # get coverage and pytest info for each repo + for example in dataset: + repo_name = example["repo"].split('/')[-1] + if args.split != "all" and repo_name not in SPLIT[args.split]: + continue + + repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") + + path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}" + pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name) + json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4) + + if args.analyze_submissions: + commit0_dot_file_path = os.path.join(analysis_files_path, "submission_repos", ".commit0.yaml") + for submission in submission_dataset: + branch_name = submission['name'] + os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) + if not args.keep_previous_eval: + for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"): + if os.path.exists(os.path.join(repo_log_path, branch_name)): + shutil.rmtree(os.path.join(repo_log_path, branch_name)) + for example in dataset: + repo_name = example["repo"].split('/')[-1] + if args.split != "all" and repo_name not in SPLIT[args.split]: + continue + clone_url = f"https://github.com/test-save-commit0/{repo_name}.git" + clone_dir = os.path.abspath(os.path.join(analysis_files_path, "submission_repos", repo_name)) + repo = clone_repo(clone_url, clone_dir, branch_name, logger) + # after successfully setup, write the commit0 dot file + write_commit0_dot_file( + commit0_dot_file_path, + { + "dataset_name": commit0_dataset_name, + "dataset_split": "test", + "repo_split": args.split, + "base_dir": os.path.join(analysis_files_path, "submission_repos"), + }, + ) + # run pytests + os.system(f"commit0 evaluate --branch {branch_name} --commit0-dot-file-path {commit0_dot_file_path}") + for example in dataset: + repo_name = example["repo"].split('/')[-1] + if args.split != "all" and repo_name not in SPLIT[args.split]: + continue + + repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") + + path_to_logs = 
f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}" + pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name) + json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4) + + if not args.keep_previous_eval: + for analysis_file in glob.glob("docs/analysis*.md"): + os.unlink(analysis_file) + if args.render_webpages: render_mds() + + +main(get_args()) diff --git a/mkdocs.yml b/mkdocs.yml index 67cf1fb..7e87ed1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,6 +6,7 @@ nav: - Setup: setup.md - Extending: repos.md - About: about.md + - Submission Analysis: analysis.md theme: name: material logo: "logo2.webp" From f01bb43d064556d9b442362064688a5dedaa518d Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 16:55:02 -0400 Subject: [PATCH 2/7] flaked --- docs/render_submissions.py | 359 ++++++++++++++++++++++++------------- 1 file changed, 234 insertions(+), 125 deletions(-) diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 8ab9be2..3214f35 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -3,18 +3,14 @@ import glob import ast from datasets import load_dataset -import subprocess import json import shutil -import sys import argparse from transformers import AutoTokenizer -import commit0.harness.setup -from commit0.harness.constants import SPLIT, SPLIT_ALL +from commit0.harness.constants import SPLIT from commit0.harness.utils import clone_repo from commit0.cli import write_commit0_dot_file import pypdf -# from render_utils import _find_files_to_edit import logging @@ -25,46 +21,67 @@ analysis_files_path = "/share/rush/commit0_analysis_temp" -def get_pytest_info(path_to_logs, repo_name, branch_name): + +def get_pytest_info(path_to_logs, repo_name, branch_name): pytest_info = {} for pytest_hash in os.listdir(path_to_logs): eval_script = open(os.path.join(path_to_logs, pytest_hash, "eval.sh")).read() - testname = re.search(r'([\S]+) > test_output', eval_script).group(1) + testname = re.search(r"([\S]+) > test_output", eval_script).group(1) patch_diff = open(os.path.join(path_to_logs, pytest_hash, "patch.diff")).read() - pytest_info[testname] = {'hash': pytest_hash, 'patch_diff': patch_diff, 'summary': pytest_summary, 'failures': {}, 'duration': pytest_report['duration']} + pytest_info[testname] = { + "hash": pytest_hash, + "patch_diff": patch_diff, + "failures": {}, + } report_file_path = os.path.join(path_to_logs, pytest_hash, "report.json") - if not os.path.exists(report_file_path): - reason_for_failure = open(os.path.join(path_to_logs, pytest_hash, "test_output.txt")).read() + if not os.path.exists(report_file_path): + reason_for_failure = open( + os.path.join(path_to_logs, pytest_hash, "test_output.txt") + ).read() pytest_info[testname]["failed_to_run"] = reason_for_failure return pytest_info - pytest_report = json.load(report_file_path) - pytest_summary = pytest_report['summary'] - if 'passed' not in pytest_summary: pytest_summary['passed'] = 0 + pytest_report = json.load(open(report_file_path)) + pytest_summary = pytest_report["summary"] + pytest_info[testname]["summary"] = pytest_summary + pytest_info[testname]["duration"] = pytest_report["duration"] + if "passed" not in pytest_summary: + pytest_summary["passed"] = 0 for test in pytest_report["tests"]: - if test['outcome'] == "passed": continue - if 'longrepr' in test: - failure_string = test['longrepr'] - elif '???' 
in test: - failure_string = test['???']['longrepr'] - elif test['outcome'] == 'error': - failure_string = test['setup']['longrepr'] - elif 'setup' in test and 'longrepr' in test['setup']: - failure_string = test['setup']['longrepr'] - elif 'call' in test and 'longrepr' in test['call']: - failure_string = test['call']['longrepr'] + if test["outcome"] == "passed": + continue + if "longrepr" in test: + failure_string = test["longrepr"] + elif "???" in test: + failure_string = test["???"]["longrepr"] + elif test["outcome"] == "error": + failure_string = test["setup"]["longrepr"] + elif "setup" in test and "longrepr" in test["setup"]: + failure_string = test["setup"]["longrepr"] + elif "call" in test and "longrepr" in test["call"]: + failure_string = test["call"]["longrepr"] # could use test['call']['traceback'] information and test['call']['crash'] for more info else: - import pdb; pdb.set_trace() - duration = 0. + import pdb + + pdb.set_trace() + duration = 0.0 for action_key in ["setup", "call", "teardown"]: - if action_key not in test: continue - if "duration" in test: duration += test["duration"] - pytest_info[testname]['failures'][test['nodeid']] = {"failure_string": failure_string, "duration": duration} + if action_key not in test: + continue + if "duration" in test: + duration += test["duration"] + pytest_info[testname]["failures"][test["nodeid"]] = { + "failure_string": failure_string, + "duration": duration, + } return pytest_info + def get_coverage_info(path_to_logs, repo_name, branch_name): - # for filename, file_coverage in json.load(open(os.path.join(path_to_logs, pytest_hash, "coverage.json")))["files"].items(): - # if not any(relevant_function.startswith(filename) for relevant_function in relevant_functions): continue + # coverage_fp = open(os.path.join(path_to_logs, pytest_hash, "coverage.json")) + # for filename, file_coverage in json.load(coverage_fp)["files"].items(): + # if not any(relevant_function.startswith(filename) for relevant_function in relevant_functions): + # continue # for funcname, func_coverage in file_coverage["functions"].items(): # if f"{filename}::{funcname}" not in relevant_functions: continue # pycov_info[testname][f"{filename}::{funcname}"] = { @@ -74,50 +91,63 @@ def get_coverage_info(path_to_logs, repo_name, branch_name): # } raise NotImplementedError -def get_blank_repo_metrics(blank_source_code_folder, spec_filename, tokenizer, code_file_filter=lambda filename:filename): + +def get_blank_repo_metrics( + blank_source_code_folder, + spec_filename, + tokenizer, + code_file_filter=lambda filename: filename, +): blank_repo_metrics = { "functions_to_edit": [], } - + for subdir, _, files in os.walk(blank_source_code_folder): - for file in files: - if not code_file_filter(file): continue + for file in files: + if not code_file_filter(file): + continue filename = os.path.join(subdir, file) - splitted = filename.split('/') + splitted = filename.split("/") hidden = False for one in splitted: - if one.startswith('.'): + if one.startswith("."): hidden = True break if hidden: continue try: - code = open(filename, encoding='utf-8').read() - except: - print(f"Trouble opening {filename}") + code = open(filename, encoding="utf-8").read() + except Exception as e: + print(f"{e}: Trouble opening {filename}") continue filename = filename[len(blank_source_code_folder):].lstrip(" /") try: code_tree = ast.parse(code) - except: - print(f"Trouble parsing {os.path.join(blank_source_code_folder, filename)}") + except Exception as e: + print( + f"{e}: Trouble parsing 
{os.path.join(blank_source_code_folder, filename)}" + ) continue - for node in ast.walk(code_tree): + for node in ast.walk(code_tree): if isinstance(node, ast.ClassDef): for child in node.body: child.parent_class = node.name elif isinstance(node, ast.FunctionDef) and len(node.body) > 0: classname = "" - if hasattr(node, "parent_class"): - classname = f"{node.parent_class}." + if hasattr(node, "parent_class"): + classname = f"{node.parent_class}." for child in node.body: child.parent_function = f"{classname}{node.name}" elif isinstance(node, ast.Pass): - if hasattr(node, "parent_function"): - blank_repo_metrics["functions_to_edit"].append(f"{filename}::{node.parent_function}") - elif hasattr(node, "parent_class"): - blank_repo_metrics["functions_to_edit"].append(f"{filename}::{node.parent_class}") + if hasattr(node, "parent_function"): + blank_repo_metrics["functions_to_edit"].append( + f"{filename}::{node.parent_function}" + ) + elif hasattr(node, "parent_class"): + blank_repo_metrics["functions_to_edit"].append( + f"{filename}::{node.parent_class}" + ) # Get spec metrics concatted_spec = "" @@ -126,56 +156,92 @@ def get_blank_repo_metrics(blank_source_code_folder, spec_filename, tokenizer, c try: concatted_spec += page.extract_text() except pypdf.errors.PdfReadError as e: - print(f"Could not load page {p_idx} of {spec_filename}, excluding") - blank_repo_metrics["no_tokens_in_spec"] = tokenizer(concatted_spec, return_tensors='pt').input_ids.shape[-1] - + print(f"{e}: Could not load page {p_idx} of {spec_filename}, excluding...") + blank_repo_metrics["no_tokens_in_spec"] = tokenizer( + concatted_spec, return_tensors="pt" + ).input_ids.shape[-1] + return blank_repo_metrics + def render_mds(subfolder="docs"): all_submissions = {} method_repo_pytests = {} - for branch_name in glob.glob(os.path.join(analysis_files_path, '*')): + for branch_name in glob.glob(os.path.join(analysis_files_path, "*")): branch_name = os.path.basename(branch_name) - if branch_name in {"blank", "repos", "submission_repos"}: continue + if branch_name in {"blank", "repos", "submission_repos"}: + continue all_submissions[branch_name] = {} - for repo_file in glob.glob(os.path.join(analysis_files_path, branch_name, '*.json')): - - repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, repo_file) + for repo_file in glob.glob( + os.path.join(analysis_files_path, branch_name, "*.json") + ): + + repo_metrics_output_file = os.path.join( + analysis_files_path, branch_name, repo_file + ) repo_metrics = json.load(open(repo_metrics_output_file)) - repo_name = os.path.basename(repo_file[:-len(".json")]) - + repo_name = os.path.basename(repo_file[: -len(".json")]) + all_submissions[branch_name][repo_name] = {} - method_repo_pytests[f"{branch_name}_{repo_name}"] = f"# Submission Name: {branch_name}\n# Repository: {repo_name}" - if 'pytest_results' in repo_metrics: repo_metrics = repo_metrics['pytest_results'] + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] = f"# Submission Name: {branch_name}\n# Repository: {repo_name}" + if "pytest_results" in repo_metrics: + repo_metrics = repo_metrics["pytest_results"] for pytest_group, pytest_info in repo_metrics.items(): pytest_group = os.path.basename(pytest_group.strip("/")) - patch_diff = f"""\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```""" - if 'reason_for_failure' in pytest_info: - all_submissions[branch_name][repo_name][pytest_group] = {"reason_for_failure": pytest_info["failed_to_run"]} - method_repo_pytests[f"{branch_name}_{repo_name}"] += 
f"""\n## Failed to run pytests\n{ pytest_info['failed_to_run']}""" + patch_diff = ( + f"""\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```""" + ) + if "reason_for_failure" in pytest_info: + all_submissions[branch_name][repo_name][pytest_group] = { + "reason_for_failure": pytest_info["failed_to_run"] + } + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] += f"""\n## Failed to run pytests\n{ pytest_info['failed_to_run']}""" else: - all_submissions[branch_name][repo_name][pytest_group] = {"summary": pytest_info['summary'], "duration": pytest_info["duration"]} - method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""\n## Pytest Summary: {pytest_group} + all_submissions[branch_name][repo_name][pytest_group] = { + "summary": pytest_info["summary"], + "duration": pytest_info["duration"], + } + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] += f"""\n## Pytest Summary: {pytest_group} | status | count | |:---------|:-----:| """ - for category, count in pytest_info['summary'].items(): - if category not in {'duration'}: - method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""| {category} | {count} |\n""" - else: - method_repo_pytests[f"{branch_name}_{repo_name}"] += f"""| {category} | {float(count):.2f}s |\n""" - - method_repo_pytests[f"{branch_name}_{repo_name}"] += f"\n## Failed pytest outputs: {pytest_group}\n\n" - for testname, failure in pytest_info['failures'].items(): + for category, count in pytest_info["summary"].items(): + if category not in {"duration"}: + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] += f"""| {category} | {count} |\n""" + else: + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] += f"""| {category} | {float(count):.2f}s |\n""" + + method_repo_pytests[ + f"{branch_name}_{repo_name}" + ] += f"\n## Failed pytest outputs: {pytest_group}\n\n" + for testname, failure in pytest_info["failures"].items(): shortened_testname = os.path.basename(testname) - method_repo_pytests[f"{branch_name}_{repo_name}"] += f"### {shortened_testname}\n\n
<details><summary> <pre>{shortened_testname}</pre></summary><pre>\n{failure['failure_string']}\n</pre>\n</details>
\n" + method_repo_pytests[f"{branch_name}_{repo_name}"] += ( + f"### {shortened_testname}\n\n
{shortened_testname}"
+                            "
\n{failure['failure_string']}\n
\n
\n" + ) back_button = f"[back to {branch_name} summary]({os.path.join('/', f'analysis_{branch_name}')})\n\n" - with open(os.path.join(subfolder, f"analysis_{branch_name}_{repo_name}.md"), 'w') as wf: - wf.write(back_button + method_repo_pytests[f"{branch_name}_{repo_name}"] + patch_diff) - + with open( + os.path.join(subfolder, f"analysis_{branch_name}_{repo_name}.md"), "w" + ) as wf: + wf.write( + back_button + + method_repo_pytests[f"{branch_name}_{repo_name}"] + + patch_diff + ) # Render general page. Has buttons to all methods leaderboard = """ @@ -185,33 +251,46 @@ def render_mds(subfolder="docs"): method_to_repos = {} # Render method & repo page. Has "back" button. for branch_name, branch_info in all_submissions.items(): - cum_pytests = {'passed': 0} - method_to_repos[branch_name] = """ + cum_pytests = {"passed": 0} + method_to_repos[ + branch_name + ] = """ | | Repository | Summary | | |-|------------|---------|-|""" - total_tests = 0 # better info is probably broken down by split lol TODO - total_duration = 0. + total_tests = 0 # better info is probably broken down by split lol TODO + total_duration = 0.0 for repo_name, repo_test_info in branch_info.items(): for testname, test_info in repo_test_info.items(): - total_duration += test_info['duration'] if "reason_for_failure" in test_info: summary_pytests_string = "failure" else: - summary_pytests_string = f"{testname}: {test_info['summary']['passed']} / {test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" + total_duration += test_info["duration"] + summary_pytests_string = ( + f"{testname}: {test_info['summary']['passed']} / " + "{test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" + ) for category, count in test_info["summary"].items(): if category not in cum_pytests: cum_pytests[category] = 0 - if isinstance(count, int): cum_pytests[category] += int(count) - elif isinstance(count, float): cum_pytests[category] += float(count) + if isinstance(count, int): + cum_pytests[category] += int(count) + elif isinstance(count, float): + cum_pytests[category] += float(count) total_tests += 1 - method_to_repos[branch_name] += f"\n||[{repo_name}]({os.path.join('/', f'analysis_{branch_name}_{repo_name}')})|{summary_pytests_string}||" - break # assume we ran all tests. will add functionality for checking diff tests later, as we need it. - summary_pytests_string = f"{cum_pytests['passed']} / {total_tests} ; duration: {total_duration:.2f}s" + method_to_repos[branch_name] += ( + f"\n||[{repo_name}]({os.path.join('/', f'analysis_{branch_name}_{repo_name}')})|" + f"{summary_pytests_string}||" + ) + break # assume we ran all tests. will add functionality for checking diff tests later, as we need it. 
+ summary_pytests_string = ( + f"{cum_pytests['passed']} / {total_tests} ; duration: {total_duration:.2f}s" + ) leaderboard += f"\n||[{branch_name}]({os.path.join('/', f'analysis_{branch_name}')})|{summary_pytests_string}||" - with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), 'w') as wf: - wf.write( method_to_repos[branch_name]) - with open(os.path.join(subfolder, "analysis.md"), 'w') as wf: - wf.write(leaderboard) + with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), "w") as wf: + wf.write(method_to_repos[branch_name]) + with open(os.path.join(subfolder, "analysis.md"), "w") as wf: + wf.write(leaderboard) + def get_args(): parser = argparse.ArgumentParser() @@ -222,12 +301,15 @@ def get_args(): parser.add_argument("--analyze_submissions", action="store_true") parser.add_argument("--render_webpages", action="store_true") - parser.add_argument("--split", type=str, default='lite') + parser.add_argument("--split", type=str, default="lite") - parser.add_argument("--tokenizer_name", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct") + parser.add_argument( + "--tokenizer_name", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct" + ) return parser.parse_args() + def main(args): global analysis_files_path @@ -235,73 +317,94 @@ def main(args): submissions_dataset_name = "celinelee/commit0_submissions" dataset = load_dataset(commit0_dataset_name, split="test") # type: ignore submission_dataset = load_dataset(submissions_dataset_name, split="train") - if args.get_blank_details: - if args.do_setup: - os.system(f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + if args.do_setup: + os.system( + f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos " + "--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" + ) branch_name = "blank" if not args.keep_previous_eval: if os.path.exists(os.path.join(analysis_files_path, branch_name)): shutil.rmtree(os.path.join(analysis_files_path, branch_name)) os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) - tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name) + tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name) for example in dataset: - repo_name = example["repo"].split('/')[-1] + repo_name = example["repo"].split("/")[-1] if args.split != "all" and repo_name not in SPLIT[args.split]: continue - repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") - blank_source_code_folder = os.path.join(analysis_files_path, "repos", repo_name, example["src_dir"]) - spec_filepath = os.path.join(analysis_files_path, "repos", repo_name, "spec.pdf") + repo_metrics_output_file = os.path.join( + analysis_files_path, branch_name, f"{repo_name}.json" + ) + blank_source_code_folder = os.path.join( + analysis_files_path, "repos", repo_name, example["src_dir"] + ) + spec_filepath = os.path.join( + analysis_files_path, "repos", repo_name, "spec.pdf" + ) repo_metrics = get_blank_repo_metrics( - blank_source_code_folder, - spec_filepath, - tokenizer, - code_file_filter=lambda filename: re.fullmatch(r'.*\.py', filename) is not None, - ) + blank_source_code_folder, + spec_filepath, + tokenizer, + code_file_filter=lambda filename: re.fullmatch(r".*\.py", filename) + is not None, + ) json.dump(repo_metrics, open(repo_metrics_output_file, "w"), indent=4) if args.get_reference_details: - if args.do_setup: - os.system(f"commit0 setup {args.split} --base-dir 
{analysis_files_path}/repos --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + if args.do_setup: + os.system( + f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos " + "--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" + ) branch_name = "reference" os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) if not args.keep_previous_eval: for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"): if os.path.exists(os.path.join(repo_log_path, branch_name)): shutil.rmtree(os.path.join(repo_log_path, branch_name)) - os.system(f"commit0 evaluate --reference --commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml") + os.system( + "commit0 evaluate --reference " + f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" + ) # get coverage and pytest info for each repo for example in dataset: - repo_name = example["repo"].split('/')[-1] + repo_name = example["repo"].split("/")[-1] if args.split != "all" and repo_name not in SPLIT[args.split]: continue - repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") + repo_metrics_output_file = os.path.join( + analysis_files_path, branch_name, f"{repo_name}.json" + ) path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}" pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name) json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4) if args.analyze_submissions: - commit0_dot_file_path = os.path.join(analysis_files_path, "submission_repos", ".commit0.yaml") + commit0_dot_file_path = os.path.join( + analysis_files_path, "submission_repos", ".commit0.yaml" + ) for submission in submission_dataset: - branch_name = submission['name'] + branch_name = submission["name"] os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) if not args.keep_previous_eval: for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"): if os.path.exists(os.path.join(repo_log_path, branch_name)): shutil.rmtree(os.path.join(repo_log_path, branch_name)) for example in dataset: - repo_name = example["repo"].split('/')[-1] + repo_name = example["repo"].split("/")[-1] if args.split != "all" and repo_name not in SPLIT[args.split]: continue clone_url = f"https://github.com/test-save-commit0/{repo_name}.git" - clone_dir = os.path.abspath(os.path.join(analysis_files_path, "submission_repos", repo_name)) - repo = clone_repo(clone_url, clone_dir, branch_name, logger) + clone_dir = os.path.abspath( + os.path.join(analysis_files_path, "submission_repos", repo_name) + ) + clone_repo(clone_url, clone_dir, branch_name, logger) # after successfully setup, write the commit0 dot file write_commit0_dot_file( commit0_dot_file_path, @@ -313,22 +416,28 @@ def main(args): }, ) # run pytests - os.system(f"commit0 evaluate --branch {branch_name} --commit0-dot-file-path {commit0_dot_file_path}") + os.system( + f"commit0 evaluate --branch {branch_name} " + "--commit0-dot-file-path {commit0_dot_file_path}" + ) for example in dataset: - repo_name = example["repo"].split('/')[-1] + repo_name = example["repo"].split("/")[-1] if args.split != "all" and repo_name not in SPLIT[args.split]: continue - repo_metrics_output_file = os.path.join(analysis_files_path, branch_name, f"{repo_name}.json") + repo_metrics_output_file = os.path.join( + analysis_files_path, branch_name, f"{repo_name}.json" + ) path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}" pytest_results = get_pytest_info(path_to_logs, repo_name, 
branch_name) json.dump(pytest_results, open(repo_metrics_output_file, "w"), indent=4) - - if not args.keep_previous_eval: + + if not args.keep_previous_eval: for analysis_file in glob.glob("docs/analysis*.md"): os.unlink(analysis_file) - if args.render_webpages: render_mds() + if args.render_webpages: + render_mds() main(get_args()) From 1400b835857024a6d3e5b8c06d347fd6539d29ff Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 17:29:36 -0400 Subject: [PATCH 3/7] formatting and diffs --- docs/render_submissions.py | 46 +++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 3214f35..811d3ec 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -61,9 +61,7 @@ def get_pytest_info(path_to_logs, repo_name, branch_name): failure_string = test["call"]["longrepr"] # could use test['call']['traceback'] information and test['call']['crash'] for more info else: - import pdb - - pdb.set_trace() + breakpoint() duration = 0.0 for action_key in ["setup", "call", "teardown"]: if action_key not in test: @@ -195,13 +193,13 @@ def render_mds(subfolder="docs"): patch_diff = ( f"""\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```""" ) - if "reason_for_failure" in pytest_info: + if "failed_to_run" in pytest_info: all_submissions[branch_name][repo_name][pytest_group] = { - "reason_for_failure": pytest_info["failed_to_run"] + "failed_to_run": pytest_info["failed_to_run"] } method_repo_pytests[ f"{branch_name}_{repo_name}" - ] += f"""\n## Failed to run pytests\n{ pytest_info['failed_to_run']}""" + ] += f"""\n## Failed to run pytests\n```\n{pytest_info['failed_to_run']}\n```""" else: all_submissions[branch_name][repo_name][pytest_group] = { "summary": pytest_info["summary"], @@ -210,9 +208,9 @@ def render_mds(subfolder="docs"): method_repo_pytests[ f"{branch_name}_{repo_name}" ] += f"""\n## Pytest Summary: {pytest_group} - | status | count | - |:---------|:-----:| - """ +| status | count | +|:---------|:-----:| +""" for category, count in pytest_info["summary"].items(): if category not in {"duration"}: method_repo_pytests[ @@ -230,10 +228,10 @@ def render_mds(subfolder="docs"): shortened_testname = os.path.basename(testname) method_repo_pytests[f"{branch_name}_{repo_name}"] += ( f"### {shortened_testname}\n\n
{shortened_testname}"
-                            "
\n{failure['failure_string']}\n
\n
\n" + f"
\n{failure['failure_string']}\n
\n\n" ) - back_button = f"[back to {branch_name} summary]({os.path.join('/', f'analysis_{branch_name}')})\n\n" + back_button = f"[back to {branch_name} summary]({f'analysis_{branch_name}'})\n\n" with open( os.path.join(subfolder, f"analysis_{branch_name}_{repo_name}.md"), "w" ) as wf: @@ -252,22 +250,20 @@ def render_mds(subfolder="docs"): # Render method & repo page. Has "back" button. for branch_name, branch_info in all_submissions.items(): cum_pytests = {"passed": 0} - method_to_repos[ - branch_name - ] = """ + method_to_repos[branch_name] = """ | | Repository | Summary | | |-|------------|---------|-|""" total_tests = 0 # better info is probably broken down by split lol TODO total_duration = 0.0 for repo_name, repo_test_info in branch_info.items(): for testname, test_info in repo_test_info.items(): - if "reason_for_failure" in test_info: + if "failed_to_run" in test_info: summary_pytests_string = "failure" else: total_duration += test_info["duration"] summary_pytests_string = ( f"{testname}: {test_info['summary']['passed']} / " - "{test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" + f"{test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" ) for category, count in test_info["summary"].items(): if category not in cum_pytests: @@ -278,14 +274,14 @@ def render_mds(subfolder="docs"): cum_pytests[category] += float(count) total_tests += 1 method_to_repos[branch_name] += ( - f"\n||[{repo_name}]({os.path.join('/', f'analysis_{branch_name}_{repo_name}')})|" + f"\n||[{repo_name}]({f'analysis_{branch_name}_{repo_name}'})|" f"{summary_pytests_string}||" ) break # assume we ran all tests. will add functionality for checking diff tests later, as we need it. summary_pytests_string = ( f"{cum_pytests['passed']} / {total_tests} ; duration: {total_duration:.2f}s" ) - leaderboard += f"\n||[{branch_name}]({os.path.join('/', f'analysis_{branch_name}')})|{summary_pytests_string}||" + leaderboard += f"\n||[{branch_name}]({f'analysis_{branch_name}'})|{summary_pytests_string}||" with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), "w") as wf: wf.write(method_to_repos[branch_name]) with open(os.path.join(subfolder, "analysis.md"), "w") as wf: @@ -322,7 +318,7 @@ def main(args): if args.do_setup: os.system( f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos " - "--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" + f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" ) branch_name = "blank" if not args.keep_previous_eval: @@ -358,7 +354,7 @@ def main(args): if args.do_setup: os.system( f"commit0 setup {args.split} --base-dir {analysis_files_path}/repos " - "--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" + f"--commit0-dot-file-path {analysis_files_path}/repos/.commit0.yaml" ) branch_name = "reference" os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) @@ -389,6 +385,14 @@ def main(args): commit0_dot_file_path = os.path.join( analysis_files_path, "submission_repos", ".commit0.yaml" ) + if not args.keep_previous_eval: + for subfolder in glob.glob(os.path.join(analysis_files_path, "*")): + if os.path.basename(subfolder) not in {"blank", "reference", "repos", "submission_repos"}: + try: + shutil.rmtree(analysis_files_path, subfolder) + except Exception as e: + print(f"{e}: when removing {subfolder}") + for submission in submission_dataset: branch_name = submission["name"] os.makedirs(os.path.join(analysis_files_path, branch_name), exist_ok=True) @@ -418,7 +422,7 
@@ def main(args): # run pytests os.system( f"commit0 evaluate --branch {branch_name} " - "--commit0-dot-file-path {commit0_dot_file_path}" + f"--commit0-dot-file-path {commit0_dot_file_path}" ) for example in dataset: repo_name = example["repo"].split("/")[-1] From d28e0826b113be83e75433d65e0f5a5176b10abf Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 17:31:21 -0400 Subject: [PATCH 4/7] minor formatting then --- docs/render_submissions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 811d3ec..0971c75 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -262,8 +262,8 @@ def render_mds(subfolder="docs"): else: total_duration += test_info["duration"] summary_pytests_string = ( - f"{testname}: {test_info['summary']['passed']} / " - f"{test_info['summary']['collected']} ; duration: { test_info['duration']:.2f}s" + f"`{testname}`: {test_info['summary']['passed']} / " + f"{test_info['summary']['collected']} ; duration: {test_info['duration']:.2f}s" ) for category, count in test_info["summary"].items(): if category not in cum_pytests: From 7ffc9a53664b78bb7969d972f315ebe267e538c8 Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 17:40:02 -0400 Subject: [PATCH 5/7] remove breakpoints --- docs/render_submissions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 0971c75..81b7409 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -61,7 +61,7 @@ def get_pytest_info(path_to_logs, repo_name, branch_name): failure_string = test["call"]["longrepr"] # could use test['call']['traceback'] information and test['call']['crash'] for more info else: - breakpoint() + failure_string = "" duration = 0.0 for action_key in ["setup", "call", "teardown"]: if action_key not in test: From 22b1f98869920fc6819ea85f45cacd4ebe1043dd Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 17:41:14 -0400 Subject: [PATCH 6/7] typo environments --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 24db8f3..3d56f78 100644 --- a/docs/index.md +++ b/docs/index.md @@ -19,7 +19,7 @@ pass their unit tests. All libraries have: Commit-0 is an interactive environment that makes it easy to design and test new agents. You can: -* Efficiently run tests in isolated environemnts +* Efficiently run tests in isolated environments * Distribute testing and development across cloud systems * Track and log all changes made throughout. 
From 967a55ba19b417155f5409572662deff93b88da4 Mon Sep 17 00:00:00 2001 From: Celine Lee Date: Wed, 25 Sep 2024 18:02:25 -0400 Subject: [PATCH 7/7] fix some counting --- docs/render_submissions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/render_submissions.py b/docs/render_submissions.py index 81b7409..799bb16 100644 --- a/docs/render_submissions.py +++ b/docs/render_submissions.py @@ -253,7 +253,6 @@ def render_mds(subfolder="docs"): method_to_repos[branch_name] = """ | | Repository | Summary | | |-|------------|---------|-|""" - total_tests = 0 # better info is probably broken down by split lol TODO total_duration = 0.0 for repo_name, repo_test_info in branch_info.items(): for testname, test_info in repo_test_info.items(): @@ -272,18 +271,18 @@ def render_mds(subfolder="docs"): cum_pytests[category] += int(count) elif isinstance(count, float): cum_pytests[category] += float(count) - total_tests += 1 method_to_repos[branch_name] += ( f"\n||[{repo_name}]({f'analysis_{branch_name}_{repo_name}'})|" f"{summary_pytests_string}||" ) break # assume we ran all tests. will add functionality for checking diff tests later, as we need it. summary_pytests_string = ( - f"{cum_pytests['passed']} / {total_tests} ; duration: {total_duration:.2f}s" + f"{cum_pytests['passed']} / {cum_pytests['collected']} ; duration: {total_duration:.2f}s" ) leaderboard += f"\n||[{branch_name}]({f'analysis_{branch_name}'})|{summary_pytests_string}||" + back_button = f"[back to all submissions]({f'analysis'})\n\n" with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), "w") as wf: - wf.write(method_to_repos[branch_name]) + wf.write(back_button + "\n" + method_to_repos[branch_name]) with open(os.path.join(subfolder, "analysis.md"), "w") as wf: wf.write(leaderboard) @@ -387,9 +386,10 @@ def main(args): ) if not args.keep_previous_eval: for subfolder in glob.glob(os.path.join(analysis_files_path, "*")): - if os.path.basename(subfolder) not in {"blank", "reference", "repos", "submission_repos"}: + if os.path.basename(subfolder.rstrip("/")) not in {"blank", "reference", "repos", "submission_repos"}: try: - shutil.rmtree(analysis_files_path, subfolder) + print(f"Clearing {subfolder}") + shutil.rmtree(subfolder) except Exception as e: print(f"{e}: when removing {subfolder}")
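The per-repository rows that `render_mds` writes (and the aggregated leaderboard rows such as `3628 / 33 ; duration: 18.66s` in `docs/analysis.md`) are derived from the `summary` and `duration` fields of each run's `report.json`, which `get_pytest_info` collects per pytest hash. A minimal sketch of that per-report mapping, assuming the reports were produced with `pytest-json-report`; the path in the comment is only illustrative of the `logs/pytest/<repo>/<branch>/<hash>/` layout the script reads from:

```python
import json
from pathlib import Path


def summarize_report(report_path: str) -> str:
    """Reduce a pytest-json-report file to a leaderboard-style summary string."""
    report = json.loads(Path(report_path).read_text())
    summary = report["summary"]              # counts such as "passed", "failed", "collected"
    passed = summary.get("passed", 0)        # the script likewise defaults a missing "passed" to 0
    collected = summary.get("collected", 0)
    duration = report.get("duration", 0.0)   # total run time in seconds
    return f"{passed} / {collected} ; duration: {duration:.2f}s"


# Hypothetical path following the script's log layout:
# print(summarize_report("logs/pytest/<repo_name>/<branch_name>/<pytest_hash>/report.json"))
```

A typical invocation would combine the flags defined in `get_args`, e.g. `python docs/render_submissions.py --get_reference_details --analyze_submissions --render_webpages --split lite`.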