In [36]:
# --- Files read and written by this notebook ---------------------------------
# JSON file holding the sheet ids / sheet name used for the sheets API calls.
track_runs_sheet_path = "runs_sheet.json"
# Text file of non-passing instance ids; read first, then overwritten further down.
non_passing_instances_path = "eval_scripts/non_passing_instances.txt"
# Log file that receives one timestamped pass-percentage line per run.
bench_log_path = "eval_scripts/bench_log.txt"

# --- Column names requested from the log sheet --------------------------------
instance_id_column_name = "instance_id"
solved_column_name = "solved"
# Rows whose value in this column is "FALSE" are treated as non-passing.
OVERALL_column_name = "OVERALL"

from swebench.google_sheets import get_column_values, get_rows_as_dicts

In [37]:
# Load the previously recorded non-passing instances (one entry per line) so the
# pass rate from the last run can be reported as a baseline.
try:
    with open(non_passing_instances_path, "r") as file:
        # Skip blank lines (e.g. a trailing empty line) so they are not counted
        # as instances; kept entries retain their original line ending.
        non_passing_instances = [line for line in file.readlines() if line.strip()]
except FileNotFoundError as exc:
    raise Exception(f"The file at {non_passing_instances_path} does not exist.") from exc

# NOTE(review): 500 is presumably the total number of benchmark instances — confirm.
total_instances = 500
original_non_passing_instances_count = len(non_passing_instances)
original_passing_instances_count = total_instances - original_non_passing_instances_count
original_passing_percentage = original_passing_instances_count / total_instances * 100

print(f"Original non-passing instances: {original_non_passing_instances_count}")
print(f"Original passing instances: {original_passing_instances_count}")
print(f"Original pass %: {original_passing_percentage:.2f}%")

Original non-passing instances: 234
Original passing instances: 266
Original pass %: 53.20%


In [38]:
# Load our runs sheet details for access through api
import json

with open(track_runs_sheet_path, "r") as file:
    runs_sheet = json.load(file)

# Use .get() so that a key missing from the JSON file is reported by the
# explicit validation below instead of surfacing as a bare KeyError.
log_sheet_id = runs_sheet.get("LOG_SHEET_ID")
sheet_id = runs_sheet.get("SHEET_ID")
sheet_name = runs_sheet.get("LOG_SHEET_NAME")

# Fail early: the later sheet lookup needs all of these values.
if log_sheet_id is None or sheet_id is None or sheet_name is None:
    raise Exception("Sheet ID or name is None")


In [39]:
# Get 3 columns: instance_id, solved, OVERALL
columns = [instance_id_column_name, solved_column_name, OVERALL_column_name]
rows = get_rows_as_dicts(log_sheet_id, sheet_name, columns)

# Filter rows so that only those with FALSE remain
non_passing_rows = [r for r in rows if r[OVERALL_column_name] == "FALSE"]

# Sort by solved count, highest first. The key uses the configured column name
# (not a hard-coded "solved") so it stays in sync with the columns requested above.
non_passing_rows.sort(key=lambda r: int(r[solved_column_name]), reverse=True)
print(non_passing_rows)

# Get just the instance_ids
non_passing_instance_ids = [row[instance_id_column_name] for row in non_passing_rows]
print(non_passing_instance_ids)

2024-12-11 12:19:27,176 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0


[{'instance_id': 'django__django-17029', 'solved': '5', 'OVERALL': 'FALSE'}, {'instance_id': 'matplotlib__matplotlib-26342', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-5809', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-6202', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-7205', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-7571', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-7982', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'pytest-dev__pytest-8399', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'scikit-learn__scikit-learn-11578', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'scikit-learn__scikit-learn-26323', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'sphinx-doc__sphinx-8475', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'sympy__sympy-13372', 'solved': '4', 'OVERALL': 'FALSE'}, {'instance_id': 'sympy__sympy-16450'

In [40]:
# Derive the new pass statistics from the ids fetched from the sheet.
new_non_passing_instances_count = len(non_passing_instance_ids)
new_passing_instances_count = total_instances - new_non_passing_instances_count
new_passing_percentage = new_passing_instances_count / total_instances * 100

# Show the previous and the new pass rate on consecutive lines for comparison.
print(
    f"Old pass percentage: {original_passing_percentage:.2f}% ({original_passing_instances_count})",
    f"New pass percentage: {new_passing_percentage:.2f}% ({new_passing_instances_count})",
    sep="\n",
)

Old pass percentage: 53.20% (266)
New pass percentage: 53.60% (268)


In [41]:
# Overwrite the non-passing instances file with the current ids, one per line.
with open(non_passing_instances_path, "w") as out_file:
    for instance_id in non_passing_instance_ids:
        out_file.write(instance_id + "\n")

print(f"{new_non_passing_instances_count} non-passing instances written to {non_passing_instances_path}.")


232 non-passing instances written to eval_scripts/non_passing_instances.txt.


In [42]:
# Append a timestamped line with the new pass percentage to the bench log.
from datetime import datetime

timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
log_entry = f"{timestamp} - {new_passing_percentage:.2f}% ({new_passing_instances_count})\n"

# Mode "a" keeps earlier entries and adds the new one at the end.
with open(bench_log_path, "a") as log_file:
    log_file.write(log_entry)

print(f"Updated bench log written to {bench_log_path}.")



Updated bench log written to eval_scripts/bench_log.txt.
