In [50]:
# Notebook configuration: paths and column names used by the cells below.

# JSON file that holds the sheet identifiers (LOG_SHEET_ID, SHEET_ID, LOG_SHEET_NAME).
track_runs_sheet_path = "runs_sheet.json"
# Text file with one non-passing instance id per line; it is read first to get
# the previous pass rate and later overwritten with the refreshed list.
non_passing_instances_path = "eval_scripts/non_passing_instances.txt"
# Name of the sheet column passed to get_column_values to fetch instance ids.
instance_id_column_name = "instance_id"
# Log file that receives one appended "<timestamp> - <pass %>" line per run.
bench_log_path = "eval_scripts/bench_log.txt"

from swebench.google_sheets import get_column_values

In [51]:
# Load the previously recorded non-passing instance ids (one id per line) and
# derive the pass rate they imply.
try:
    with open(non_passing_instances_path, "r") as file:
        # Ignore blank lines (e.g. a trailing empty line) so they are not
        # counted as failing instances.
        non_passing_instances = [line for line in file if line.strip()]
except FileNotFoundError as err:
    # Keep the specific exception type and chain the original traceback.
    raise FileNotFoundError(
        f"The file at {non_passing_instances_path} does not exist."
    ) from err

# Total number of instances the percentages are computed against.
# NOTE(review): hard-coded; presumably the benchmark size - confirm it matches
# the number of rows in the sheet.
total_instances = 500
non_passing_instances_count = len(non_passing_instances)
passing_instances_count = total_instances - non_passing_instances_count
passing_percentage = passing_instances_count / total_instances * 100

print(f"Non passing instances: {non_passing_instances_count}")
print(f"Passing instances: {passing_instances_count}")
print(f"Pass %: {passing_percentage:.2f}%")

Non passing instances: 382
Passing instances: 118
Pass %: 23.60%


In [52]:
# Load the sheet identifiers from the JSON config file.
import json

with open(track_runs_sheet_path, "r") as file:
    runs_sheet = json.load(file)

# Validate before unpacking: a missing key and an explicit null are both
# reported together, naming the offending keys, instead of a bare KeyError.
required_keys = ("LOG_SHEET_ID", "SHEET_ID", "LOG_SHEET_NAME")
missing_keys = [key for key in required_keys if runs_sheet.get(key) is None]
if missing_keys:
    raise ValueError(
        f"{track_runs_sheet_path} is missing or has null values for: "
        + ", ".join(missing_keys)
    )

log_sheet_id, sheet_id, sheet_name = (runs_sheet[key] for key in required_keys)


In [53]:
# Fetch the instance id column from the log sheet.
instance_ids = get_column_values(log_sheet_id, sheet_name, instance_id_column_name)
# f-string avoids the doubled spaces that comma-separated print arguments
# produced around the count, and matches the other cells' message style.
print(f"Found {len(instance_ids)} instance ids")

2024-12-10 12:49:00,691 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0


Found  500  instance ids


In [54]:
# Fetch the values of the "OVERALL" column from the log sheet and report how
# many were returned.
overall_column_name = "OVERALL"
overall_column_values = get_column_values(
    log_sheet_id,
    sheet_name,
    overall_column_name,
)

overall_value_count = len(overall_column_values)
print(f"Found {overall_value_count} values in {overall_column_name} column")

2024-12-10 12:49:03,877 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0


Found 500 values in OVERALL column


In [55]:
# Collect the ids of instances whose OVERALL cell is the literal string "FALSE"
# and compare the resulting pass rate with the previously recorded one.

# The two columns are paired by position. Surface a length mismatch instead of
# failing with an IndexError (OVERALL longer) or silently under-counting
# (OVERALL shorter).
if len(overall_column_values) != len(instance_ids):
    print(
        f"Warning: {len(instance_ids)} instance ids but "
        f"{len(overall_column_values)} OVERALL values; "
        "only rows present in both columns are compared"
    )

non_passing = [
    instance_id
    for instance_id, overall_value in zip(instance_ids, overall_column_values)
    if overall_value == "FALSE"
]

print(f"Found {len(non_passing)} non-passing instances")

new_pass_percentage = (total_instances - len(non_passing)) / total_instances * 100
print(f"Old pass percentage: {passing_percentage:.2f}%")
print(f"New pass percentage: {new_pass_percentage:.2f}%")

Found 382 non-passing instances
Old pass percentage: 23.60%
New pass percentage: 23.60%


In [56]:
# Overwrite the non-passing instances file with the refreshed ids, one per line.
with open(non_passing_instances_path, "w") as out_file:
    for instance_id in non_passing:
        out_file.write(instance_id + "\n")

print(f"Updated instances written to {non_passing_instances_path}.")


Updated instances written to ./eval_scripts/non_passing_instances.txt.


In [None]:
# Append one "<timestamp> - <pass %>" line to the bench log for this run.
from datetime import datetime

# Local wall-clock time, formatted through the datetime format spec.
timestamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
log_entry = f"{timestamp} - {new_pass_percentage:.2f}%\n"

with open(bench_log_path, "a") as log_file:
    log_file.write(log_entry)

print(f"Updated bench log written to {bench_log_path}.")