In [1]:
from pathlib import Path
import sys
# Locate the project root: the nearest directory, starting at the current
# working directory and walking upward through its parents, that contains a
# pyproject.toml. Falls back to the current working directory if none does.
_cwd = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in (_cwd, *_cwd.parents)
        if (candidate / "pyproject.toml").exists()
    ),
    _cwd,
)

# Put the project root on the import path. The membership check keeps the cell
# idempotent: re-running it must not keep appending duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [2]:
from llm_python.utils.task_loader import get_task_loader


# Loader used to enumerate the available subsets and the tasks in each one.
task_loader = get_task_loader()

# Flatten the subset -> tasks relation into one {"task_id", "subset"} record
# per (task, subset) pair. Each item returned by get_subset_tasks is unpacked
# as a pair; only its first element (the task id) is used here.
subsets = task_loader.get_available_subsets()
task_subset_mapping = []
for subset in subsets:
    tasks = task_loader.get_subset_tasks(subset)
    task_subset_mapping.extend(
        {"task_id": task_id, "subset": subset} for task_id, _ in tasks
    )


In [None]:
from time import sleep
from google.cloud import bigquery

# Initialize BigQuery client
client = bigquery.Client(project="trelis-arc", location="europe-west2")

# Define table schema: one row per (task, subset) membership.
schema = [
    bigquery.SchemaField("task_id", "STRING"),
    bigquery.SchemaField("subset", "STRING"),
]

# Define table reference
table_id = "trelis-arc.arc.arc_task_subsets"

# The load below overwrites the whole table, so refuse to run with no rows:
# an empty mapping almost certainly means the previous cell did not run.
if not task_subset_mapping:
    raise ValueError(
        f"task_subset_mapping is empty; refusing to overwrite {table_id}"
    )

# Replace the table in a single load job instead of delete + create + sleep +
# streaming insert. WRITE_TRUNCATE overwrites any existing data and schema
# (and the table is created if it does not exist yet), so no fixed-length
# sleep is needed to wait for a freshly recreated table to accept streamed rows.
load_config = bigquery.LoadJobConfig(
    schema=schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
load_job = client.load_table_from_json(
    task_subset_mapping, table_id, job_config=load_config
)
# Block until the job finishes; result() raises if the load failed, so a bad
# upload stops the notebook instead of only printing a message.
load_job.result()

# Keep `table` bound to the (now replaced) table's metadata.
table = client.get_table(table_id)
print(f"Inserted {load_job.output_rows} rows into {table_id}")

Inserted 6592 rows into trelis-arc.arc.arc_task_subsets


In [None]:
# Count the distinct task ids recorded for each subset, largest subsets first.
query = """
SELECT subset, COUNT(DISTINCT task_id) AS num_tasks
FROM `trelis-arc.arc.arc_task_subsets`
GROUP BY subset
ORDER BY num_tasks DESC
"""

# Submit the query, then materialise its result rows as a DataFrame.
query_job = client.query(query)
results = query_job.to_dataframe()
print(results)

                                  subset  num_tasks
0                arc-prize-2025/training       1000
1    arc-prize-2025/missing_10_solutions        588
2      arc-prize-2025/missing_1_solution        514
3              arc-prize-2024/evaluation        400
4                arc-prize-2024/training        400
..                                   ...        ...
79     arc-prize-2025/longest_training_1          1
80    arc-prize-2025/middle_evaluation_1          1
81      arc-prize-2025/middle_training_1          1
82  arc-prize-2025/shortest_evaluation_1          1
83    arc-prize-2025/shortest_training_1          1

[84 rows x 2 columns]


