# WandB Sweep Analysis

In [None]:
import pandas as pd
import wandb

In [None]:
def get_runs_by_tag(project: str, tag: str, entity: str | None = None) -> pd.DataFrame:
    """Collect every W&B run carrying ``tag`` into a flat DataFrame.

    The query filters on the tag only, so runs are returned whatever their
    state. A short report (run count and per-state counts, or a "no runs"
    notice) is printed before returning.

    Args:
        project: Project name to query.
        tag: Tag a run must carry to be included.
        entity: Optional entity; when given, the query path is
            ``"<entity>/<project>"``, otherwise the bare project name is used.

    Returns:
        A DataFrame with one row per run. Columns are ``run_id``,
        ``run_name``, ``state``, ``tags`` and ``url``, followed by one
        ``config.<key>`` column per config entry and one ``summary.<key>``
        column per summary entry whose key does not start with ``_``.
        When nothing matches, the DataFrame is empty.
    """

    def _as_row(run) -> dict:
        # Identity fields first so they lead the column order.
        record = {
            "run_id": run.id,
            "run_name": run.name,
            "state": run.state,
            "tags": run.tags,
            "url": run.url,
        }
        # Every config parameter, namespaced to avoid clashing with the above.
        record.update((f"config.{name}", setting) for name, setting in run.config.items())
        # Summary metrics, skipping underscore-prefixed keys.
        record.update(
            (f"summary.{name}", metric)
            for name, metric in run.summary.items()
            if not name.startswith("_")
        )
        return record

    client = wandb.Api()
    project_path = project if not entity else f"{entity}/{project}"
    tagged_runs = client.runs(project_path, filters={"tags": {"$in": [tag]}})

    df = pd.DataFrame([_as_row(run) for run in tagged_runs])

    run_count = len(df)
    if run_count:
        print(f"Found {run_count} runs with tag '{tag}'")
        print(f"States: {df['state'].value_counts().to_dict()}")
    else:
        print(f"No runs found with tag '{tag}'")

    return df

In [None]:
# Fetch every run carrying the tag below from the "spd" project of the
# "goodfire" entity. Swap the second argument to analyse a different tag.
df = get_runs_by_tag("spd", "ss_llama_subset_bal_sans", entity="goodfire")

In [None]:
# Preview the identifying columns of the first ten runs; skipped when the
# fetch returned no rows.
if df.shape[0] > 0:
    display(df.head(10)[["run_id", "run_name", "state"]])

In [None]:
# Descriptive statistics for the numeric summary metrics, one metric per row.
# Nothing is shown when there are no runs or no numeric summary columns.
if df.shape[0] > 0:
    numeric_cols = [
        name
        for name in df.columns
        if name.startswith("summary.") and pd.api.types.is_numeric_dtype(df[name])
    ]
    if numeric_cols:
        # describe() puts statistics on the rows; transpose so each metric is a row.
        summary_stats = df[numeric_cols].describe().transpose()
        display(summary_stats)