In [None]:
import wandb
import pandas as pd
import numpy as np
from tabulate import tabulate

# W&B entity and project names; print_wandb_links uses them as its default arguments.
ENTITY = "confopt-team"
PROJECT = "ConfoptAutoML25-Models"

In [None]:
def pull_run_data(run_id, entity="confopt-team", project="ConfoptAutoML25-Models"):
    """
    Fetch the summary metrics and the configuration of a single W&B run.

    Args:
        run_id (str): The run identifier.
        entity (str): The W&B entity (username or team).
        project (str): The name of the W&B project.

    Returns:
        dict | None: ``{"summary": ..., "config": ...}`` for the run, or None
        when the run could not be retrieved (the error is printed, not raised).
        The run history is intentionally not fetched.
    """
    client = wandb.Api()
    run_path = "{}/{}/{}".format(entity, project, run_id)

    try:
        run = client.run(run_path)
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do.
        print(f"Error retrieving run: {e}")
        return None

    # Summary metrics first, then the configuration the run was launched with.
    return {
        "summary": run.summary._json_dict,
        "config": run.config,
    }

def get_run_ids_with_filter(filter_dict, entity="confopt-team", project="ConfoptAutoML25-Models"):
    """
    Retrieve the IDs and names of all runs in a project that match a filter.

    Args:
        filter_dict (dict): A dictionary defining the filter criteria.
            For example: {"config.learning_rate": 0.001}
        entity (str): The W&B entity (username or team).
        project (str): The name of the W&B project. The default uses the same
            spelling as the rest of this notebook.

    Returns:
        tuple[list, list]: ``(run_ids, run_names)``, two parallel lists with
        one entry per matching run.
    """
    api = wandb.Api()
    runs = api.runs(f"{entity}/{project}", filters=filter_dict)

    # Collect ids and names in a single pass so the two lists stay aligned.
    run_ids, run_names = [], []
    for run in runs:
        run_ids.append(run.id)
        run_names.append(run.name)
    return run_ids, run_names

def print_wandb_links(run_ids, run_names, entity=ENTITY, project=PROJECT):
    """Print one line per run: its W&B overview URL followed by "(name id)"."""
    runs_url = f"https://wandb.ai/{entity}/{project}/runs"
    for rid, rname in zip(run_ids, run_names):
        print(f"{runs_url}/{rid}/overview ({rname} {rid})")

def get_list_of_incomplete_runs(results, optimizer, optimizer_other, opset, subspace, fingerprint_cols, seed=0, n_hyperparameter_sets=9):
    """
    Check whether one experiment has a run for every hyperparameter set.

    Args:
        results (pd.DataFrame): One row per run; must contain the columns
            'optimizer', 'optimizer_other', 'opset', 'subspace', 'seed',
            'hyperparameter_set' and every column in ``fingerprint_cols``.
        optimizer, optimizer_other, opset, subspace, seed: Values the
            corresponding columns must equal for a row to belong to the
            experiment.
        fingerprint_cols (list[str]): Columns that must hold a single value
            across all rows of a complete experiment.
        n_hyperparameter_sets (int): Number of hyperparameter sets expected,
            numbered ``0 .. n_hyperparameter_sets - 1``.

    Returns:
        None if the experiment is complete (exactly one row per hyperparameter
        set and identical fingerprint columns); a sorted list of the missing
        hyperparameter-set indices if the row count differs from the expected
        number; an empty list if the row count matches but the rows are
        inconsistent (repeated hyperparameter sets or differing fingerprints).
    """
    selection = (
        (results['optimizer'] == optimizer) &
        (results['optimizer_other'] == optimizer_other) &
        (results['opset'] == opset) &
        (results['subspace'] == subspace) &
        (results['seed'] == seed)
    )
    filtered_results = results[selection]
    hp_sets = filtered_results['hyperparameter_set']

    if len(filtered_results) == n_hyperparameter_sets:
        if hp_sets.nunique() != n_hyperparameter_sets:
            print(
                f"There are exactly {n_hyperparameter_sets} rows, "
                "but the hyperparameter sets are not all distinct."
            )
            return []
        # nunique() == 1 for every fingerprint column means the rows agree.
        if all(filtered_results[fingerprint_cols].nunique() == 1):
            return None
        print("Rows are not identical!")
        return []

    # Only report duplicates when a hyperparameter set really occurs twice;
    # a short row count on its own just means runs are missing.
    if hp_sets.duplicated().any():
        print("Duplicate runs!")

    return sorted(set(range(n_hyperparameter_sets)) - set(hp_sets))
    

def get_result_for_exp(results_df, optimizer, optimizer_other, subspace, opset, seed):
    """Return the rows of one experiment, ordered by 'hyperparameter_set'.

    A row belongs to the experiment when its 'optimizer', 'optimizer_other',
    'subspace', 'opset' and 'seed' columns all equal the given values.
    """
    same_optimizer = results_df['optimizer'] == optimizer
    same_variant = results_df['optimizer_other'] == optimizer_other
    same_subspace = results_df['subspace'] == subspace
    same_opset = results_df['opset'] == opset
    same_seed = results_df['seed'] == seed

    experiment_rows = results_df[
        same_optimizer & same_variant & same_subspace & same_opset & same_seed
    ]
    return experiment_rows.sort_values(by='hyperparameter_set')


def beautify_table(results_table):
    """Return a copy of ``results_table`` with human-readable display columns.

    Reads the 'optimizer', 'opset', 'subspace', 'mean', 'std' and 'max'
    columns and adds 'Test Accuracy (%)' ("mean ± std", two decimals),
    'Maximum Test Accuracy (%)', 'Method', 'Operation Set' and 'Subspace'.
    Rows are sorted by operation set, then subspace, then method. An
    identifier without a label raises KeyError. The input frame is not modified.
    """
    method_labels = {
        "darts": "DARTS",
        "drnas": "DRNAS",
        "gdas": "GDAS",
        "pcdarts": "PC-DARTS",
        "sdarts": "SDARTS",
        "fairdarts": "FairDARTS",
        "oles": "OLES",
    }
    subspace_labels = {
        "deep": "Deep",
        "wide": "Wide",
        "single_cell": "Single Cell",
    }
    opset_labels = {
        "regular": "Regular",
        "all_skip": "All Skip",
        "no_skip": "No Skip",
    }

    def _accuracy_cell(row):
        return f"{row['mean']:.2f} ± {row['std']:.2f}"

    table = results_table.copy()
    table['Test Accuracy (%)'] = table.apply(_accuracy_cell, axis=1)
    table['Maximum Test Accuracy (%)'] = table["max"]
    # Plain dict lookups so an unknown identifier fails loudly with KeyError.
    table["Method"] = table["optimizer"].apply(method_labels.__getitem__)
    table["Operation Set"] = table["opset"].apply(opset_labels.__getitem__)
    table["Subspace"] = table["subspace"].apply(subspace_labels.__getitem__)

    return table.sort_values(by=["Operation Set", "Subspace", "Method"])


def generate_latex_table(results_table):
    """Print ``results_table`` as a LaTeX table.

    The table is first passed through ``beautify_table``; only the display
    columns are kept and the row index is omitted.
    """
    display_columns = [
        "Method",
        "Subspace",
        "Operation Set",
        "Test Accuracy (%)",
        "Maximum Test Accuracy (%)",
    ]
    pretty = beautify_table(results_table)
    print(tabulate(pretty[display_columns], headers='keys', tablefmt='latex', showindex=False))

In [None]:
# Run filter passed to the W&B API. Active criteria: optimizer is one of
# darts/gdas/drnas, lr differs from 0.001, the summary `_step` equals 300,
# and the run state is "finished".
#
# Further criteria that are currently disabled:
#   "config.subspace": subspace
#   "config.opset": opset
#   "config.dataset": dataset
#   "config.optimizer_other": optimizer_other
#   "config.trainer.batch_size": batch_size
#   "config.tag": {"$in": ["models-train", "models-train-mixed-hps",
#                          "models-train-batch3", "models-train-batch4",
#                          "models-train-batch5"]}
filter_dict = dict([
    ("config.optimizer", {"$in": ["darts", "gdas", "drnas"]}),
    ("config.lr", {"$ne": 0.001}),
    ("summary_metrics._step", 300),
    ("state", "finished"),
])

print(filter_dict)
run_ids, run_names = get_run_ids_with_filter(filter_dict)

# Number of runs that matched the filter.
len(run_ids)

In [None]:
# Build one flat record per run: config values overlaid with summary metrics
# (a summary key wins when both define the same name).
all_run_data = []
failed_run_ids = []

for run_id in run_ids:
    run_data = pull_run_data(run_id)
    if run_data is None:
        # pull_run_data already printed the error; skip the run instead of
        # crashing on `None["config"]`.
        failed_run_ids.append(run_id)
        continue

    # Merge into a fresh dict so the run's config object is left untouched.
    all_run_data.append({**run_data["config"], **run_data["summary"]})

if failed_run_ids:
    print(f"Could not retrieve {len(failed_run_ids)} run(s): {failed_run_ids}")

In [None]:
# Columns that must agree across all runs of one experiment; they are passed
# to get_list_of_incomplete_runs for its consistency check.
fingerprint_cols = [
    "optimizer",
    "optimizer_other",
    "subspace",
    "opset",
    "genotype",
    "dataset",
    "seed",
    "_step",
]

# One row per pulled run.
results_df = pd.DataFrame(all_run_data)
results_df.loc[:, fingerprint_cols]

In [None]:
# Save every pulled run (row index included) to a CSV file in the working directory.
results_csv_path = 'complete_model_runs.csv'
results_df.to_csv(results_csv_path, index=True)

In [None]:
samplers = ("darts", "drnas", "gdas")
subspaces = ("deep", "wide", "single_cell")
opsets = ("regular", "all_skip", "no_skip")
darts_others = ("baseline", "oles", "pcdarts", "sdarts", "fairdarts")

# Every experiment to check: only the "darts" sampler is combined with the
# extra variants; the other samplers are checked with "baseline" only.
experiment_grid = [
    (sampler, subspace, opset, other)
    for sampler in samplers
    for subspace in subspaces
    for opset in opsets
    for other in (darts_others if sampler == "darts" else ("baseline",))
]

all_incomplete_runs = {}
exps_complete = []
n_incomplete_runs = 0
n_complete_runs = 0
n = len(experiment_grid)

for sampler, subspace, opset, other in experiment_grid:
    model = f"{sampler}-{other}-{subspace}-{opset}"
    incomplete_runs = get_list_of_incomplete_runs(
        results_df, sampler, other, opset, subspace, fingerprint_cols, seed=0
    )

    if incomplete_runs is None:
        # None means all nine hyperparameter sets are present and consistent.
        print(f"{model} \t\tAll runs complete!")
        n_complete_runs += 9
        exps_complete.append({
            "optimizer": sampler,
            "optimizer_other": other,
            "subspace": subspace,
            "opset": opset,
            "seed": 0,
        })
    elif incomplete_runs:
        # A non-empty list holds the hyperparameter sets that are still missing.
        print(f"{model} \t\tIncomplete runs: {incomplete_runs}")
        all_incomplete_runs[model] = incomplete_runs
        n_incomplete_runs += len(incomplete_runs)
        n_complete_runs += 9 - len(incomplete_runs)
    else:
        # An empty list: nothing is missing, but the rows are inconsistent.
        print(f"{model} \t\tIt's complicated.")

print(f"Total incomplete runs: {n_incomplete_runs}")
print(f"Total complete runs: {n_complete_runs}/{n*9}")
                
                

In [None]:

# Aggregate the test accuracy of every complete experiment into one row each.
all_rows = []

for exp in exps_complete:
    # Describe only the accuracy column rather than the whole frame.
    acc_stats = get_result_for_exp(results_df, **exp)["discrete/test/acc_top1"].describe()
    mean, std, mx = acc_stats["mean"], acc_stats["std"], acc_stats["max"]
    print(exp, mean, std, mx)

    row_data = dict(exp)

    # DARTS variants (oles, pcdarts, sdarts, fairdarts) are reported under the
    # variant's own name so that beautify_table labels them individually
    # instead of showing several indistinguishable "DARTS" rows.
    if exp["optimizer"] == "darts" and exp["optimizer_other"] != "baseline":
        row_data["optimizer"] = exp["optimizer_other"]

    row_data["mean"] = mean
    row_data["std"] = std
    row_data["max"] = mx
    all_rows.append(row_data)

results_table = pd.DataFrame(all_rows)

In [None]:
# Print the aggregated results of the complete experiments as a LaTeX table.
generate_latex_table(results_table)

In [None]:
# Show the missing hyperparameter sets, keyed by "<sampler>-<other>-<subspace>-<opset>".
all_incomplete_runs

In [None]:
# Print one launch command per incomplete experiment. Hyperparameter sets
# 3, 4 and 5 are always included in addition to the missing ones.
for model_key, missing_hpsets in all_incomplete_runs.items():
    # The key has the form "<optimizer>-<other>-<subspace>-<opset>".
    opt, other, space, opset = model_key.split("-")
    hpset_labels = sorted(str(x) for x in set(missing_hpsets) | {3, 4, 5})
    hpsets = ",".join(hpset_labels)

    print(f"python launch_model_train.py --optimizer {opt} --subspace {space} --opset {opset} --dataset cifar10_model --hpsets {hpsets} --seed 0 --epochs 300 --other {other} --tag models-train --genotypes_folder exp/genotypes & sleep 5")


In [None]:
# Peek at the first five experiments that were found complete.
exps_complete[:5]

In [None]:
# Print one launch command per complete experiment, always for
# hyperparameter sets 3, 4 and 5.
for experiment in exps_complete:
    opt = experiment["optimizer"]
    other = experiment["optimizer_other"]
    space = experiment["subspace"]
    opset = experiment["opset"]
    hpsets = "3,4,5"

    print(f"python launch_model_train.py --optimizer {opt} --subspace {space} --opset {opset} --dataset cifar10_model --hpsets {hpsets} --seed 0 --epochs 300 --other {other} --tag models-train --genotypes_folder exp/genotypes & sleep 5")
