### Get HP search runs and get their best validation score (and corresponding eval score, unused here)

In [None]:
import pandas as pd 
import numpy as np
import wandb
# W&B API client; the download cell below uses it to list the project's runs.
api = wandb.Api()

# Which set of W&B runs to analyse; change the index to switch mode.
_MODES = ("hpsearch2", "hpsearch3", "hpsearchrec", "full_run", "full_run_long")
MODE = _MODES[3]

# Project is specified by <entity/project-name>
_projects = dict(zip(_MODES, (
    "maranc/thesis-hpsearch",
    "maranc/thesis-hpsearch3",
    "maranc/thesis-hpsearchrec",
    "maranc/thesis-results",
    "maranc/thesis-results-long",
)))
project_name = _projects[MODE]

# Hyperparameter columns collected for each run. Every mode shares the same
# names except for the network description: "hpsearch2" has a single
# "n_layers" entry where the other modes have "net_layers" and "net_width".
_hp_head = ["learning_rate", "batch_size", "gamma", "train/clip_range",
            "gae_lambda", "ent_coef", "vf_coef"]
_hp_tail = ["n_epochs", "max_grad_norm"]
_hp_by_mode = {
    mode: _hp_head
    + (["n_layers"] if mode == "hpsearch2" else ["net_layers", "net_width"])
    + _hp_tail
    for mode in _MODES
}
hyperparemeters = _hp_by_mode[MODE]

# Experiment-setting columns read from each run's config.
exp_parameters = ["mast_distancing", "privileged", "changing_wind", "dynamic_mode"]

# Identification and score columns, in the order they are written to a row.
general_parameters = [
    "name",
    "run_id",
    "eval_power",
    "eval_power_std",
    "n_steps",
    "validation_power",
    "validation_power_argmax_pd",
    "validation_power_argmax",
]



In [None]:
# Download the runs of the selected project and keep, for each run, the
# evaluation score logged at the step where the validation score is highest.
# One row per kept run; columns are general_parameters + hyperparemeters +
# exp_parameters, in that order.
dict_runs = pd.DataFrame(columns=general_parameters + hyperparemeters + exp_parameters)

runs = api.runs(project_name)
print(f"Found {len(runs)} runs")
for run in runs:
    # Filter on name/state first so that no history is fetched for runs that
    # are discarded anyway. Modes other than "hpsearch3" and "full_run" keep
    # every run of the project.
    if MODE == "hpsearch3":
        if "hp_search3" not in run.name or run.state != "finished":
            continue
    elif MODE == "full_run":
        if "hp_search" in run.name or run.state != "finished":
            continue

    history = run.history()

    # A run with no validation values has no "best step". Skip it explicitly:
    # otherwise the score variables below would be undefined (first run) or
    # still hold the previous run's values, and a wrong row would be stored.
    if "eval/validation_total_power" in history.columns:
        validation_power = history["eval/validation_total_power"].dropna()
    else:
        validation_power = pd.Series(dtype=float)
    if validation_power.empty:
        print(f"Skipping run {run.name}: no validation power logged")
        continue

    # Best validation power, and the evaluation score at the same history row
    idx = validation_power.idxmax()
    eval_power = history["eval/total_power"][idx]
    eval_power_std = history["eval/total_power_std"][idx]
    validation_power_max = validation_power.max()
    validation_power_argmax_pd = idx  # index label within the history frame
    validation_power_argmax = np.argmax(validation_power)  # position among non-NaN values
    print(f"Run {run.name} has eval power {eval_power}")

    # Hyperparameters are read from the run config when present there,
    # otherwise from the run summary.
    hp_dict = {
        k: run.config[k] if k in run.config else run.summary[k]
        for k in hyperparemeters
    }
    exp_dict = {k: run.config[k] for k in exp_parameters}

    # Key order must match general_parameters: the row is assigned by position.
    general_dict = {
        "name": run.name,
        "run_id": run.id,
        "eval_power": eval_power,
        "eval_power_std": eval_power_std,
        "n_steps": run.config["n_steps"],
        "validation_power": validation_power_max,
        "validation_power_argmax_pd": validation_power_argmax_pd,
        "validation_power_argmax": validation_power_argmax,
    }

    dict_runs.loc[len(dict_runs)] = [*general_dict.values(), *hp_dict.values(), *exp_dict.values()]
print(dict_runs)

# Derive the experiment identifier from the run name by dropping its leading
# "_"-separated fields (and, for the full runs, everything from the second
# "PPO" occurrence onwards).
if MODE in ("full_run", "full_run_long"):
    dict_runs["filtered_name"] = dict_runs["name"].apply(lambda x: "PPO".join(x.split("PPO")[:2]))
    dict_runs["filtered_name"] = dict_runs["filtered_name"].apply(lambda x: "_".join(x.split("_")[2:]))
else:
    dict_runs["filtered_name"] = dict_runs["name"].apply(lambda x: "_".join(x.split("_")[3:]))
dict_runs.to_csv(f"data/eval/scores/{MODE}_wandb_data.csv")

### Reload from download

In [None]:
import pandas as pd

# Read back the table written by the download cell for the current MODE
dict_runs = pd.read_csv(f"data/eval/scores/{MODE}_wandb_data.csv")

# Distinct experiment identifiers, in sorted order
exp_types = sorted(dict_runs["filtered_name"].unique())
print(f"Detected {len(exp_types)} different experiment types")

# List each experiment type preceded by its number of runs
for exp in exp_types:
    n_runs = (dict_runs["filtered_name"] == exp).sum()
    print(f"({n_runs}) {exp}")


### Save results for each experiment (this is for final results and not hpsearch)

In [None]:
def _scores_filename(exp, exp_params):
    """Build the base name (no directory, no extension) of a score file.

    ``exp`` is the experiment's filtered run name; ``exp_params`` maps
    "dynamic_mode", "privileged", "mast_distancing" and "changing_wind" to
    the values of that experiment.
    """
    # Layout tag, taken from markers in the experiment name (may stay empty)
    if "4wt" in exp:
        layout = "4Symm"
    elif "_nt8_" in exp:
        layout = "8LHS"
    elif "_nt16_" in exp:
        layout = "16LHS"
    else:
        layout = ""

    # Setting tag, taken from the experiment parameters
    if exp_params["dynamic_mode"] == "observation_points":
        setting = "DynOP"
        if exp_params["privileged"]:
            # MDs are [25, 75, 125] and [100, 150, 200]
            distancing = exp_params["mast_distancing"]
            if distancing in [25, 100]:
                setting += "_p0"
            elif distancing in [75, 150]:
                setting += "_p1"
            else:
                setting += "_p2"
    # Explicit comparison kept on purpose: a NaN flag is truthy but != True
    elif exp_params["changing_wind"] == True:
        setting = "CW"
    else:
        setting = "FW"

    algo = "_RecPPO" if "recPPO" in exp else "_PPO"
    return f"{layout}_{setting}{algo}_total_powers"


# Empty frame with the best-HP columns; this cell does not add rows to it.
best_hp_df = pd.DataFrame(columns=["filtered_name"] + exp_parameters + hyperparemeters)

# Save the per-run eval scores of every experiment, to allow preliminary analysis
for exp in exp_types:
    runs = dict_runs[dict_runs["filtered_name"] == exp]
    print(f"Found {len(runs)} runs for {exp}")

    # Experiment settings are read from the first run of the group
    exp_params = runs.iloc[0][exp_parameters].to_dict()
    filename = _scores_filename(exp, exp_params)

    # One entry per run of this experiment; `scores` is saved as a 1-D array
    scores = runs["eval_power"].to_numpy()
    scores_std = runs["eval_power_std"].to_numpy()
    np.save(f"data/eval/scores/{filename}.npy", scores)

    print(f"{exp} ({filename})")
    print(f"Scores: {scores[:, None]} mean: {np.mean(scores)} +- {np.mean(scores_std)}")


### Compute best HPs (best mean val) and save them 

In [None]:
# For every experiment type, pick the hyperparameter combination with the
# highest mean validation power, store it, and save its mean eval power.
best_hp_df = pd.DataFrame(columns=["filtered_name"] + exp_parameters + hyperparemeters)

# Statistics computed per hyperparameter combination
_stats = {
    "validation_power": ["mean", "std", "count"],
    "eval_power": ["mean", "std"],
    "eval_power_std": ["mean", "std"],
}

for exp in exp_types:
    runs = dict_runs[dict_runs["filtered_name"] == exp]

    # One row per hyperparameter combination; columns are (name, statistic)
    summary = runs.groupby(hyperparemeters).agg(_stats).reset_index()

    # Row of the combination with the highest mean validation power
    best_label = summary[("validation_power", "mean")].idxmax()
    best_row = summary.loc[best_label]

    # Hyperparameter keys come back as tuples; keep only the name part
    best_hp = {key[0]: value for key, value in best_row[hyperparemeters].to_dict().items()}
    exp_params = runs.iloc[0][exp_parameters].to_dict()

    best_hp_df.loc[len(best_hp_df)] = {"filtered_name": exp, **best_hp, **exp_params}

    # Save eval results, to allow preliminary analysis.
    # The file name is <layout>_<setting><algorithm>_total_powers.
    if "4wt" in exp:
        layout_tag = "4Symm"
    elif "_nt8_" in exp:
        layout_tag = "8LHS"
    elif "_nt16_" in exp:
        layout_tag = "16LHS"
    else:
        layout_tag = ""

    if exp_params["dynamic_mode"] == "observation_points":
        setting_tag = "DynOP"
        if exp_params["privileged"]:
            # MDs are [25, 75, 125] and [100, 150, 200]
            if exp_params["mast_distancing"] in [25, 100]:
                setting_tag += "_p0"
            elif exp_params["mast_distancing"] in [75, 150]:
                setting_tag += "_p1"
            else:
                setting_tag += "_p2"
    # Explicit comparison kept on purpose: a NaN flag is truthy but != True
    elif exp_params["changing_wind"] == True:
        setting_tag = "CW"
    else:
        setting_tag = "FW"

    algo_tag = "_RecPPO" if "recPPO" in exp else "_PPO"
    filename = f"{layout_tag}_{setting_tag}{algo_tag}_total_powers"

    # Both arrays have shape (1, 1, 1): the mean eval power of the selected
    # combination, and the std of eval power across its runs.
    eval_mean = best_row[('eval_power', 'mean')]
    eval_std = best_row[('eval_power', 'std')]
    np.save(f"data/eval/scores/{filename}.npy", np.array([[[eval_mean]]]))
    np.save(f"data/eval/scores/{filename}_std.npy", np.array([[[eval_std]]]))

    print(f"{exp} ({filename})")
    print(f"Validation power: {best_row[('validation_power', 'mean')]} +- {best_row[('validation_power', 'std')]}")
    print(f"Evaluation power: {best_row[('eval_power', 'mean')]} +- {best_row[('eval_power', 'std')]}")
    print("")

print(best_hp_df)
best_hp_df.to_csv(f"data/eval/scores/{MODE}_best_hp.csv")
