In [1]:
import numpy as np
import pandas as pd
import wandb
from IPython.display import Markdown, display
from tqdm import tqdm

In [2]:
# W&B API client used by the cells below to query runs.
# NOTE(review): timeout=600 is presumably in seconds — confirm against the wandb.Api docs.
api = wandb.Api(timeout=600)

In [3]:
# Query the runs under the "inchangbaek4907/scenario-aux" path and show how many came back.
runs = api.runs("inchangbaek4907/scenario-aux")
len(runs)

55

In [4]:
# Convert the object returned by api.runs(...) into a plain Python list
# (rebinding the same name), then preview the first five entries.
runs = list(runs)
runs[:5]

[<Run inchangbaek4907/scenario-aux/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w0_chr-1_1_s-1 (finished)>,
 <Run inchangbaek4907/scenario-aux/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w0_chr-2_1_s-1 (finished)>,
 <Run inchangbaek4907/scenario-aux/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w0_chr-1_1_s-3 (finished)>,
 <Run inchangbaek4907/scenario-aux/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w0_chr-2_1_s-6 (finished)>,
 <Run inchangbaek4907/scenario-aux/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w0_chr-1_1_s-6 (finished)>]

In [5]:

class RunInfo:
    """Lightweight record bundling a run's config, its name and the run object.

    The three constructor arguments are stored as-is (no copies are made).
    ``str(info)`` renders the name plus a short excerpt of the config.
    """

    # Config keys included in the ``str()`` summary. Keys that are absent from
    # ``config`` are skipped. Override on a subclass or instance to show more.
    showing_keys = ('gpt_model',)

    config: dict
    name: str
    # Quoted so the annotation is not evaluated when the class body executes.
    run_object: "wandb.apis.public.Run"

    def __init__(self, config, name, run_object):
        """Store the given config mapping, run name and run object unchanged."""
        self.config = config
        self.name = name
        self.run_object = run_object

    def __str__(self):
        """Return a multi-line summary: the name and the ``showing_keys`` subset of config."""
        showing_config = {
            key: self.config[key]
            for key in self.showing_keys
            if key in self.config
        }

        return f"RunInfo(\n\tname={self.name},\n\tconfig={showing_config}\n)"

In [6]:
# Wrap every run that is in the 'finished' state and whose summary value
# 'Evaluation/llm_iteration' is below 6 in a RunInfo.
run_infos = []

for run in tqdm(runs):
    # The summary is only consulted for finished runs (same short-circuit order).
    if run.state != 'finished':
        continue
    if not (run.summary['Evaluation/llm_iteration'] < 6):
        continue
    run_infos.append(RunInfo(config=run.config, name=run.name, run_object=run))

len(run_infos)

100%|██████████| 55/55 [00:00<00:00, 20801.33it/s]


5

In [7]:
# Gather the config mapping of every selected run, one table row per run.
configs = [run_info.config for run_info in tqdm(run_infos[:])]

df = pd.DataFrame(configs)
df

100%|██████████| 5/5 [00:00<00:00, 67650.06it/s]


Unnamed: 0,DEBUG,activation,bypass_feedback_path,NUM_MINIBATCHES,total_iterations,render_freq,max_board_scans,eval_randomize_map_shape,ANNEAL_LR,n_codegen_trials,...,n_samples,NUM_UPDATES,exp_dir,seed,VF_COEF,randomize_map_shape,n_eval_envs,n_gpus,n_agents,hidden_dims
0,True,relu,,4,6,40,3,False,False,3,...,30,,saves/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w...,1,0.5,False,1,1,1,"[64, 256]"
1,True,relu,,4,6,40,3,False,False,3,...,30,,saves/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w...,2,0.5,False,1,1,1,"[64, 256]"
2,True,relu,,4,6,40,3,False,False,3,...,30,,saves/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b0w...,4,0.5,False,1,1,1,"[64, 256]"
3,True,relu,,4,6,40,3,False,False,3,...,30,,saves/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b1w...,6,0.5,False,1,1,1,"[64, 256]"
4,True,relu,,4,6,40,3,False,False,3,...,30,,saves/pe-got_it-6_fit-hr_exp-aux_t-sce_aux-b2w...,6,0.5,False,1,1,1,"[64, 256]"


In [15]:
commands = list()

def get_config(exp_group, target_character):
    """Return the first row of ``df`` matching the given group and character.

    Args:
        exp_group: value compared for equality against the ``exp_group`` column.
        target_character: value compared for equality against the
            ``target_character`` column.

    Returns:
        The first matching row of the notebook-level ``df``.

    Raises:
        IndexError: if no row matches (e.g. when a key is NaN, which never
            compares equal to itself).
    """
    matches = df[(df['exp_group'] == exp_group) & (df['target_character'] == target_character)]
    if matches.empty:
        raise IndexError(
            f"no config row with exp_group={exp_group!r} and target_character={target_character!r}"
        )
    return matches.iloc[0]


def _build_command(config, seed, target_character):
    """Format one ``sbatch_gpu.sh`` command line from a config row plus per-run values."""
    n_self_alignment = config['n_self_alignment']
    # A missing value is treated as 0 and the result is always rendered as an int.
    if pd.isna(n_self_alignment):
        n_self_alignment = 0
    n_self_alignment = int(n_self_alignment)

    # Order matters only for readability of the emitted command; it mirrors
    # the order the overrides have always been written in.
    overrides = [
        "n_envs=600",
        "wandb_project=scenario-aux",
        "task=scenario",
        "overwrite=False",
        f"pe={config['pe']}",
        f"exp_name={config['exp_name']}",
        f"gpt_model={config['gpt_model']}",
        f"total_iterations={config['total_iterations']}",
        f"seed={seed}",
        f"evaluator={config['evaluator']}",
        f"target_character={target_character}",
        f"n_self_alignment={n_self_alignment}",
        "wandb_resume=must",
        f"n_aux_worst={config['n_aux_worst']}",
        f"n_aux_best={config['n_aux_best']}",
    ]
    return "bash sbatch_gpu.sh experiment.py " + " ".join(overrides)


for _, row in df.iterrows():
    # NOTE(review): settings are read from the first row sharing this row's
    # (exp_group, target_character), not from the row itself; only seed and
    # target_character come from the row. Confirm this is intended.
    config = get_config(row['exp_group'], row['target_character'])
    commands.append(_build_command(config, row['seed'], row['target_character']))

commands

['bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=1 evaluator=hr target_character=1 n_self_alignment=0 wandb_resume=must n_aux_worst=0 n_aux_best=1',
 'bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=2 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=0 n_aux_best=1',
 'bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=4 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=1 n_aux_best=0',
 'bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=6 evaluator=hr target_character=2 n_self_ali

In [17]:
# Render the generated commands as one fenced bash block for copy-pasting.
commands_block = " \n".join(commands)
# The closing fence is required; without it the Markdown code block is left open.
display(Markdown("```bash\n" + commands_block + "\n```"))

```bash
bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=1 evaluator=hr target_character=1 n_self_alignment=0 wandb_resume=must n_aux_worst=0 n_aux_best=1 
bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=2 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=0 n_aux_best=1 
bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=4 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=1 n_aux_best=0 
bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=6 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=1 n_aux_best=1 
bash sbatch_gpu.sh experiment.py n_envs=600 wandb_project=scenario-aux task=scenario overwrite=False pe=got exp_name=aux gpt_model=gpt-4o total_iterations=6 seed=6 evaluator=hr target_character=2 n_self_alignment=0 wandb_resume=must n_aux_worst=1 n_aux_best=2