In [234]:
import wandb
import pandas as pd
import os
from tqdm import tqdm

# from table_plotter import print_result_table

In [235]:
api = wandb.Api(timeout=600)


In [236]:
# get all runs for a project
len(runs)

43

In [237]:
# Specify cache directory
cache_dir = "./wandb_cache"
os.makedirs(cache_dir, exist_ok=True)

In [238]:
dfs = []
skipped_runs = []  # List to store IDs of skipped runs

evaluation_keys = ['Evaluation/acc_imp_perc', 'Evaluation/exist_imp_perc', 'Evaluation/reach_imp_perc', 'Evaluation/path_length',
                   'Evaluation/fn_imp_perc', 'Evaluation/fp_imp_perc', 'Evaluation/tn_imp_perc', 'Evaluation/tp_imp_perc', 
                   'Evaluation/solvability', 'Evaluation/playability']
                     

In [239]:
def get_dataframe_from_run(run):
    
    for run in tqdm(runs):
    
        # Define cache filename based on run ID
        cache_file = os.path.join(cache_dir, f"{run.id}.csv")
        
        # Check if cached file exists
        if os.path.exists(cache_file):
            # Load cached DataFrame
            df = pd.read_csv(cache_file)
        else:
            df = run.history(keys=["Evaluation/llm_iteration", *evaluation_keys[:1]])
    
            def append_key(src_df, key):
    
                tgt_df = run.history(keys=[key, "Evaluation/llm_iteration"])
                src_df = pd.merge(src_df, tgt_df, on="Evaluation/llm_iteration", how="outer")
                src_df = src_df.drop(columns=["_step_x", "_step_y"], errors="ignore")
                return src_df
    
            for key in evaluation_keys[1:]:
                try:
                    df = append_key(df, key)
                except Exception as e:
                    print(f"Error: {e} at run ID: {run.id}")
    
            
            # Add run config to DataFrame with prefix 'config.'
            for key, value in run.config.items():
                if isinstance(value, list):
                    value = ",".join(map(str, value))  # Convert list to comma-separated string
                df[key] = value
    
            # 기본값 설정
            default_values = {'n_self_alignment': 0, 'feedback_type': 'default'}
            # 열이 없을 경우 기본값으로 채워 넣기
            for col, value in default_values.items():
                if col not in df.columns:
                    df[col] = value
            
             
            # Filter columns
            key_filter = ['run_id', 'final_state', 'target_character', 'pe', 'branch_factor', 'exp_name', 'evaluator', 'total_iterations', 'n_self_alignment', 'feedback_type', 'total_timesteps', 
                          'reward_feature', 'fewshot', 'problem', 'seed', 
                          'Evaluation/llm_iteration'] + evaluation_keys
            auxiliary_key_filter = []
            
            df['run_id'] = run.id  # Add run ID as a column
            df['final_state'] = run.name
            
            try:
                df = df[key_filter + auxiliary_key_filter]
            except KeyError:
                df = df[key_filter]
            
            # Save DataFrame to cache as CSV
            df.to_csv(cache_file, index=False)
        
        dfs.append(df)
    
    # Concatenate all DataFrames
    df = pd.concat(dfs, ignore_index=True)
    
    return df

In [240]:
runs = api.runs("inchangbaek4907/scenario")
scenario_df = get_dataframe_from_run(runs)
scenario_df

100%|██████████| 160/160 [11:21<00:00,  4.26s/it]


Unnamed: 0,run_id,final_state,target_character,pe,branch_factor,exp_name,evaluator,total_iterations,n_self_alignment,feedback_type,...,Evaluation/acc_imp_perc,Evaluation/exist_imp_perc,Evaluation/reach_imp_perc,Evaluation/path_length,Evaluation/fn_imp_perc,Evaluation/fp_imp_perc,Evaluation/tn_imp_perc,Evaluation/tp_imp_perc,Evaluation/solvability,Evaluation/playability
0,yr1ucu3i,pe-io_it-1_fit-hr_exp-def_t-sce_chr-2_1_s-2,2,io,2,def,hr,1,0,default,...,0.000000,0.816667,0.216667,0.000000,3.0,0.0,0.000000,0.000000,0.000000,0.0
1,1kmo2fe0,pe-io_it-1_fit-hr_exp-def_t-sce_chr-1_1_s-1,1,io,2,def,hr,1,0,default,...,0.166667,1.000000,1.000000,26.000002,2.5,0.0,0.333333,0.166667,0.166667,1.0
2,ys72jt1l,pe-io_it-1_fit-hr_exp-def_t-sce_chr-1_1_s-2,1,io,2,def,hr,1,0,default,...,0.000000,0.866667,0.233333,0.000000,3.0,0.0,0.000000,0.000000,0.000000,0.0
3,waf7vmdz,pe-io_it-1_fit-hr_exp-def_t-sce_chr-3_1_s-1,3,io,2,def,hr,1,0,default,...,0.000000,0.555556,0.100000,0.000000,3.0,0.0,0.000000,0.000000,0.000000,0.0
4,3qc40cdf,pe-io_it-1_fit-hr_exp-def_t-sce_chr-2_1_s-1,2,io,2,def,hr,1,0,default,...,0.000000,0.800000,0.200000,0.000000,3.0,0.0,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,xvccdq3u,pe-got_it-9_fit-hr_exp-sa_t-sce_chr-3_1_s-8,3,got,2,sa,hr,9,5,default,...,0.300000,1.000000,0.877778,28.444445,2.1,0.0,0.000000,0.900000,0.233333,0.3
878,xvccdq3u,pe-got_it-9_fit-hr_exp-sa_t-sce_chr-3_1_s-8,3,got,2,sa,hr,9,5,default,...,1.000000,1.000000,1.000000,26.133335,0.0,0.0,0.000000,3.000000,1.000000,1.0
879,xvccdq3u,pe-got_it-9_fit-hr_exp-sa_t-sce_chr-3_1_s-8,3,got,2,sa,hr,9,5,default,...,0.900000,1.000000,0.988889,26.444445,0.3,0.0,0.000000,2.700000,0.900000,0.9
880,xvccdq3u,pe-got_it-9_fit-hr_exp-sa_t-sce_chr-3_1_s-8,3,got,2,sa,hr,9,5,default,...,1.000000,1.000000,1.000000,26.066668,0.0,0.0,0.000000,3.000000,0.966667,1.0


In [241]:
runs = api.runs("inchangbaek4907/feedback")
feedback_df = get_dataframe_from_run(runs)
feedback_df

100%|██████████| 43/43 [03:18<00:00,  4.62s/it]


Unnamed: 0,run_id,final_state,target_character,pe,branch_factor,exp_name,evaluator,total_iterations,n_self_alignment,feedback_type,...,Evaluation/acc_imp_perc,Evaluation/exist_imp_perc,Evaluation/reach_imp_perc,Evaluation/path_length,Evaluation/fn_imp_perc,Evaluation/fp_imp_perc,Evaluation/tn_imp_perc,Evaluation/tp_imp_perc,Evaluation/solvability,Evaluation/playability
0,yr1ucu3i,pe-io_it-1_fit-hr_exp-def_t-sce_chr-2_1_s-2,2,io,2,def,hr,1,0,default,...,0.000000,0.816667,0.216667,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000
1,1kmo2fe0,pe-io_it-1_fit-hr_exp-def_t-sce_chr-1_1_s-1,1,io,2,def,hr,1,0,default,...,0.166667,1.000000,1.000000,26.000002,2.500000,0.0,0.333333,0.166667,0.166667,1.000000
2,ys72jt1l,pe-io_it-1_fit-hr_exp-def_t-sce_chr-1_1_s-2,1,io,2,def,hr,1,0,default,...,0.000000,0.866667,0.233333,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000
3,waf7vmdz,pe-io_it-1_fit-hr_exp-def_t-sce_chr-3_1_s-1,3,io,2,def,hr,1,0,default,...,0.000000,0.555556,0.100000,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000
4,3qc40cdf,pe-io_it-1_fit-hr_exp-def_t-sce_chr-2_1_s-1,2,io,2,def,hr,1,0,default,...,0.000000,0.800000,0.200000,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1183,mok07ipx,pe-tot_it-9_fit-hr_exp-feedback_t-sce_chr-1_1_...,1,tot,2,feedback,hr,9,0,no,...,0.044444,0.600000,0.200000,28.000000,2.866667,0.0,0.133333,0.000000,0.066667,0.200000
1184,mok07ipx,pe-tot_it-9_fit-hr_exp-feedback_t-sce_chr-1_1_...,1,tot,2,feedback,hr,9,0,no,...,0.344444,0.800000,0.600000,27.000000,1.966667,0.0,0.733333,0.300000,0.366667,0.600000
1185,mok07ipx,pe-tot_it-9_fit-hr_exp-feedback_t-sce_chr-1_1_...,1,tot,2,feedback,hr,9,0,no,...,0.000000,0.600000,0.100000,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000
1186,mok07ipx,pe-tot_it-9_fit-hr_exp-feedback_t-sce_chr-1_1_...,1,tot,2,feedback,hr,9,0,no,...,0.000000,0.600000,0.100000,0.000000,3.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [242]:
# Print summary of skipped runs
print("\nSummary of Skipped Runs:")
print(f"Total skipped runs: {len(skipped_runs)}")
print("Skipped run IDs:", skipped_runs)


Summary of Skipped Runs:
Total skipped runs: 0
Skipped run IDs: []


In [243]:
df = pd.concat([scenario_df, feedback_df], ignore_index=True)

In [244]:
time_str = pd.Timestamp.now().strftime("%Y-%m-%d-%H-%M-%S")

In [245]:
df.to_csv(f"wandb_output_{time_str}.csv", index=False)