# Get training curves from W&B logs

In [1]:
import pandas as pd
import wandb
from tqdm import tqdm

In [2]:
def get_project_run_histories(project_name, entity='Awni00', attr_cols=('group', 'name'), config_cols='all'):
    '''Collect the logged history of every run in a W&B project into one DataFrame.

    Parameters
    ----------
    project_name : str
        Name of the project; the runs are fetched from ``entity + "/" + project_name``.
    entity : str
        W&B entity that owns the project.
    attr_cols : iterable of str
        Run attributes to attach as constant columns. Each is read with
        ``getattr(run, attr, None)``, so a missing attribute becomes ``None``.
    config_cols : iterable of str, or the string 'all'
        Run-config keys to attach as constant columns (``None`` when a run's
        config lacks the key). ``'all'`` uses the union of the config keys of
        all runs, in sorted order so the column layout is reproducible.

    Returns
    -------
    pandas.DataFrame
        The history rows of all runs stacked vertically with a fresh
        0..n-1 index. An empty DataFrame is returned when the project has no runs.
    '''

    def get_run_history(run):
        '''Return the scanned history of one run as a DataFrame (empty if nothing was logged).'''
        # Read the scan in a single pass. Peeking at the first row with
        # `.next()` and then iterating again only works if the scan object
        # restarts on re-iteration; one pass does not depend on that.
        rows = list(run.scan_history())
        if not rows:
            print(f'{run.name} has no history. Skipping...')
            return pd.DataFrame()
        # Build from row dicts so that rows with differing key sets are handled:
        # a key missing from a row becomes NaN instead of raising KeyError, and
        # keys that first appear after the first row are kept as columns.
        return pd.DataFrame(rows)

    def add_constant_column(frame, column, value):
        '''Set `column` of `frame` to `value` on every row, whatever the type of `value`.'''
        if pd.api.types.is_scalar(value):
            frame[column] = value
        else:
            # A list/tuple/dict assigned directly would be interpreted as
            # per-row data (length mismatch or index alignment); repeat the
            # object explicitly so each row holds the whole value.
            frame[column] = pd.Series([value] * len(frame), index=frame.index, dtype=object)

    api = wandb.Api(timeout=60)

    runs = api.runs(entity + "/" + project_name)

    if isinstance(config_cols, str) and config_cols == 'all':
        # Sorted so the resulting column order does not depend on set ordering.
        config_cols = sorted(set().union(*(run.config.keys() for run in runs)), key=str)

    run_history_dfs = []

    for run in tqdm(runs, leave=False):
        run_history = get_run_history(run)

        for config_col in config_cols:
            add_constant_column(run_history, config_col, run.config.get(config_col, None))

        for attr_col in attr_cols:
            add_constant_column(run_history, attr_col, getattr(run, attr_col, None))

        run_history_dfs.append(run_history)

    # pd.concat raises ValueError on an empty list, i.e. for a project without runs.
    if not run_history_dfs:
        return pd.DataFrame()

    runs_history_df = pd.concat(run_history_dfs, axis=0)

    runs_history_df = runs_history_df.reset_index(drop=True)

    return runs_history_df

In [3]:
# Connect to W&B and keep the projects of entity 'awni00' whose name contains 'math-'.
api = wandb.Api()
projects = list(filter(lambda project: 'math-' in project.name, api.projects('awni00')))

In [4]:
# Echo the filtered project list as the notebook cell's output.
projects

[<Project awni00/math-calculus__differentiate>,
 <Project awni00/math-polynomials__coefficient_named>,
 <Project awni00/math-algebra__sequence_next_term>,
 <Project awni00/math-algebra__linear_1d>,
 <Project awni00/math-polynomials__expand>,
 <Project awni00/math-comparison__sort>,
 <Project awni00/math-polynomials__add>,
 <Project awni00/math-polynomials__evaluate>,
 <Project awni00/math-comparison__kth_biggest>,
 <Project awni00/math-polynomials__collect>,
 <Project awni00/math-polynomials__compose>]

In [5]:
import os

# Directory that receives one CSV per project.
save_dir = 'project_run_histories'
# DataFrame.to_csv does not create missing directories, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

config_cols = ['trial']
attr_cols = ['group', 'name']
project_dfs = []
for project in tqdm(projects):
    # Project names look like 'math-<task>'; split on the first '-' only so a
    # hyphen inside the task part is not cut off.
    task_name = project.name.split('-', 1)[1]
    project_df = get_project_run_histories(
        project_name=project.name, entity='awni00', attr_cols=attr_cols, config_cols=config_cols)
    # The per-project file is written before the 'task' column is added.
    project_df.to_csv(f'{save_dir}/run_history_{project.name}.csv')
    project_df['task'] = task_name
    project_dfs.append(project_df)

# Each per-project frame is indexed from 0, so renumber to avoid duplicate
# index labels in the combined frame.
projects_df = pd.concat(project_dfs, ignore_index=True)

[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.
  0%|          | 0/11 [14:10:32<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Write the combined history of all projects to a single CSV inside save_dir.
combined_csv_path = f'{save_dir}/project_run_histories.csv'
projects_df.to_csv(combined_csv_path)