# Summarize Runs
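Summarize the runs that have been performed to date: for each run, collect its parameters, start time, worker count, and number of evaluations, then save the resulting table to `run_data.csv`.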

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from datetime import datetime
from pathlib import Path
import pandas as pd
import json
import re

## Find the Result Directories
Each run directory contains a `results.json` file, so we locate runs by searching for those files under `runs/`.

In [2]:
# Each run is identified by the `results.json` file stored in its directory
runs = [*Path('runs').rglob('results.json')]
print(f'Found {len(runs)} runs')

Found 4 runs


In [5]:
def load_run_information(log_path):
    """Gather summary information about a single run

    Reads the files stored in the directory that holds ``log_path``:
    ``params.json`` (run parameters), every ``manager.log`` found beneath
    the run directory (worker counts), and ``results.json``
    (each non-blank line is counted as one evaluation).

    Args:
        log_path (str or Path): Path to the ``results.json`` file of the run.
            The name of its parent directory must end in
            ``-<ddmmyy>-<HHMMSS>``, which is parsed as the start time.
    Returns:
        (dict) Summary of the run with keys ``path``, ``start_time``,
        every key found in ``params.json``, ``worker_count`` and ``n_evals``
    Raises:
        ValueError: If the directory name does not end with a valid timestamp
        IndexError: If a ``manager.log`` does not report its worker count
    """

    # Accept plain strings as well as Path objects
    log_path = Path(log_path)
    run_dir = log_path.parent

    # The start time is stored in the last two dash-separated fields of the
    #  directory name. Splitting from the right keeps this working no matter
    #  how many dashes appear in the leading part of the name
    timestamp = "-".join(run_dir.name.rsplit("-", 2)[-2:])
    output = {
        'path': str(run_dir),
        'start_time': datetime.strptime(timestamp, "%d%m%y-%H%M%S"),
    }

    # Get the run parameters
    with open(run_dir / 'params.json') as fp:
        output.update(json.load(fp))

    # Count the number of workers
    #  TODO (wardlt): Divide up by manager type
    #  Raw string so that `\d` reaches the regex engine instead of being
    #  treated as an (invalid) string escape
    spawn_pattern = re.compile(r'Manager will spawn (\d+) workers')
    workers = 0
    for manager_log in run_dir.rglob('manager.log'):
        with open(manager_log) as fp:
            # Only the first report found in each log is used
            workers += int(spawn_pattern.findall(fp.read())[0])
    output['worker_count'] = workers

    # Get the number evaluated, ignoring blank lines (e.g., a trailing
    #  newline at the end of the file) so they do not inflate the count
    with open(run_dir / 'results.json') as fp:
        output['n_evals'] = sum(1 for line in fp if line.strip())

    return output

In [6]:
# One row per run: apply the loader to every `results.json` path in `runs`
results = pd.DataFrame(map(load_run_information, runs))
results

Unnamed: 0,path,start_time,num_guesses,num_parallel,dim,opt_delay,runtime,runtime_var,file,worker_count,n_evals
0,runs/streaming-N100-P10-010124-204640,2024-01-01 20:46:40,100,10,4,0.0,2,0.1,streaming.py,10,100
1,runs/interleaved-N100-P10-010124-164354,2024-01-01 16:43:54,100,10,4,2.0,2,0.1,interleaved.py,11,100
2,runs/streaming-N100-P10-010124-165825,2024-01-01 16:58:25,100,10,4,0.0,2,0.1,streaming.py,10,100
3,runs/batch-N100-P10-010124-164823,2024-01-01 16:48:23,100,10,4,,2,4.0,batch.py,10,100


In [7]:
# Save the summary table; the row index is left out of the file
results.to_csv(path_or_buf='run_data.csv', index=False)