# Summarize Runs
Summarize the runs that have been performed to date

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from datetime import datetime
from glob import glob
import pandas as pd
import json
import re
import os

## Find the Result Directories
They each contain a `runtime.log` file

In [2]:
models = glob(os.path.join('runs', '**', 'runtime.log'))

In [3]:
def load_models(log_path):
    """Get the information from the run in
    
    Args:
        log_path (str): Path to the runtime log
    """
    
    # Store the path
    run_dir = os.path.dirname(log_path)
    path_name = os.path.basename(run_dir)
    output = {
        'path': run_dir,
        'param_hash': path_name.split("-")[-1],
        'start_time': datetime.strptime(path_name[:13], "%d%b%y-%H%M%S")
    }
    
    # Get the run parameters
    with open(os.path.join(run_dir, 'run_params.json')) as fp:
        output.update(json.load(fp))
    for p in ['redishost', 'redisport']:
        del output[p]
        
    # Load in the system information
    with open(os.path.join(run_dir, 'host_info.json')) as fp:
        output['hostname'] = json.load(fp)['hostname']
        
    # Count the number of workers
    #  TODO (wardlt): Divide up by manager type
    for m in glob(os.path.join(run_dir, '**', 'manager.log'), recursive=True):
        workers = 0
        with open(m) as fp:
            workers += int(re.findall('Manager will spawn (\d+) workers', fp.read(), re.MULTILINE)[0])
    output['worker_count'] = workers
        
    # Get the number evaluated
    with open(os.path.join(run_dir, 'simulation_records.jsonld')) as fp:
        output['n_evals'] = len(fp.readlines())
    
    return output

In [4]:
results = pd.DataFrame(load_models(m) for m in models)
results

Unnamed: 0,path,param_hash,start_time,parallel_guesses,rl_episodes,search_size,initial_count,hostname,worker_count,n_evals
0,runs/23Jun20-233952-d25748,d25748,2020-06-23 23:39:05,1,10,10,10,logan-ubuntu,2,0
1,runs/23Jun20-234044-1e228d,1e228d,2020-06-23 23:40:04,2,4,10,2,logan-ubuntu,2,11
2,runs/24Jun20-001019-c7a488,c7a488,2020-06-24 00:10:01,2,4,4,2,logan-ubuntu,2,7


In [5]:
results.to_csv('run_data.csv', index=False)