# Summarize Runs
Summarize the runs that have been performed to date

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from datetime import datetime
from glob import glob
import pandas as pd
import json
import re
import os

## Find the Result Directories
Each run directory is identified by the `results.json` file it contains

In [2]:
# Without `recursive=True`, '**' behaves like '*', so this only matches
#  result files exactly one directory below `runs`
results_pattern = os.path.join('runs', '**', 'results.json')
models = glob(results_pattern)

In [3]:
def load_models(log_path):
    """Summarize a single run from the files in its run directory.

    Args:
        log_path (str): Path to a file located directly inside the run directory
            (e.g., its ``results.json``). Only the parent directory is used.
    Returns:
        (dict): Summary of the run with the keys:
            - ``path``: Path to the run directory
            - ``start_time``: Start time parsed from the ``DDMMYY-HHMMSS`` suffix
              of the run directory name
            - Every entry of ``params.json`` except ``redishost`` and ``redisport``
            - ``worker_count``: Total workers reported over all ``manager.log`` files
              found under the run directory (0 if there are none)
            - ``n_evals``: Number of lines in ``results.json``
    Raises:
        ValueError: If the directory name does not end with a ``DDMMYY-HHMMSS`` stamp
        IndexError: If a ``manager.log`` does not report how many workers it spawns
    """

    # Store the path
    run_dir = os.path.dirname(log_path)
    path_name = os.path.basename(run_dir)

    # The timestamp is the last two dash-separated fields of the directory name.
    #  Splitting from the right keeps this working if the prefix contains extra dashes
    time_stamp = "-".join(path_name.rsplit("-", 2)[-2:])
    output = {
        'path': run_dir,
        'start_time': datetime.strptime(time_stamp, "%d%m%y-%H%M%S"),
    }

    # Get the run parameters
    with open(os.path.join(run_dir, 'params.json')) as fp:
        output.update(json.load(fp))

    # Connection details are not part of the summary; tolerate runs that did not record them
    for p in ['redishost', 'redisport']:
        output.pop(p, None)

    # Count the number of workers
    #  TODO (wardlt): Divide up by manager type
    #  The counter must be initialized outside of the loop so that it sums over
    #  every manager and is still defined when no manager log exists
    workers = 0
    for m in glob(os.path.join(run_dir, '**', 'manager.log'), recursive=True):
        with open(m) as fp:
            workers += int(re.findall(r'Manager will spawn (\d+) workers', fp.read())[0])
    output['worker_count'] = workers

    # Get the number evaluated, counted as the number of lines in the results file
    with open(os.path.join(run_dir, 'results.json')) as fp:
        output['n_evals'] = sum(1 for _ in fp)

    return output

In [4]:
# Summarize each run into one record, then assemble all records into a single table
run_records = [load_models(result_path) for result_path in models]
results = pd.DataFrame(run_records)
results

Unnamed: 0,path,start_time,num_guesses,num_parallel,dim,runtime,runtime_var,file,worker_count,n_evals
0,runs/batch-N20-P4-210720-173917,2020-07-21 17:39:17,20,4,4,5.0,3.0,batch.py,4,8


In [5]:
# Save the summary table to disk without writing the row index as a column
results.to_csv(
    'run_data.csv',
    index=False,
)