# Summarize Runs
Summarize the runs that have been performed to date

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from datetime import datetime
from glob import glob
import pandas as pd
import json
import re
import os

## Find the Result Directories
Each run directory contains a `runtime.log` file, which we use to locate the runs

In [2]:
# Runtime logs sit one directory below `runs`; because `recursive=True` is not
#  passed to `glob`, the `**` wildcard only matches a single directory level
run_log_pattern = os.path.join('runs', '**', 'runtime.log')
models = glob(run_log_pattern)

In [3]:
def load_models(log_path):
    """Get the information about a single run from its run directory

    Reads the files stored in the same directory as the runtime log
    (`run_params.json`, `host_info.json`, any `manager.log` files and,
    if present, `simulation_records.jsonld`) as well as the quantum
    chemistry specification file named by the `qc_spec` run parameter.

    Args:
        log_path (str): Path to the runtime log
    Returns:
        (dict) Summary of the run: the run parameters (without the Redis
        connection settings) plus `path`, `param_hash`, `start_time`,
        `hostname`, `program`, `worker_count` and, only when the results
        file exists, `n_evals`
    """

    # Store the path
    run_dir = os.path.dirname(log_path)
    path_name = os.path.basename(run_dir)

    # Directory names look like "26Aug20-152230-98c2bf": the first two
    #  hyphen-separated fields are the start time and the last is the hash.
    #  Use the whole time field; a fixed-width slice that is one character too
    #  short still parses, but silently reports the wrong number of seconds
    name_fields = path_name.split("-")
    output = {
        'path': run_dir,
        'param_hash': name_fields[-1],
        'start_time': datetime.strptime("-".join(name_fields[:2]), "%d%b%y-%H%M%S")
    }

    # Get the run parameters
    with open(os.path.join(run_dir, 'run_params.json')) as fp:
        output.update(json.load(fp))
    # Remove the Redis connection settings, tolerating runs that lack them
    for p in ['redishost', 'redisport']:
        output.pop(p, None)

    # Load in the system information
    with open(os.path.join(run_dir, 'host_info.json')) as fp:
        output['hostname'] = json.load(fp)['hostname']

    # Load in the quantum chemistry information
    with open(output['qc_spec']) as fp:
        qcspec = json.load(fp)
    output['program'] = qcspec['program']

    # Count the number of workers
    #  TODO (wardlt): Divide up by manager type
    spawn_pattern = re.compile(r'Manager will spawn (\d+) workers')
    workers = 0
    for m in glob(os.path.join(run_dir, '**', 'manager.log'), recursive=True):
        with open(m) as fp:
            spawn_match = spawn_pattern.search(fp.read())
        # A log without the spawn message contributes no workers
        if spawn_match is not None:
            workers += int(spawn_match.group(1))
    output['worker_count'] = workers

    # Get the number evaluated: one record per line of the results file
    result_file = os.path.join(run_dir, 'simulation_records.jsonld')
    if os.path.isfile(result_file):
        with open(result_file) as fp:
            # Count lazily so that the whole file is never held in memory
            output['n_evals'] = sum(1 for _ in fp)

    return output

In [4]:
# Summarize every run as one row, ordered from the oldest to the newest
results = (
    pd.DataFrame([load_models(log_path) for log_path in models])
    .sort_values('start_time')
)

# Show the three most recent runs
results[['path', 'n_evals', 'worker_count']].tail(3)

Unnamed: 0,path,n_evals,worker_count
9,runs/26Aug20-152230-98c2bf,989,114
4,runs/26Aug20-161834-a5ffd9,1315,114
0,runs/26Aug20-162040-a5ffd9,4422,114


In [5]:
# Save the summary table to disk, leaving out the DataFrame index
results.to_csv(path_or_buf='run_data.csv', index=False)