# Consolidate Results
Get the Hessians for all molecules we ran and collapse them into a comparison table, where each is compared against the exact Hessian at MP2/AVTZ.

In [None]:
from jitterbug.compare import compare_hessians
from dataclasses import asdict
from pathlib import Path
from ase.io import read
from tqdm import tqdm
import pandas as pd
import numpy as np

Configuration

In [None]:
# Level of theory (method, basis set) whose Hessians serve as the reference
top_method, top_basis = 'mp2', 'aug-cc-pvtz'

## Find All Runs
Get all runs of jitterbug and store the path, settings, and execution time.

In [None]:
# Each directory that holds a `hessian.npy` file is counted as one run
run_root = Path('../../run/')
exact_runs = [hessian_file.parent for hessian_file in run_root.rglob('hessian.npy')]
print(f'Found {len(exact_runs)} exact Hessians')

In [None]:
def load_run(path: Path) -> dict:
    """Summarize a single run directory

    The directory name is split on underscores into the method, the basis set,
    and a third field that is ignored; the name of the parent directory is
    used as the molecule name.

    Args:
        path: Directory holding the outputs of one run. It must contain
            ``<molecule name>.xyz`` and ``simulation-results.json``
    Returns:
        Dictionary describing the run: its path, the loaded structure,
        molecule and level-of-theory labels, atom counts, the number of
        successful results, the host, and two cost figures
    """

    # Labels are encoded in the directory layout
    method, basis, _ = path.name.split("_")
    mol_name = path.parent.name

    # Load the structure and derive its composition
    atoms = read(path / f'{mol_name}.xyz')
    formula = atoms.get_chemical_formula()
    heavy_count = len([symbol for symbol in atoms.get_chemical_symbols() if symbol != 'H'])

    # Keep only the records flagged as successful
    successes = pd.read_json(path / 'simulation-results.json', lines=True).query('success')
    n_success = len(successes)
    timings = successes['time_running']

    # Total time as recorded, and an estimate that replaces every record by the
    #  median (presumably to discount slow start-up tasks -- TODO confirm)
    total_time = timings.sum()
    total_time_median = timings.median() * n_success

    # Infer the machine from the first record: hostnames that begin with "nid"
    #  are reported as "theta", anything else is passed through unchanged
    hostname = successes.iloc[0]['worker_info']['hostname']
    host = 'theta' if hostname.startswith('nid') else hostname

    # Costs are the times divided by 3600 (presumably seconds -> hours)
    summary = {
        'path': path,
        'atoms': atoms,
        'geometry': mol_name,
        'formula': formula,
        'heavy_atoms': heavy_count,
        'n_atoms': len(atoms),
        'method': method,
        'basis': basis,
        'num_energies': n_success,
        'host': host,
        'cost': total_time / 3600.,
        'cost_corrected': total_time_median / 3600.,
    }
    return summary

In [None]:
# Load every run (with a progress bar) and gather the summaries into one table
all_runs = pd.DataFrame([load_run(run_dir) for run_dir in tqdm(exact_runs)])

## Compare to Top Level
Compare each Hessian to that of the top level

In [None]:
# Select the runs performed at the reference method and basis set
at_top_level = (all_runs['method'] == top_method) & (all_runs['basis'] == top_basis)
top_mols = all_runs[at_top_level]
print(f'Ran {len(top_mols)} molecules at {top_method}//{top_basis}')

Keep only the runs for molecules that also have a Hessian at the top level

In [None]:
# Keep every run whose geometry also appears among the top-level runs.
#  `Series.isin` is the vectorized membership test, replacing a per-element
#  `apply` over a hand-built set
completed_mols = all_runs[all_runs['geometry'].isin(top_mols['geometry'])]
print(f'Have a total of {len(completed_mols)} at this level')

Run the comparisons

In [None]:
# Build one record per (reference run, run of the same geometry) pair
comparisons = []
for _, top_row in top_mols.iterrows():
    # Reference Hessian and structure for this molecule at the top level
    exact_hess = np.load(top_row['path'] / 'hessian.npy')
    atoms = top_row['atoms']

    # Select runs of the same geometry with a boolean mask, so the geometry
    #  name never has to be spliced into a query string
    same_geometry = completed_mols[completed_mols['geometry'] == top_row['geometry']]

    # Distinct loop names keep the reference row from being shadowed
    for _, run_row in same_geometry.iterrows():
        my_hess = np.load(run_row['path'] / 'hessian.npy')
        comparison = compare_hessians(atoms, exact_hess, my_hess)

        # Flatten the run metadata and the comparison fields into one record
        comparisons.append({**run_row, **asdict(comparison)})
comparisons = pd.DataFrame(comparisons)

Save the results

In [None]:
# The output file is named after the reference level of theory; the index is not written
output_name = f'comparison-to-{top_method}_{top_basis}.csv'
comparisons.to_csv(output_name, index=False)