## Compile the redox potentials
We want to see how the energy differences compare across the different methods


In [1]:
import pandas as pd
from ase.db import connect

## Load the Database into Pandas
Get the energies for each system for each NWChem setting

In [2]:
# Flatten every database row into one dict: its key-value pairs plus the
# computed energy, then assemble the dicts into a single DataFrame.
with connect('data.db') as db:
    db_records = [
        {**row.key_value_pairs, 'energy': row.energy}
        for row in db.select('')  # empty selection string, as in the original query
    ]
data = pd.DataFrame(db_records)
print(f'Loaded {len(data)} computations')

Loaded 98 computations


## Compute the redox potentials
Get the energy differences between the oxidized/reduced states and the neutral state of each molecule, for each combination of method settings

In [3]:
# Columns whose combination of values defines one method "level"
method_keys = ['pre_relax_threshold', 'pre_basis_set', 'basis_set', 'xc']

potential_rows = []
# NOTE(review): groupby skips rows with a missing value in any of the
# method keys by default -- confirm every computation defines all four.
for level_id, (level_values, level_frame) in enumerate(data.groupby(method_keys)):
    method_desc = dict(zip(method_keys, level_values))

    # Neutral computations are the reference for both redox states
    neutral_frame = level_frame.query('state=="neutral"')
    for state in ['oxidized', 'reduced']:
        charged_frame = level_frame.query(f'state=="{state}"')

        # Pair charged and neutral computations of the same molecule.
        # After the merge, "_x" columns come from the charged computation
        # and "_y" columns from the neutral one.
        paired = charged_frame.merge(neutral_frame, on='inchi_key')

        # One output record per (charged, neutral) pair
        potential_rows.extend(
            {
                'inchi_key': pair['inchi_key'],
                'state': state,
                # Energy of the charged state relative to the neutral state
                'potential': pair['energy_x'] - pair['energy_y'],
                # Combined cost of the two computations in the pair
                'runtime': pair['runtime_x'] + pair['runtime_y'],
                'guess_runtime': pair['guess_runtime_x'] + pair['guess_runtime_y'],
                'level_id': level_id,
                **method_desc,
            }
            for _, pair in paired.iterrows()
        )
records = pd.DataFrame(potential_rows)

In [4]:
# Save the table of redox potentials as CSV, leaving out the DataFrame index
records.to_csv('nwchem-redox-potentials.csv', index=False)