# Post-Process Gaussian Data
Compute the ionization potential (IP) and electron affinity (EA) of each molecule from the Gaussian results.

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from moldesign.simulate.qcfractal import f
from qcelemental.physical_constants import constants
import pandas as pd



## Load in the data
Load the results of Naveen's computations from the seed dataset.

In [2]:
# Read the seed results table and report how many rows (molecules) it holds
gaussian_data = pd.read_csv(
    'seed-datasets/g4mp2_results.csv'
)
print(f'Loaded {gaussian_data.shape[0]} molecules')

Loaded 2744 molecules


## Compute the IP and EA
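The gas-phase values are computed from the G4MP2 energies; the per-solvent values add a correction taken from the B3LYP solvent columns.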

In [3]:
def compute_ea_ip(level='b3lyp', data=None):
    """Add electron affinity (EA) and ionization potential (IP) columns to a table.

    For the reduced state (EA) and the oxidized state (IP), the energy change
    relative to the neutral molecule, ``u0_{state}.{level} - u0.{level}``, is
    treated as a value in hartree, converted to volts by dividing by ``f``,
    and stored in the ``EA`` / ``IP`` column. EA carries a sign prefactor of
    -1, IP a prefactor of +1.

    For each solvent the same energy change is adjusted by the difference
    between the ``solvent_{state}.B3LYP_{solvent}`` and
    ``solvent_neutral.B3LYP_{solvent}`` columns (treated as kcal/mol) and
    stored as ``EA_{solvent}`` / ``IP_{solvent}``. The solvent columns are
    always the ``B3LYP`` ones, whatever ``level`` is.

    Args:
        level: Suffix of the ``u0`` energy columns to use (e.g. ``'g4mp2'``).
        data: DataFrame to read from and to write the new columns into.
            Defaults to the notebook-level ``gaussian_data`` table.

    Returns:
        None. The new columns are added to ``data`` in place.

    Raises:
        KeyError: If a required energy or solvent column is missing.
    """
    if data is None:
        # Backward-compatible default: operate on the notebook's global table
        data = gaussian_data

    # Loop-invariant values, hoisted out of the state/solvent loops
    to_kcal = constants.hartree2kcalmol
    solvents = ['water', 'acetonitrile', 'ethanol', 'dimethylsulfoxide', 'acetone']

    # (charged-state column label, output column name, sign prefactor)
    states = [('reduced', 'EA', -1), ('oxidized', 'IP', 1)]

    for label, name, p in states:
        # Energy change on charging, in the gas phase
        g_chg = data[f'u0_{label}.{level}'] - data[f'u0.{level}']
        g_chg_u = constants.ureg.Quantity(g_chg.values * to_kcal, 'kcal/mol')

        # NOTE(review): `f` comes from moldesign.simulate.qcfractal and is
        # presumably the Faraday constant (energy / f -> volts) -- confirm.
        data[name] = (p * g_chg_u / f).to("V").magnitude

        # Correct for solvent
        for solv in solvents:
            # Bring the solvent columns to the same unit as `g_chg` before summing
            solv_neu = data[f'solvent_neutral.B3LYP_{solv}'] / to_kcal
            solv_chg = data[f'solvent_{label}.B3LYP_{solv}'] / to_kcal
            g_solv = constants.ureg.Quantity(
                (g_chg + solv_chg - solv_neu).values * to_kcal, 'kcal/mol')
            data[f'{name}_{solv}'] = (p * g_solv / f).to("V").magnitude
# Fill in the EA/IP columns using the 'g4mp2' energy columns
compute_ea_ip(level='g4mp2')

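Remove outliers: drop molecules whose IP or EA falls outside the accepted range, then drop molecules with an extreme value in any solvent column.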

In [4]:
# Keep only molecules with IP > 3 and -4 < EA < 5 (both columns are in volts).
# Rebinding the name instead of using `inplace=True` leaves the previous frame
# object untouched and follows the pandas recommendation to avoid `inplace`.
gaussian_data = gaussian_data.query('IP > 3 and EA > -4 and EA < 5')

In [5]:
# Names of every column whose label starts with 'solv'
solv_cols = list(filter(lambda col: col.startswith('solv'), gaussian_data.columns))

In [6]:
# Drop every row in which at least one of the `solv_cols` values is below -200
gaussian_data = gaussian_data.loc[
    ~gaussian_data[solv_cols].lt(-200).any(axis=1)
]

In [7]:
# Write the filtered table to disk without the DataFrame index column
gaussian_data.to_csv(
    'datasets/gaussian-redox.csv',
    index=False,
)