In [None]:
import pandas
import automol
import chemkin_io
import ioformat
import autoreact

In [None]:
# Input files for the full mechanism:
# - INP_SPC_PATH: species table (CSV), loaded with pandas.read_csv
# - INP_RXN_PATH: mechanism text, read and then parsed with chemkin_io
INP_SPC_PATH = 'mechanisms/nuigmech1.1.csv'
INP_RXN_PATH = 'mechanisms/nuigmech1.1.txt'

In [None]:
# 1. read in the species CSV
# The first CSV column becomes the row index (index_col=0) and fields are
# parsed with double-quote quoting. Later cells use the 'smiles', 'inchi'
# and 'name' columns of this table.
SPC_DF = pandas.read_csv(INP_SPC_PATH, quotechar='"', index_col=0)

In [None]:
# clean up and fill in the species dataframe
# NOTE: run this cell once per load of SPC_DF. The 'singlet'/'triplet' tags
# that drive the multiplicity assignment are stripped from the SMILES at the
# end of the cell, so a second run on the same frame would no longer see them.

# drop excited species: rows with a missing SMILES entry, or whose SMILES
# entry contains the word 'excited'. The frame is rebound rather than mutated
# in place.
DROP_MASK = (
    SPC_DF['smiles'].isnull()
    | SPC_DF['smiles'].str.contains('excited', regex=False, na=False)
)
SPC_DF = SPC_DF.drop(index=SPC_DF.index[DROP_MASK.to_numpy()])

# add neutral charge
SPC_DF['charge'] = 0

# add multiplicities
# InChIs that get multiplicity 3 unless the SMILES entry is tagged 'singlet'
TRIP_CHIS = [
    'InChI=1S/O',
    'InChI=1S/O2/c1-2',
    'InChI=1S/CH2/h1H2',
    'InChI=1S/C3H2/c1-3-2/h1-2H',
    'InChI=1S/C3H2/c1-3-2/h1H2',
    'InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-2,4-5H,3H2',
    'InChI=1S/C2H3N/c1-2-3/h2H,1H2',
]
# strip anything that precedes the 'InChI=' prefix
SPC_DF['inchi'] = SPC_DF['inchi'].str.replace('.*InChI=', 'InChI=', regex=True)


def _spin_multiplicity(row):
    """Pick the spin multiplicity for one species row.

    :param row: a row of the species table, with 'smiles' and 'inchi' entries
    :returns: 1 if the SMILES entry contains 'singlet'; otherwise 3 if it
        contains 'triplet' or the InChI is listed in TRIP_CHIS; otherwise the
        value of automol.chi.low_spin_multiplicity for the InChI
    """
    smi, ich = row['smiles'], row['inchi']
    if 'singlet' in smi:
        return 1
    if 'triplet' in smi or ich in TRIP_CHIS:
        return 3
    return automol.chi.low_spin_multiplicity(ich)


SPC_DF['mult'] = SPC_DF.apply(_spin_multiplicity, axis=1)
# the spin tags have been consumed; remove them as plain (non-regex) substrings
SPC_DF['smiles'] = (
    SPC_DF['smiles']
    .str.replace('triplet', '', regex=False)
    .str.replace('singlet', '', regex=False)
)
# add formulas (used in filtering below)
SPC_DF['fml'] = SPC_DF['inchi'].map(automol.chi.formula)
# remove stereo from InChIs
SPC_DF['inchi'] = SPC_DF['inchi'].map(automol.inchi.without_stereo)

In [None]:
# Consistency check: the InChI generated from each SMILES entry must equal the
# stored InChI; any disagreement points at a bad InChI or SMILES in the table
ICH_FROM_SMI = SPC_DF['smiles'].map(automol.smiles.inchi)
IDXS = SPC_DF.index[SPC_DF['inchi'].ne(ICH_FROM_SMI)].tolist()
print(
    f"InChI/SMILES mismatches: {IDXS}" if IDXS
    else "All InChI/SMILES mismatches are fixed!"
)
# do not continue until every mismatch has been resolved
assert not IDXS
# re-calculate ChIs from SMILES, assigning AMChIs to tautomers
SPC_DF['inchi'] = SPC_DF['smiles'].map(automol.smiles.chi)

In [None]:
# set 'name' as the species DataFrame index, for easy look-ups
# Guarded so that re-running this cell is a no-op: once 'name' has been moved
# into the index it is no longer a column, and an unconditional set_index
# would raise a KeyError on the second run.
if 'name' in SPC_DF.columns:
    SPC_DF = SPC_DF.set_index('name')
elif SPC_DF.index.name != 'name':
    # neither a 'name' column nor a 'name' index: the table is malformed
    raise KeyError("species table has no 'name' column to use as its index")

In [None]:
# 2. read in the mechanism
# Load the mechanism file text (path given relative to the current directory)
# and parse its reactions. Later cells iterate over the keys of RXN_DCT and
# treat each key as a 3-item tuple whose first two items are sequences of
# species names.
MECH_STR = ioformat.pathtools.read_file('.', INP_RXN_PATH)
RXN_DCT = chemkin_io.parser.mechanism.reactions(MECH_STR)

In [None]:
# Keep only the reactions whose species are all still present in the species
# table (this removes reactions involving the dropped excited species), and
# replace every set of kinetic parameters with one shared dummy value
DUMMY_PARAMS = autoreact.params.RxnParams(arr_dct={'arr_tuples': [[1, 0, 0]]})
RXN_DCT = dict.fromkeys(
    (rxn for rxn in RXN_DCT
     if not any(spc not in SPC_DF.index for spc in rxn[0] + rxn[1])),
    DUMMY_PARAMS,
)

In [None]:
# 3. filter to select all C6-7 reactions
def c67_filter(rxn, df):
    """Decide whether a reaction belongs to the C6-7 subset.

    A reaction is accepted when the summed formulas of both sides are equal,
    contain only the elements C, H and O, and have 6 or 7 carbon atoms.

    :param rxn: reaction key; rxn[0] and rxn[1] are the sequences of species
        names on the two sides of the reaction
    :param df: species table indexed by species name, with an 'fml' column
    :returns: True if the reaction passes all of the checks, else False
    """
    rct_fml = automol.form.join_sequence(
        [df.loc[name, 'fml'] for name in rxn[0]])
    prd_fml = automol.form.join_sequence(
        [df.loc[name, 'fml'] for name in rxn[1]])

    # both sides must add up to the same formula
    if rct_fml != prd_fml:
        return False
    # carbon must be present before its count can be looked up
    if 'C' not in rct_fml:
        return False
    # no elements other than C, H and O
    if not set(rct_fml) <= set("CHO"):
        return False
    return 6 <= rct_fml['C'] <= 7

# reactions that pass the C6-7 filter, with their parameters carried over
C67_RXN_DCT = {
    rxn: params for rxn, params in RXN_DCT.items()
    if c67_filter(rxn, SPC_DF)
}
print(f"FULL COUNT: {len(RXN_DCT)}")
print(f"C6-7 SUBSET COUNT: {len(C67_RXN_DCT)}")
# names of every species appearing in the selected reactions
C67_SPC_SET = {spc for rcts, prds, _ in C67_RXN_DCT for spc in rcts + prds}
# list them in the order of the species table, then slice the table
C67_SPC_LST = SPC_DF.index[SPC_DF.index.isin(C67_SPC_SET)].tolist()
C67_SPC_DF = SPC_DF.loc[C67_SPC_LST]

In [None]:
# 4. write C6-7 submechanism
C67_SPC_PATH = 'mechanisms/01_raw_nuig-c6-7.csv'
C67_RXN_PATH = 'mechanisms/01_raw_nuig-c6-7.txt'

# a. write the species file
# (the helper 'fml' column is left out; fields are quoted with single quotes)
C67_SPC_DF.drop('fml', axis=1).to_csv(C67_SPC_PATH, quotechar="'")

# b. write the reaction file
# (the species dictionary is keyed by index label, one entry per table row)
C67_SPC_DCT = C67_SPC_DF.to_dict(orient='index')
C67_MECH_STR = chemkin_io.writer.mechanism.write_chemkin_file(
    mech_spc_dct=C67_SPC_DCT,
    rxn_param_dct=C67_RXN_DCT,
)
ioformat.pathtools.write_file(C67_MECH_STR, '.', C67_RXN_PATH)

In [None]:
# 5. filter to select all Pyro reactions
def pyro_filter(rxn, df):
    """Decide whether a reaction involves only C/H species.

    :param rxn: reaction key; rxn[0] and rxn[1] are the sequences of species
        names on the two sides of the reaction
    :param df: species table indexed by species name, with an 'fml' column
    :returns: True if the formula of every species in the reaction contains
        no elements other than C and H, else False
    """
    allowed = set('CH')
    for name in rxn[0] + rxn[1]:
        if not set(df.loc[name, 'fml']) <= allowed:
            return False
    return True

# reactions that pass the pyrolysis filter, with their parameters carried over
PYRO_RXN_DCT = {
    rxn: params for rxn, params in RXN_DCT.items()
    if pyro_filter(rxn, SPC_DF)
}
print(f"FULL COUNT: {len(RXN_DCT)}")
print(f"PYRO SUBSET COUNT: {len(PYRO_RXN_DCT)}")
# names of every species appearing in the selected reactions
PYRO_SPC_SET = {spc for rcts, prds, _ in PYRO_RXN_DCT for spc in rcts + prds}
# list them in the order of the species table, then slice the table
PYRO_SPC_LST = SPC_DF.index[SPC_DF.index.isin(PYRO_SPC_SET)].tolist()
PYRO_SPC_DF = SPC_DF.loc[PYRO_SPC_LST]

In [None]:
# 6. write Pyro submechanism
PYRO_SPC_PATH = 'mechanisms/01_raw_nuig-pyro.csv'
PYRO_RXN_PATH = 'mechanisms/01_raw_nuig-pyro.txt'

# a. write the species file
# (the helper 'fml' column is left out; fields are quoted with single quotes)
PYRO_SPC_DF.drop('fml', axis=1).to_csv(PYRO_SPC_PATH, quotechar="'")

# b. write the reaction file
# (the species dictionary is keyed by index label, one entry per table row)
PYRO_SPC_DCT = PYRO_SPC_DF.to_dict(orient='index')
PYRO_MECH_STR = chemkin_io.writer.mechanism.write_chemkin_file(
    mech_spc_dct=PYRO_SPC_DCT,
    rxn_param_dct=PYRO_RXN_DCT,
)
ioformat.pathtools.write_file(PYRO_MECH_STR, '.', PYRO_RXN_PATH)