In [1]:
# Script to add new mechanism's species and reactions to the database

In [2]:
# Imports for adding a new mechanism's species and reactions to the database
import os
import sys
import pandas as pd

import rmgpy.chemkin

## Load New Model

In [3]:
# Directory holding the mechanism whose species and reactions are merged into the database.
new_model_dir = '/work/westgroup/harris.se/autoscience/fuels/butane/small_lib_20230801/'
# The Chemkin mechanism file, species dictionary, and transport file, all inside new_model_dir.
chemkin = os.path.join(new_model_dir, 'chem_annotated.inp')
species_dict = os.path.join(new_model_dir, 'species_dictionary.txt')
transport = os.path.join(new_model_dir, 'tran.dat')
# species_list and reaction_list are the inputs to the database-update cells further down.
species_list, reaction_list = rmgpy.chemkin.load_chemkin_file(chemkin, dictionary_path=species_dict, transport_path=transport, use_chemkin_names=True)


## Load Species Database

In [5]:
# Root directory of the database files; the reaction database is read from here as well.
DFT_DIR = '/work/westgroup/harris.se/autoscience/reaction_calculator/dft'
species_csv = os.path.join(DFT_DIR, 'species_database.csv')
# Current contents of the species database (one row per species).
species_df = pd.read_csv(species_csv)

# Report the row count before any new species are appended.
print(f'Species database contains {len(species_df)} unique species')

Species database contains 271 unique species


In [6]:
# Check if there are any new species.
# Parse every database adjacency list exactly once up front; rebuilding these
# Species objects inside the loop over species_list would repeat the same
# parsing work for every species in the mechanism.
db_species_list = [
    rmgpy.species.Species().from_adjacency_list(db_species_adj_list)
    for db_species_adj_list in species_df['adjacency_list'].values
]

# Mechanism species that are not isomorphic to anything already in the database.
species_to_add = []
for new_sp in species_list:
    if any(db_sp.is_isomorphic(new_sp) for db_sp in db_species_list):
        continue
    species_to_add.append(new_sp)


# add the new species
print('Added the following species to the database:')
new_species_rows = []
for k, new_sp in enumerate(species_to_add):

    # first check that it's unique compared to everything before it in species_to_add
    if any(new_sp.is_isomorphic(species_to_add[m]) for m in range(k)):
        continue

    name = new_sp.label
    smiles = new_sp.smiles
    # The split works around what looks like a bug where multiple adjacency lists
    # end up in the species_list for a single species (possibly only when chemkin
    # names are used while loading the chemkin file); only the first one is kept.
    adjacency_list = new_sp.to_adjacency_list().split('\n\n\n')[0]
    # New rows are numbered consecutively, continuing after the existing table.
    i = len(species_df) + len(new_species_rows)

    print(f'\t{name}')
    new_species_rows.append({'i': i, 'name': name, 'SMILES': smiles, 'adjacency_list': adjacency_list})

# DataFrame.append was removed in pandas 2.0, and growing the frame one row at a
# time copies the whole table on every iteration. Collect the rows and
# concatenate once instead; skip the concat entirely when nothing is new.
if new_species_rows:
    species_df = pd.concat([species_df, pd.DataFrame(new_species_rows)], ignore_index=True)



Added the following species to the database:
	C[C]C
	C[CH]C(C)OO
	C[CH]CC[O]
	CCC[O]
	CCCOO
	[O]OCCCOO
	C4H7O3
	C4H7O3
	CCCC[O]
	CCCCOO
	C4H8O2
	C3H6O3
	C4H7O2
	C4H7O3
	C[C]CC-2
	C4H6O2
	C3H3O2
	C2H2O2
	[O]OCCCCOO
	C4H8O
	[CH2]CCCO
	[CH2]CCO
	[O]OCCCCO
	O[CH]CCCOO
	[O]CCCCOO
	C4H8O3
	C4H7O2
	C4H7O2
	[O]OCCCO
	O[CH]CCOO
	C3H5O2
	C2H2O2
	C4H6O2
	C4H6O2
	OOCCCCOO
	C2H3O2
	C3H2O3
	C4H6O2
	C4H7O5
	C4H6O2
	C2H2O2
	CC1OC1C
	C4H9O4
	C4H8O3
	C4H9O4
	C4H10O4
	C3H2O3
	C2HO3
	C2H2O3
	C4H6O2
	C4H7O5
	C4H6O2
	C3H3O4
	C2HO3
	C3H3O4
	C3H2O3
	C3H2O3
	C3H4O6
	C3H3O6
	C2H2O3
	C3H2O5
	C3HO3
	C2HO2
	C2H4O4
	C2H3O4
	C2H3O3
	C3H3O6
	C4H7O3
	C4H7O5
	C3HO3
	C3HO3
	C3HO5
	C2HO3
	C2O4
	C4H8O3
	C3H2O5
	C4H7O7
	C2H4O5
	C2H4O3
	C3H2O5
	C2H2
	C3HO5
	C3HO5
	C8H16O2
	C8H14


In [7]:
# Overwrite the species database CSV (the same path it was read from) with the updated table.
print('Saving new species database...')
# index=False keeps the DataFrame's own index out of the file.
species_df.to_csv(species_csv, index=False)

Saving new species database...


In [8]:
# load it back in to test that it worked
# This rebinds species_df to the on-disk version, which the reaction cells below use.
species_df = pd.read_csv(species_csv)
print(f'Species database now contains {len(species_df)} unique species')

Species database now contains 356 unique species


## Load Reaction Database

In [9]:
# The reaction database lives in the same DFT_DIR as the species database.
reaction_csv = os.path.join(DFT_DIR, 'reaction_database.csv')
reaction_df = pd.read_csv(reaction_csv)

# Report the row count before any new reactions are appended.
print(f'Reaction database contains {len(reaction_df)} unique reactions')

Reaction database contains 4689 unique reactions


In [10]:
# populate total species list
# One Species object per row of species_df, parsed from that row's adjacency list and kept
# in row order; get_unique_species_index scans this list to turn a species into an index.
total_species_list = [rmgpy.species.Species().from_adjacency_list(adj_list) for adj_list in species_df['adjacency_list'].values]

def get_unique_species_index(species):
    """Return the position of ``species`` in ``total_species_list``.

    The list is scanned from the start and the position of the first entry
    that ``species`` is isomorphic to is returned.

    Raises:
        IndexError: if no entry of ``total_species_list`` is isomorphic to ``species``.
    """
    for position, candidate in enumerate(total_species_list):
        if species.is_isomorphic(candidate):
            return position
    raise IndexError('Species not in database')


def get_unique_string(reaction):
    """Build the unique identifier string for a reaction.

    Every reactant and product is mapped to its species index with
    ``get_unique_species_index`` (an isomorphism lookup, so labels do not
    matter). The indices on each side are sorted numerically, joined with
    '+', and the two sides are joined with '=', giving something like
    12+300=14+303.
    """
    # Reactants are resolved before products, so a missing species surfaces in that order.
    reactant_ids = sorted(get_unique_species_index(sp) for sp in reaction.reactants)
    product_ids = sorted(get_unique_species_index(sp) for sp in reaction.products)

    left_side = '+'.join(str(species_id) for species_id in reactant_ids)
    right_side = '+'.join(str(species_id) for species_id in product_ids)
    return left_side + '=' + right_side


def reaction2smiles(reaction):
    """Return a SMILES label for an RMG reaction, formatted as ``A+B_C+D``.

    Species contribute the SMILES of their first molecule; Molecules contribute
    their own SMILES; entries of any other type are skipped. The order follows
    ``reaction.reactants`` / ``reaction.products`` and is not sorted, so the
    label is not a unique identifier (and SMILES is not unique to begin with).
    """
    def _side_smiles(entries):
        # SMILES strings for one side of the reaction, in the order given.
        collected = []
        for entry in entries:
            if isinstance(entry, rmgpy.species.Species):
                collected.append(entry.molecule[0].to_smiles())
            elif isinstance(entry, rmgpy.molecule.Molecule):
                collected.append(entry.to_smiles())
        return collected

    reactant_smiles = _side_smiles(reaction.reactants)
    product_smiles = _side_smiles(reaction.products)

    left_side = '+'.join(reactant_smiles)
    if not product_smiles:
        # With nothing on the product side the label has no '_' separator;
        # this matches the label the function has always produced.
        return left_side
    return left_side + '_' + '+'.join(product_smiles)

In [11]:
# check if there are any new reactions to add
# A set gives constant-time membership tests instead of scanning the whole
# 'unique_string' column once per mechanism reaction.
existing_unique_strings = set(reaction_df['unique_string'].values)

# [index into reaction_list, unique_string] for every mechanism reaction that is
# not in the database yet. Duplicates within the mechanism are still included
# here (and counted in the "Found" message); they are filtered out below.
entries_to_add = []
print('Looking for new reactions in mechanism...')
for j, reaction in enumerate(reaction_list):
    unique_string = get_unique_string(reaction)
    if unique_string not in existing_unique_strings:
        entries_to_add.append([j, unique_string])
print(f'Found {len(entries_to_add)} new reactions')

print('Added the following new reactions to the database:')
# actually add the new reactions
seen_unique_strings = set()
new_reaction_rows = []
for rmg_index, unique_string in entries_to_add:
    # make sure the 'unique_string' is actually unique compared to everything that came before it
    if unique_string in seen_unique_strings:
        continue
    seen_unique_strings.add(unique_string)

    name = str(reaction_list[rmg_index])
    smiles = reaction2smiles(reaction_list[rmg_index])
    # New rows are numbered consecutively, continuing after the existing table.
    i = len(reaction_df) + len(new_reaction_rows)

    print(f'\t{name}')
    new_reaction_rows.append({'i': i, 'name': name, 'SMILES': smiles, 'unique_string': unique_string})

# DataFrame.append was removed in pandas 2.0, and appending thousands of rows
# one at a time copies the whole table each time. Build the rows first and
# concatenate once; skip the concat entirely when nothing is new.
if new_reaction_rows:
    reaction_df = pd.concat([reaction_df, pd.DataFrame(new_reaction_rows)], ignore_index=True)


Looking for new reactions in mechanism...
Found 2497 new reactions
Added the following new reactions to the database:
	C[CH2](20) + C[CH2](20) <=> butane(1)
	HO2(16) + C[CH]CC(23) <=> O2(2) + butane(1)
	butane(1) <=> H(14) + C[CH]CC(23)
	C[CH2](20) + C[CH2](20) <=> H(14) + C[CH]CC(23)
	H2O2(17) + C[CH]CC(23) <=> HO2(16) + butane(1)
	HO2(16) + C2H4(11) <=> CCO[O](29)
	[CH3](22) + C3H6(12) <=> C[CH]CC(23)
	O2(2) + CH4(10) <=> HO2(16) + [CH3](22)
	CCOO(53) + C[CH]CC(23) <=> CCO[O](29) + butane(1)
	C[CH]CC(23) + CCC(C)OO(77) <=> CCC(C)O[O](52) + butane(1)
	O2(2) + C[CH]CC(23) <=> [CH2]CC(C)OO(74)
	[CH2]CC(C)OO(74) <=> CCC(C)O[O](52)
	C4H9O4(165) <=> OH(15) + C4H8O3(179)
	HO2(16) + [CH2]CCC(24) <=> O2(2) + butane(1)
	butane(1) <=> H(14) + [CH2]CCC(24)
	C[CH2](20) + C[CH2](20) <=> H(14) + [CH2]CCC(24)
	H2O2(17) + [CH2]CCC(24) <=> HO2(16) + butane(1)
	CCOO(53) + [CH2]CCC(24) <=> CCO[O](29) + butane(1)
	[CH2]CCC(24) + CCC(C)OO(77) <=> CCC(C)O[O](52) + butane(1)
	[CH3](22) + C3H6(12) <=> [CH2]C

	HO2(16) + C3H6(12) <=> CCCO[O](63)
	C4H8O3(177) <=> OH(15) + C4H7O2(458)
	HO2(16) + C2H4(11) <=> OH(15) + C2H4O(35)
	H(14) + C2H4O(35) <=> CH2O(9) + [CH3](22)
	C4H7O2(458) <=> C2H3O(461) + C2H4O(35)
	HO2(16) + C2H3O(461) <=> O2(2) + C2H4O(35)
	C2H3O(461) + CCCCO[O](215) <=> C2H2O(603) + CCCCOO(225)
	C2H3O(461) + C[CH]CCOO(223) <=> C2H2O(603) + CCCCOO(225)
	C2H3O(461) + [CH2]CCCOO(224) <=> C2H2O(603) + CCCCOO(225)
	CC[O](31) + CCCCO[O](215) <=> C2H4O(35) + CCCCOO(225)
	CC[O](31) + C[CH]CCOO(223) <=> C2H4O(35) + CCCCOO(225)
	CC[O](31) + [CH2]CCCOO(224) <=> C2H4O(35) + CCCCOO(225)
	HO2(16) + C2H2O(603) <=> CO(6) + CO[O](30)
	CCCCOO(225) <=> [CH3](22) + [CH2]CCOO(156)
	O2(2) + [CH2]CC(21) <=> [CH2]CCOO(156)
	[CH2]CCOO(156) <=> HO2(16) + C3H6(12)
	[CH2]CCOO(156) <=> CCCO[O](63)
	O2(2) + C[CH]CC(23) <=> OH(15) + CC1CCO1(123)
	CCC(C)O[O](52) <=> OH(15) + CC1CCO1(123)
	O2(2) + [CH2]CCC(24) <=> OH(15) + CC1CCO1(123)
	CCCCO[O](215) <=> OH(15) + CC1CCO1(123)
	HO2(16) + C4H8(47) <=> OH(15) + CC1C

	C[O](75) + [CH2]CCCOO(224) <=> CH2O(9) + CCCCOO(225)
	H(14) + CCCCOO(225) <=> H2(13) + CCCCO[O](215)
	O2(2) + C[CH]CC(23) <=> OH(15) + CC1OC1C(5173)
	CCC(C)O[O](52) <=> OH(15) + CC1OC1C(5173)
	[CH2]CC(C)OO(74) <=> OH(15) + CC1OC1C(5173)
	HO2(16) + C4H8(47) <=> OH(15) + CC1OC1C(5173)
	HO2(16) + C4H8(46) <=> OH(15) + CC1OC1C(5173)
	OH(15) + CC1CCO1(123) <=> OH(15) + CC1OC1C(5173)
	C[CH]C(C)OO(72) <=> OH(15) + CC1OC1C(5173)
	OH(15) + CC1OC1C(5173) <=> O(5) + CCC(C)[O](65)
	OH(15) + CC1OC1C(5173) <=> OH(15) + C4H8O(159)
	HO2(16) + C3H4O2(640) <=> HO2(16) + C3H4O2(633)
	HO2(16) + C4H9O4(5180) <=> O2(2) + C4H10O4(5252)
	H2O2(17) + C4H9O4(5180) <=> HO2(16) + C4H10O4(5252)
	C[CH]CC(23) + C4H10O4(5252) <=> C4H9O4(5180) + butane(1)
	[CH2]CCC(24) + C4H10O4(5252) <=> C4H9O4(5180) + butane(1)
	OH(15) + C4H10O4(5252) <=> H2O(8) + C4H9O4(5180)
	CHO(76) + C4H10O4(5252) <=> CH2O(9) + C4H9O4(5180)
	CCO[O](29) + C4H10O4(5252) <=> CCOO(53) + C4H9O4(5180)
	CCC(C)O[O](52) + C4H10O4(5252) <=> C4H9O4(5180) +

	CO(6) + CCC[O](127) <=> CO(6) + [CH2]CCO(1016)
	HO2(16) + [CH2]CC(21) <=> OH(15) + CCC[O](127)
	CCCOO(152) <=> OH(15) + CCC[O](127)
	CH2O(9) + C[CH2](20) <=> CCC[O](127)
	CH2O(9) + C[CH2](20) <=> [CH2]CCO(1016)
	CH2O(9) + C[CH2](20) <=> O(5) + [CH2]CC(21)
	[CH2]CCO(1016) <=> O(5) + [CH2]CC(21)
	CCC[O](127) <=> O(5) + [CH2]CC(21)
	C2H3(37) + C[CH2](20) <=> C2H4(11) + C2H4(11)
	CH4(10) + C2H3(37) <=> [CH3](22) + C2H4(11)
	C2H3(37) + CCC(C)OO(77) <=> C2H4(11) + CCC(C)O[O](52)
	C4H8(47) <=> C2H3(37) + C[CH2](20)
	C2H3(37) + O[CH]CCCOO(1028) <=> C2H4(11) + C4H8O3(1060)
	C2H3(37) + CCCCOO(225) <=> C2H4(11) + CCCCO[O](215)
	COO(94) + C2H3(37) <=> CO[O](30) + C2H4(11)
	C2H3(37) + O[CH]CCOO(1188) <=> C2H4(11) + C3H6O3(453)
	C2H3(37) + CCCOO(152) <=> C2H4(11) + CCCO[O](63)
	C2H3(37) + CCOO(53) <=> C2H4(11) + CCO[O](29)
	C2H3(37) + CCCCOO(225) <=> C2H4(11) + C[CH]CCOO(223)
	C2H3(37) + C4H8O3(179) <=> C2H4(11) + C4H7O3(198)
	C4H8(46) <=> C2H3(37) + C[CH2](20)
	C[C]CC-2(474) <=> C2H3(37) + C[CH2](

	[CH3](22) + [O]CCCCOO(1029) <=> CH4(10) + C4H8O3(1060)
	H(14) + [O]CCCCOO(1029) <=> H2(13) + C4H8O3(1060)
	CO[O](30) + [O]CCCCOO(1029) <=> COO(94) + C4H8O3(1060)
	[O]CCCCOO(1029) + C4H9O4(5180) <=> C4H8O3(1060) + C4H10O4(5252)
	CCCO[O](63) + [O]CCCCOO(1029) <=> CCCOO(152) + C4H8O3(1060)
	[CH2]CCOO(156) + [O]CCCCOO(1029) <=> CCCOO(152) + C4H8O3(1060)
	OH(15) + [O]CCCCOO(1029) <=> OOCCCCOO(2193)
	[O]CCCCOO(1029) + [O]OCCCCOO(961) <=> C4H8O3(1060) + OOCCCCOO(2193)
	OH(15) + OCCC1CO1(268) <=> O(5) + [CH2]CCCOO(224)
	OH(15) + OCCC1CO1(268) <=> CH2O(9) + [CH2]CCOO(156)
	O2(2) + [CH2]CCCO(1014) <=> O(5) + [CH2]CCCOO(224)
	O2(2) + [CH2]CCCO(1014) <=> CH2O(9) + [CH2]CCOO(156)
	[O]OCCCCO(1020) <=> O(5) + [CH2]CCCOO(224)
	CH2O(9) + [CH2]CCOO(156) <=> [O]OCCCCO(1020)
	O[CH]CCCOO(1028) <=> O(5) + [CH2]CCCOO(224)
	CH2O(9) + [CH2]CCOO(156) <=> O[CH]CCCOO(1028)
	CH2O(9) + [CH2]CCOO(156) <=> [O]CCCCOO(1029)
	CH2O(9) + [CH2]CCOO(156) <=> O(5) + [CH2]CCCOO(224)
	CH2O(9) + [CH2]CCOO(156) <=> H(14) + C4H8

	HO2(16) + C3H3O6(7670) <=> O2(2) + C3H4O6(7668)
	HO2(16) + C3H4O6(7668) <=> H2O2(17) + C3H3O6(7670)
	C3H4O6(7668) + C[CH]CC(23) <=> C3H3O6(7670) + butane(1)
	C3H4O6(7668) + [CH2]CCC(24) <=> C3H3O6(7670) + butane(1)
	OH(15) + C3H4O6(7668) <=> H2O(8) + C3H3O6(7670)
	CHO(76) + C3H4O6(7668) <=> CH2O(9) + C3H3O6(7670)
	C2H3(37) + C3H4O6(7668) <=> C2H4(11) + C3H3O6(7670)
	CCO[O](29) + C3H4O6(7668) <=> CCOO(53) + C3H3O6(7670)
	C3H4O6(7668) + CCC(C)O[O](52) <=> C3H3O6(7670) + CCC(C)OO(77)
	C3H4O6(7668) + C[CH]C(C)OO(72) <=> C3H3O6(7670) + CCC(C)OO(77)
	C3H4O6(7668) + [CH2]CC(C)OO(74) <=> C3H3O6(7670) + CCC(C)OO(77)
	C3H4O6(7668) + C4H7O3(198) <=> C3H3O6(7670) + C4H8O3(179)
	C3H4O6(7668) + C4H9O4(181) <=> C3H3O6(7670) + CC(CCOO)OO(182)
	C3H4O6(7668) + C4H9O4(165) <=> C3H3O6(7670) + CC(CCOO)OO(182)
	C3H3O6(7670) + C4H8O3(177) <=> C3H4O6(7668) + C4H7O3(464)
	C3H3O6(7670) + C3H4O2(640) <=> C3H3O2(770) + C3H4O6(7668)
	C3H4O6(7668) + CCCCO[O](215) <=> C3H3O6(7670) + CCCCOO(225)
	C3H4O6(7668) + C[CH

	C2HO3(5674) + C[CH2](20) <=> C2H2O3(5906) + C2H4(11)
	C2HO3(5674) + [CH2]CC(21) <=> C2H2O3(5906) + C3H6(12)
	C2HO3(5674) + C[CH]C(42) <=> C2H2O3(5906) + C3H6(12)
	C2HO3(5674) + CCOO(53) <=> C2H2O3(5906) + CCO[O](29)
	C2HO3(5674) + CCC(C)OO(77) <=> C2H2O3(5906) + CCC(C)O[O](52)
	C2HO3(5674) + CC(CCOO)OO(182) <=> C2H2O3(5906) + C4H9O4(165)
	COO(94) + C2HO3(5674) <=> CO[O](30) + C2H2O3(5906)
	C2HO3(5674) + CCCCOO(225) <=> C2H2O3(5906) + CCCCO[O](215)
	C2HO3(5674) + CC(CCOO)OO(182) <=> C2H2O3(5906) + C4H9O4(181)
	C2HO3(5674) + CC(CC[O])OO(167) <=> C2H2O3(5906) + C4H8O3(177)
	C2HO3(5674) + [CH2]CCC(24) <=> C2H2O3(5906) + C4H8(47)
	C2HO3(5674) + C[CH]CC(23) <=> C2H2O3(5906) + C4H8(47)
	C2HO3(5674) + C[CH]CC(23) <=> C2H2O3(5906) + C4H8(46)
	C2HO3(5674) + O[CH]CCCOO(1028) <=> C2H2O3(5906) + C4H8O3(1060)
	C2HO3(5674) + [O]CCCCOO(1029) <=> C2H2O3(5906) + C4H8O3(1060)
	C2HO3(5674) + O[CH]CCOO(1188) <=> C2H2O3(5906) + C3H6O3(453)
	C2HO3(5674) + C2H3O(461) <=> C2H2O(603) + C2H2O3(5906)
	C2HO3(5674

	C2H3O(197) + O[CH]CCOO(1188) <=> C2H4O(35) + C3H6O3(453)
	C2H3O(197) + CCCOO(152) <=> C2H4O(35) + CCCO[O](63)
	C2H3O(197) + CCOO(53) <=> C2H4O(35) + CCO[O](29)
	C2H3O(197) + OOCCCCOO(2193) <=> C2H4O(35) + [O]OCCCCOO(961)
	COO(94) + C2H3O(197) <=> CO[O](30) + C2H4O(35)
	C2H4O(35) + C[CH]CCOO(223) <=> C2H3O(197) + CCCCOO(225)
	C2H4O(35) + [CH2]CCCOO(224) <=> C2H3O(197) + CCCCOO(225)
	C2H4O(35) + [CH2]CCOO(156) <=> C2H3O(197) + CCCOO(152)
	C2H4O(35) + C4H7O3(198) <=> C2H3O(197) + C4H8O3(179)
	C2H3O(197) + C4H10O4(5252) <=> C2H4O(35) + C4H9O4(5180)
	C2H3O(197) + C4H9O4(5180) <=> C2H2O(603) + C4H10O4(5252)
	C2H3O(197) + C4H9O4(5251) <=> C2H2O(603) + C4H10O4(5252)
	C2H3O(197) + C4H8O3(177) <=> C2H4O(35) + C4H7O3(464)
	C2H3O(197) + CCCO[O](63) <=> C2H2O(603) + CCCOO(152)
	C2H3O(197) + [CH2]CCOO(156) <=> C2H2O(603) + CCCOO(152)
	C2H4O(35) + C[CH]C(C)OO(72) <=> C2H3O(197) + CCC(C)OO(77)
	C2H3O(197) + C3H4O2(640) <=> C2H4O(35) + C3H3O2(770)
	C2H3O(197) + C3H4O6(7668) <=> C2H4O(35) + C3H3O6(7670

	C2H3(37) + C4H7O3(464) <=> C#C(1267) + C4H8O3(177)
	C2H3(37) + C3H3O2(770) <=> C#C(1267) + C3H4O2(640)
	C2H3(37) + CCCCO[O](215) <=> C#C(1267) + CCCCOO(225)
	C2H3(37) + C[CH]CCOO(223) <=> C#C(1267) + CCCCOO(225)
	C2H3(37) + [CH2]CCCOO(224) <=> C#C(1267) + CCCCOO(225)
	O2(2) + C3H2O3(3892) <=> C3H2O5(23581)
	C2H3(37) + C4H9O4(5180) <=> C#C(1267) + C4H10O4(5252)
	C2H3(37) + C4H9O4(5251) <=> C#C(1267) + C4H10O4(5252)
	C2H3(37) + C4H7O3(13622) <=> C#C(1267) + C4H8O3(5250)
	C2H(4) + O[CH]CCCOO(1028) <=> C#C(1267) + C4H8O3(1060)
	C2H(4) + [O]CCCCOO(1029) <=> C#C(1267) + C4H8O3(1060)
	C2H3(37) + [O]OCCCCOO(961) <=> C#C(1267) + OOCCCCOO(2193)
	C2H3(37) + C2H3O3(1621) <=> C#C(1267) + C2H4O3(20827)
	CO2(7) + C2H2O3(5906) <=> C3H2O5(18718)
	O2(2) + C3H2O3(3892) <=> C3H2O5(18718)
	C3H2O5(18718) <=> C3H2O5(23581)
	CO2(7) + C2H2O3(5906) <=> C3H2O5(23581)
	O2(2) + C3H2O3(3892) <=> CO2(7) + C2H2O3(5829)
	C3H2O5(23581) <=> CO2(7) + C2H2O3(5829)
	CO2(7) + C2H2O3(5906) <=> CO2(7) + C2H2O3(5829)
	C3H2O5(

	C2H3O3(1621) + C4H7O2(458) <=> C2H4O3(20827) + C4H6O2(592)
	C2H3O3(1621) + C4H7O2(193) <=> C2H4O3(20827) + C4H6O2(592)
	C2H(4) + C4H7O2(458) <=> C#C(1267) + C4H6O2(592)
	C2H(4) + C4H7O2(193) <=> C#C(1267) + C4H6O2(592)
	C4H6O2(592) <=> C2H3O(461) + C2H3O(197)
	C4H6O2(592) <=> CHO(76) + C3H5O(196)
	CH2O(9) + C4H6O2(592) <=> C2H4O(467) + C3H4O2(633)
	CO2(7) + C4H6O2(592) <=> C2HO3(5674) + C3H5O(196)
	C2H(4) + C2H4O4(9737) <=> C#C(1267) + C2H3O4(9742)
	C4H6O2(592) <=> CO2(7) + C3H6(12)
	C4H6O2(592) <=> C2H3(37) + C2H3O2(8839)
	C2HO3(5674) + C2H4O(467) <=> C2H2O3(5906) + C2H3O(461)
	C2HO3(5674) + C2H4O(35) <=> C2H2O3(5906) + C2H3O(461)
	C2H2O(603) + C2H4O(35) <=> C4H6O2(1984)
	C2H2O(603) + C2H4O(467) <=> C4H6O2(1984)
	C4H6O2(592) <=> C4H6O2(1984)
	C2H2O(603) + C2H4O(35) <=> CO2(7) + C3H6(12)
	C2H2O(603) + C2H4O(35) <=> C2H3(37) + C2H3O2(8839)
	C4H6O2(1984) <=> CO2(7) + C3H6(12)
	C2H2O(603) + C2H4O(467) <=> CO2(7) + C3H6(12)
	CO2(7) + C3H6(12) <=> C2H3O(461) + C2H3O(197)
	CO2(7) + C3H6(12)

	C4H8(47) + [CH2]CCCOO(224) <=> C4H7(470) + CCCCOO(225)
	C2H4O(467) + C4H7(470) <=> C2H3O(461) + C4H8(47)
	[CH2]CCOO(156) + C4H8(47) <=> CCCOO(152) + C4H7(470)
	C4H8(46) + [CH2]CCCOO(224) <=> C4H7(470) + CCCCOO(225)
	[CH2]CCOO(156) + C4H8(46) <=> CCCOO(152) + C4H7(470)
	C4H7O3(198) + C4H8(47) <=> C4H7(470) + C4H8O3(179)
	C4H7O3(198) + C4H8(46) <=> C4H7(470) + C4H8O3(179)
	C[C]CC-2(474) <=> H(14) + C4H7(470)
	C4H8(47) + C4H9O4(5180) <=> C4H7(470) + C4H10O4(5252)
	C4H7(470) + C4H10O4(5252) <=> C4H8(46) + C4H9O4(5180)
	C4H8(46) + C[CH]C(C)OO(72) <=> C4H7(470) + CCC(C)OO(77)
	C4H7O3(464) + C4H8(47) <=> C4H7(470) + C4H8O3(177)
	C4H7(470) + C4H8O3(177) <=> C4H7O3(464) + C4H8(46)
	C4H8(47) + C[CH]C(C)OO(72) <=> C4H7(470) + CCC(C)OO(77)
	C3H4O2(640) + C4H7(470) <=> C3H3O2(770) + C4H8(47)
	C3H4O2(640) + C4H7(470) <=> C3H3O2(770) + C4H8(46)
	C3H3O6(7670) + C4H8(47) <=> C3H4O6(7668) + C4H7(470)
	C3H4O6(7668) + C4H7(470) <=> C3H3O6(7670) + C4H8(46)
	C2H3O4(9742) + C4H8(47) <=> C2H4O4(9737) + C4H7(

	C3H2O5(7732) + [CH2]CCC(24) <=> C3HO5(16374) + butane(1)
	OH(15) + C3H2O5(7732) <=> H2O(8) + C3HO5(16374)
	CH2O(9) + C3HO5(16374) <=> CHO(76) + C3H2O5(7732)
	CH4(10) + C3HO5(16374) <=> [CH3](22) + C3H2O5(7732)
	C2H3(37) + C3H2O5(7732) <=> C2H4(11) + C3HO5(16374)
	C3H2O5(7732) + C3H5(56) <=> C3HO5(16374) + C3H6(12)
	C3HO5(16374) + C[CH]CC(23) <=> C3H2O5(7732) + C4H8(46)
	C3HO5(16374) + C[CH]CC(23) <=> C3H2O5(7732) + C4H8(47)
	CCO[O](29) + C3H2O5(7732) <=> CCOO(53) + C3HO5(16374)
	C3H2O5(7732) + CCC(C)O[O](52) <=> C3HO5(16374) + CCC(C)OO(77)
	C3H2O5(7732) + C[CH]C(C)OO(72) <=> C3HO5(16374) + CCC(C)OO(77)
	C3H2O5(7732) + [CH2]CC(C)OO(74) <=> C3HO5(16374) + CCC(C)OO(77)
	C3H2O5(7732) + C4H7O3(198) <=> C3HO5(16374) + C4H8O3(179)
	C3H2O5(7732) + C4H7O3(199) <=> C3HO5(16374) + C4H8O3(179)
	C3H2O5(7732) + C4H9O4(181) <=> C3HO5(16374) + CC(CCOO)OO(182)
	C3H2O5(7732) + C4H9O4(165) <=> C3HO5(16374) + CC(CCOO)OO(182)
	C3HO5(16374) + C4H8O3(177) <=> C3H2O5(7732) + C4H7O3(464)
	C3HO5(16374) + C4H8(

	C2H2O3(7721) + C2H3O(461) <=> C2HO3(7549) + C2H4O(35)
	C2H2O3(7721) + C2H3O(197) <=> C2HO3(7549) + C2H4O(35)
	CO[O](30) + C2H2O3(7721) <=> COO(94) + C2HO3(7549)
	C2H2O3(7721) + CCCO[O](63) <=> C2HO3(7549) + CCCOO(152)
	C2H2O3(7721) + [CH2]CCOO(156) <=> C2HO3(7549) + CCCOO(152)
	C2H2O3(7721) + C3HO3(8462) <=> C2HO3(7549) + C3H2O3(7654)
	C2HO3(7549) + OOCCCCOO(2193) <=> C2H2O3(7721) + [O]OCCCCOO(961)
	C2H2O3(7721) + C2H3O4(9742) <=> C2HO3(7549) + C2H4O4(9737)
	C2HO3(7549) + C3H4O6(7668) <=> C2H2O3(7721) + C3H3O6(7670)
	C2H2O3(7721) + C3H3O6(13048) <=> C2HO3(7549) + C3H4O6(7668)
	C2H2O3(7721) + C4H7O3(13622) <=> C2HO3(7549) + C4H8O3(5250)
	C2H2O3(7721) <=> CO(6) + CH2O2(1191)
	HO2(16) + C3HO3(15067) <=> CO2(7) + C2H2O3(7721)
	C3H2O5(7732) <=> CO2(7) + C2H2O3(7721)
	CO2(7) + CH2O(9) <=> H(14) + C2HO3(7549)
	C2H2O3(5906) <=> H(14) + C2HO3(7549)
	CO(6) + CH2O2(1191) <=> H(14) + C2HO3(7549)
	CHO2(2991) + C2HO3(7549) <=> CO2(7) + C2H2O3(7721)
	C[O](75) + C2HO3(7549) <=> CH2O(9) + C2H2O3(7721)

	CO[O](30) + C2H4O5(20733) <=> COO(94) + C2H3O5(31742)
	C2H3O5(31742) + C4H10O4(5252) <=> C2H4O5(20733) + C4H9O4(5180)
	C2H4O5(20733) + C4H9O4(5251) <=> C2H3O5(31742) + C4H10O4(5252)
	C2H4O5(20733) + CCCO[O](63) <=> C2H3O5(31742) + CCCOO(152)
	C2H4O5(20733) + [CH2]CCOO(156) <=> C2H3O5(31742) + CCCOO(152)
	C2H4O5(20733) + C3HO3(8462) <=> C2H3O5(31742) + C3H2O3(7654)
	C2H3O5(31742) + OOCCCCOO(2193) <=> C2H4O5(20733) + [O]OCCCCOO(961)
	C2H3O4(9742) + C2H4O5(20733) <=> C2H3O5(31742) + C2H4O4(9737)
	C2H3O5(31742) + C3H4O6(7668) <=> C2H4O5(20733) + C3H3O6(7670)
	C2H4O5(20733) + C3H3O6(13048) <=> C2H3O5(31742) + C3H4O6(7668)
	C2H4O5(20733) + C4H7O3(13622) <=> C2H3O5(31742) + C4H8O3(5250)
	C2H3O3(1621) + C2H4O5(20733) <=> C2H3O5(31742) + C2H4O3(20827)
	C2H3O3(1623) + C2H4O5(20733) <=> C2H3O5(31742) + C2H4O3(20827)
	C2H4O5(20733) + C3HO5(16374) <=> C2H3O5(31742) + C3H2O5(7732)
	C2H3O5(31742) + C4H7(470) <=> C2H4O5(20733) + C4H6(6939)
	CH4(10) + C2H3O5(31742) <=> [CH3](22) + C2H4O5(20733)
	H(14)

In [12]:
# Overwrite the reaction database CSV (the same path it was read from) with the updated table.
print('Saving new reaction database...')
# index=False keeps the DataFrame's own index out of the file.
reaction_df.to_csv(reaction_csv, index=False)

Saving new reaction database...


In [13]:
# load it back in to test that it worked
reaction_df = pd.read_csv(reaction_csv)
print(f'Reaction database now contains {len(reaction_df)} unique reactions')

Reaction database now contains 7132 unique reactions
