In [1]:
# script to collect all of the kinetic calculations for this node in Disproportionation:
# Root_Ext-2R!H-R_2R!H->C_4R->C 
# and put them together into a database


In [2]:
import glob
import itertools
import os
import re
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np

import rmgpy.chemkin
import rmgpy.data.kinetics
import rmgpy.data.rmg
import rmgpy.data.thermo
import rmgpy.exceptions
import rmgpy.molecule
import rmgpy.reaction

In [3]:
# Root folder of the DFT results for the butane project.
DFT_DIR = "/work/westgroup/harris.se/autoscience/autoscience/butane/dft/"
# One Arkane-generated RMG library folder per kinetics/reaction* directory.
# glob.glob returns matches in a filesystem-dependent order, so the list is
# sorted to keep the order of the collected entries reproducible between runs.
kinetics_libs = sorted(glob.glob(os.path.join(DFT_DIR, 'kinetics', 'reaction*', 'arkane', 'RMG_libraries')))

In [4]:
# only include reactions that belong to the disproportionation family

# Read the base RMG mechanism (annotated Chemkin file, species dictionary, transport data)
basedir = '/work/westgroup/harris.se/autoscience/autoscience/butane/models/rmg_model'
base_chemkin, dictionary, transport = (
    os.path.join(basedir, file_name)
    for file_name in ('chem_annotated.inp', 'species_dictionary.txt', 'tran.dat')
)
species_list, reaction_list = rmgpy.chemkin.load_chemkin_file(
    base_chemkin,
    dictionary_path=dictionary,
    transport_path=transport,
)
print(f'{len(species_list)} species, {len(reaction_list)} reactions')



110 species, 1822 reactions


In [5]:
# Load the Arkane kinetics
# Collect the library entries of every reaction folder whose mechanism reaction
# belongs to the Disproportionation family. Each entry's index is overwritten
# with the mechanism reaction index parsed from the folder name.
entries = []
for lib_path in kinetics_libs:
    matches = re.search('reaction_([0-9]{4})', lib_path)
    if matches is None:
        # 'reaction*' in the glob can match folders without a 4-digit index;
        # skip those explicitly instead of crashing on matches[1]
        print(f'skipping {lib_path}: no reaction index found in path')
        continue
    reaction_index = int(matches[1])
    # skip entries not in Disproportionation
    # NOTE(review): getattr guards against reaction objects without a `family`
    # attribute — confirm whether the loaded mechanism can contain any
    if getattr(reaction_list[reaction_index], 'family', None) != 'Disproportionation':
        continue
    # a separate name keeps this per-folder database distinct from the
    # combined `ark_kinetics_database` assembled afterwards
    single_lib_database = rmgpy.data.kinetics.KineticsDatabase()
    single_lib_database.load_libraries(lib_path)

    # TODO fix bug related to load_libraries not getting the actual name
    for entry in single_lib_database.libraries[''].entries.values():
        entry.index = reaction_index
        entries.append(entry)


In [6]:
# Gather all collected entries in one library named harris_butane, keyed by entry
# label, and register it under the 'kinetics' key of a fresh database
harris_library = rmgpy.data.kinetics.KineticsLibrary()
harris_library.label = 'harris_butane'
harris_library.name = 'harris_butane'
harris_library.entries = OrderedDict((entry.label, entry) for entry in entries)

ark_kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
ark_kinetics_database.libraries['kinetics'] = harris_library

In [7]:
# save the results
# Writes the combined library to DFT_DIR/disproportionation_kinetics.
# reindex=False presumably keeps each entry's index (set from the mechanism
# reaction number) instead of renumbering — TODO confirm against RMG-Py.
output_path = os.path.join(DFT_DIR, 'disproportionation_kinetics')
ark_kinetics_database.save_libraries(output_path, reindex=False)

In [8]:
# Report how many entries were collected
print(f'{len(entries)} saved')

64 saved


In [9]:
# for entry in entries:
#     print(entry.index)

In [10]:
# Peek at the first collected entry (its repr is the cell output)
entries[0]

<Entry index=213 label="C2H5(33) + HO2(16) <=> H2O2(17) + C2H4(11)">

In [11]:
# Build the reference RMG database: selected thermo libraries plus the
# Disproportionation kinetics family.

# thermo libraries to read from the RMG database
thermo_libs = [
    'BurkeH2O2',
    'primaryThermoLibrary',
    'FFCM1(-)',
    'CurranPentane',
    'Klippenstein_Glarborg2016',
    'thermo_DFT_CCSDTF12_BAC',
    'DFT_QCI_thermo',
    'CBS_QB3_1dHR',
]
thermo_library_path = os.path.join(rmgpy.settings['database.directory'], 'thermo')
thermo_database = rmgpy.data.thermo.ThermoDatabase()
thermo_database.load(thermo_library_path, libraries=thermo_libs)

# kinetics: only the Disproportionation family, no kinetics libraries
family = 'Disproportionation'
ref_library_path = os.path.join(rmgpy.settings['database.directory'], 'kinetics')
kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
kinetics_database.load(ref_library_path, libraries=[], families=[family])

# bundle both parts into a single RMGDatabase object
ref_db = rmgpy.data.rmg.RMGDatabase()
ref_db.thermo = thermo_database
ref_db.kinetics = kinetics_database

In [None]:
# i = 213
# templates = ref_db.kinetics.generate_reactions(
#     reaction_list[i].reactants,
#     products=reaction_list[i].products,
#     only_families=[family]
# )

In [None]:
# def get_species(lib_sp):
#     for mechanism_sp in species_list:
#         if mechanism_sp.is_isomorphic(lib_sp):
#             return mechanism_sp

In [12]:
def relabel(input_reaction):
    """Apply the family's atom labels (*1, *2, ...) to a reaction, in place.

    Every combination of resonance structures of the reactants and products is
    offered to ``ref_db.kinetics.families[family].get_labeled_reactants_and_products``
    until one of them can be labeled. The labeled molecules then replace
    ``molecule[0]`` of the isomorphic species of ``input_reaction``.

    Adapted from AutoTST (autotst/reaction.py).

    Args:
        input_reaction: reaction whose ``reactants`` and ``products`` are species
            exposing ``.smiles`` and ``.molecule``.

    Returns:
        True once a combination was labeled and applied. False if there was
        nothing to try, or no combination produced labels without raising.

    Raises:
        rmgpy.exceptions.ActionError: if every labeling attempt that reported a
            result failed; the error of the last failed attempt is re-raised.
    """
    def get_rmg_mol(smile):
        # special-case SMILES spellings (table copied from AutoTST)
        smiles_conversions = {
                "[CH]": "[CH...]",
                "CARBONMONOXIDE": "[C-]#[O+]"
            }
        smile = smiles_conversions.get(smile.upper(), smile)
        return rmgpy.molecule.Molecule(smiles=smile).generate_resonance_structures()

    def assign_labeled(species_side, labeled_molecules):
        # Hand out each labeled molecule at most once, so two identical species
        # on the same side do not end up sharing one labeled molecule object.
        unused = list(labeled_molecules)
        for species in species_side:
            for position, labeled in enumerate(unused):
                if species.molecule[0].is_isomorphic(labeled):
                    species.molecule[0] = labeled
                    del unused[position]
                    break

    rmg_reactants = [get_rmg_mol(sp.smiles) for sp in input_reaction.reactants]
    rmg_products = [get_rmg_mol(sp.smiles) for sp in input_reaction.products]

    combos_to_try = itertools.product(
        itertools.product(*rmg_reactants),
        itertools.product(*rmg_products),
    )

    last_error = None
    for reactant_combo, product_combo in combos_to_try:
        try:
            labeled_r, labeled_p = ref_db.kinetics.families[family].get_labeled_reactants_and_products(
                list(reactant_combo),
                list(product_combo)
            )
        except rmgpy.exceptions.ActionError as err:
            # this resonance combination does not fit the template; keep the
            # error and move on to the next combination instead of aborting
            last_error = err
            continue

        # NOTE(review): the family method may return (None, None) when it finds
        # no labels — treated here as "try the next combination"; confirm
        if labeled_r is None or labeled_p is None:
            continue

        assign_labeled(input_reaction.reactants, labeled_r)
        assign_labeled(input_reaction.products, labeled_p)
        return True

    if last_error is not None:
        # nothing matched: surface the failure rather than silently returning
        raise last_error
    return False

In [13]:
def print_labels(rxn):
    """Print, one line per species, the labeled-atom dictionary of ``molecule[0]``
    for every reactant and then every product of ``rxn``."""
    all_species = rxn.reactants + rxn.products
    for species in all_species:
        labeled_atoms = species.molecule[0].get_all_labeled_atoms()
        print(labeled_atoms)

In [17]:
# Show the atom labels of mechanism reaction 213 before and after relabeling
idx = 213
mechanism_rxn = reaction_list[idx]

print_labels(mechanism_rxn)
relabel(mechanism_rxn)
print_labels(mechanism_rxn)



{'*1': <Atom 'O.'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*3': <Atom 'C'>, '*2': <Atom 'C'>}
{'*1': <Atom 'O.'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*3': <Atom 'C'>, '*2': <Atom 'C'>}


In [None]:
# Find the entry by its .index instead of using idx as a dictionary key: the
# library assembled earlier in this notebook is keyed by entry label, so
# entries[idx] only resolves once the library has been reloaded from disk.
entry_for_idx = next(
    candidate
    for candidate in ark_kinetics_database.libraries["kinetics"].entries.values()
    if candidate.index == idx
)
entry_for_idx.item.reactants[1].molecule[0].generate_resonance_structures()

In [21]:
idx = 213

# Find the entry by its .index instead of using idx as a dictionary key: the
# library assembled earlier in this notebook is keyed by entry label, so
# entries[idx] only resolves once the library has been reloaded from disk.
entry_for_idx = next(
    candidate
    for candidate in ark_kinetics_database.libraries["kinetics"].entries.values()
    if candidate.index == idx
)

# labels before and after relabeling
print_labels(entry_for_idx.item)
relabel(entry_for_idx.item)
print_labels(entry_for_idx.item)


{}
{}
{}
{}
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'O.'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C'>}


In [None]:
# This portion copied from rmg_tools/database/add_training_reactions

In [18]:
import os
import numpy as np
import matplotlib.pyplot as plt

import rmgpy.data.thermo
import rmgpy.data.kinetics
import rmgpy.reaction
import rmgpy.species
import rmgpy

import rmgpy.chemkin

In [19]:
def duplicate_exists(test_entry):
    """Return True if some entry of the global ``training_depo`` holds a reaction
    isomorphic to ``test_entry.item``; return False otherwise."""
    return any(
        existing.item.is_isomorphic(test_entry.item)
        for existing in training_depo.entries.values()
    )

In [20]:
# Read the library that was written to output_path back into a fresh database
new_training_rxns = output_path
ark_kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
ark_kinetics_database.load_libraries(new_training_rxns)
n_loaded = len(ark_kinetics_database.libraries["kinetics"].entries)
print(f'{n_loaded} new reactions loaded')

64 new reactions loaded


In [37]:
# Relabel the atoms of every library reaction, printing the labels before and after
library_entries = ark_kinetics_database.libraries["kinetics"].entries
for entry in library_entries:
    index = library_entries[entry].index
    rxn = library_entries[entry].item
    print(index)
    print_labels(rxn)
    relabel(rxn)
    print('-----------------------------------------')
    print_labels(rxn)
    print()

213
{'*1': <Atom 'C.'>}
{'*1': <Atom 'O.'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C'>, '*4': <Atom 'H'>}
-----------------------------------------
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'O.'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C'>}

390
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C..'>}
{'*1': <Atom 'C.'>}
{'*3': <Atom 'C'>, '*2': <Atom 'C'>}
-----------------------------------------
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C..'>}
{'*1': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*3': <Atom 'C'>, '*2': <Atom 'C'>}

391
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C.'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C'>}
{'*3': <Atom 'C'>, '*2': <Atom 'C'>}
-----------------------------------------
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C.'>}
{'*1': <Atom 'C'>, '*4': <Atom 'H'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C'>}

402
{'*1'

ActionError: Something wrong with products that RMG cannot find a match!

In [25]:
# Labels of reloaded library reaction 213 before and after relabeling
idx = 213
reloaded_rxn = ark_kinetics_database.libraries["kinetics"].entries[idx].item

print_labels(reloaded_rxn)
relabel(reloaded_rxn)
print_labels(reloaded_rxn)


{'*1': <Atom 'C.'>}
{'*1': <Atom 'O.'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*1': <Atom 'C'>, '*4': <Atom 'H'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C.'>, '*4': <Atom 'H'>}
{'*1': <Atom 'O.'>}
{'*1': <Atom 'O'>, '*4': <Atom 'H'>}
{'*2': <Atom 'C'>, '*3': <Atom 'C'>}


In [None]:
# Print reaction 213 followed by each of its species with its labeled atoms
rxn_213 = ark_kinetics_database.libraries["kinetics"].entries[213].item
print(rxn_213)
print()

for sp in rxn_213.reactants + rxn_213.products:
    print(sp, sp.molecule[0].get_all_labeled_atoms())

In [None]:
# Load only the Disproportionation family (no libraries) and fetch its training depository
family = 'Disproportionation'
ref_library_path = os.path.join(rmgpy.settings['database.directory'], 'kinetics')
ref_database = rmgpy.data.kinetics.KineticsDatabase()
ref_database.load(ref_library_path, libraries=[], families=[family])
training_depo = ref_database.families['Disproportionation'].get_training_depository()
n_training = len(training_depo.entries)
print(n_training, 'training reactions')

In [None]:
# Read the species of the family's training dictionary (presumably keyed by species label)
training_dictionary_path = os.path.join(
    rmgpy.settings['database.directory'], 'kinetics', 'families', family, 'training', 'dictionary.txt'
)
species_dict = training_depo.get_species(training_dictionary_path)

## Redo the species dictionary to include everything we'll need

In [None]:
def equivalent_labels(dict1, dict2):
    """Return True when both label->atom mappings have exactly the same labels
    and the atoms under each label have equal ``atomtype``; otherwise False."""
    try:
        # walk the labels of dict1 and then those of dict2; a label missing
        # from either mapping raises KeyError, which means "not equivalent"
        return not any(
            dict2[label].atomtype != dict1[label].atomtype
            for label in (*dict1, *dict2)
        )
    except KeyError:
        return False

# # should return true
# equivalent_labels(
#     ark_kinetics_database.libraries["kinetics"].entries[213].item.reactants[0].molecule[0].get_all_labeled_atoms(),
#     species_dict['C2H5'].molecule[0].get_all_labeled_atoms()
# )

In [None]:
def get_species(sp):
    """Return the first species of the global ``species_dict`` that is isomorphic
    to ``sp`` and carries equivalent atom labels on ``molecule[0]``.

    Returns None when no such species exists.
    """
    for candidate in species_dict.values():
        if not sp.is_isomorphic(candidate):
            continue
        if equivalent_labels(
            sp.molecule[0].get_all_labeled_atoms(),
            candidate.molecule[0].get_all_labeled_atoms(),
        ):
            return candidate
    return None

In [None]:
# Compare the labels of reaction 213's first reactant with those of its dictionary match
first_reactant = ark_kinetics_database.libraries["kinetics"].entries[213].item.reactants[0]
print(first_reactant.molecule[0].get_all_labeled_atoms())

dict_spec = get_species(first_reactant)
print(dict_spec)

print(dict_spec.molecule[0].get_all_labeled_atoms())

In [None]:
# Collect every library species that has no identically-labeled match in species_dict
# NOTE(review): whether the set collapses repeated species depends on how the
# species objects hash and compare — confirm duplicates are handled as intended
entries_to_add = set()
library_entries = ark_kinetics_database.libraries["kinetics"].entries
for entry in library_entries:
    reactants = library_entries[entry].item.reactants
    products = library_entries[entry].item.products
    for sp in reactants + products:
        dict_sp = get_species(sp)
        if not dict_sp:
            entries_to_add.add(sp)

# Register the missing species under their own labels; a label clash is an error
for sp in entries_to_add:
    if sp.label in species_dict:
        raise ValueError(f'duplicate {sp.label}')
    species_dict[sp.label] = sp

In [None]:
# save and reload the dictionary
# NOTE(review): training_dir is the family's training folder inside
# rmgpy.settings['database.directory'], so this writes to the same
# dictionary.txt that species_dict was read from.
training_dir = os.path.join(rmgpy.settings['database.directory'], 'kinetics', 'families', family, 'training')
training_depo.save_dictionary(os.path.join(training_dir, 'dictionary.txt'))

# Reload only the Disproportionation family (no libraries) so that training_depo
# is rebuilt from the files on disk.
# NOTE(review): species_dict itself is not re-read here — verify that the
# species added in memory were actually written by save_dictionary.
family = 'Disproportionation'
ref_library_path = os.path.join(rmgpy.settings['database.directory'], 'kinetics')
ref_database = rmgpy.data.kinetics.KineticsDatabase()
ref_database.load(
    ref_library_path,
    libraries=[],
    families=[family]
)
training_depo = ref_database.families['Disproportionation'].get_training_depository()
print(len(training_depo.entries), 'training reactions')

In [None]:
# Sanity check: every library species must now have a match in species_dict
entries_to_add = set()
library_entries = ark_kinetics_database.libraries["kinetics"].entries
for entry in library_entries:
    reactants = library_entries[entry].item.reactants
    products = library_entries[entry].item.products
    for sp in reactants + products:
        dict_sp = get_species(sp)
        if not dict_sp:
            entries_to_add.add(sp)
assert len(entries_to_add) == 0

In [None]:
# unused function for relabeling/incrementing species
def increment_label(old_label):
    """Return the next numbered variant of a species label.

    'X' becomes 'X-2' and 'X-<n>' becomes 'X-<n+1>'. If the text after the last
    '-' is not an integer (e.g. 'CH3_r1-x'), the label is treated as unnumbered
    and '-2' is appended instead of raising ValueError.

    Args:
        old_label: a label string, or any object with a string ``.label`` attribute.

    Returns:
        The incremented label as a string.
    """
    if not isinstance(old_label, str):
        old_label = old_label.label

    stem, dash, suffix = old_label.rpartition('-')
    if not dash:
        # no '-' at all: start numbering at 2
        return f'{old_label}-2'

    try:
        next_number = int(suffix) + 1
    except ValueError:
        # the part after the last '-' is not a counter
        return f'{old_label}-2'
    return f'{stem}-{next_number}'

# # tests should print 'C6H5-3' and HO2-2
# print(increment_label(species_dict['C6H5-2']))
# print(increment_label(reaction_list[213].reactants[0].label))

## Change the names to match the ones already in the species dictionary

In [None]:
# for entry in ark_kinetics_database.libraries["kinetics"].entries:
#     for i in range(0, len(ark_kinetics_database.libraries["kinetics"].entries[entry].item.reactants)):
#         old_reactant_name = ark_kinetics_database.libraries["kinetics"].entries[entry].item.reactants[i].label
#         official_sp = get_species(ark_kinetics_database.libraries["kinetics"].entries[entry].item.reactants[i])
#         # it already exists so we have to revise the label names in the entry
#         if official_sp:
#             print(f'changing species {}')
#             ark_kinetics_database.libraries["kinetics"].entries[entry].item.reactants[i] = official_sp
#             ark_kinetics_database.libraries["kinetics"].entries[entry].label = ark_kinetics_database.libraries["kinetics"].entries[entry].label.replace(
#                 old_reactant_name,
#                 official_sp.label
#         else:
#             raise ValueError('did not successfully add to training depo species dictionary')
#     break

In [None]:
# go through the new training reactions and make sure they use existing species dictionary definition
def _use_official_species(library_entry, species_on_side):
    """Replace each species of one reaction side by its species-dictionary match.

    ``species_on_side`` is the entry's own ``item.reactants`` or ``item.products``
    list and is modified in place. In the entry label, the old species label is
    replaced by the label of the dictionary species.

    Raises:
        ValueError: if ``get_species`` finds no identically-labeled dictionary species.
    """
    for position, current_sp in enumerate(species_on_side):
        old_name = current_sp.label
        official_sp = get_species(current_sp)
        if not official_sp:
            raise ValueError('did not successfully add to training depo species dictionary')
        # it already exists so we have to revise the label names in the entry
        species_on_side[position] = official_sp
        library_entry.label = library_entry.label.replace(old_name, official_sp.label)


for entry in ark_kinetics_database.libraries["kinetics"].entries:
    library_entry = ark_kinetics_database.libraries["kinetics"].entries[entry]
    # reactants first, then products, both handled by the same helper
    _use_official_species(library_entry, library_entry.item.reactants)
    _use_official_species(library_entry, library_entry.item.products)


In [None]:
# Add the new reactions to the training depository under unused integer keys
index_start = 100

new_entries = ark_kinetics_database.libraries['kinetics'].entries
for i, entry in enumerate(new_entries):
    candidate = new_entries[entry]

    # reactions already present in the depository are not added again
    if duplicate_exists(candidate):
        continue
    if candidate.item.elementary_high_p:
        # manual fix to this issue
        candidate.item.elementary_high_p = False

    # first free key at or after index_start + i
    j = index_start + i
    while j in training_depo.entries:
        j += 1
    training_depo.entries[j] = candidate
print(len(training_depo.entries), 'training reactions')

In [None]:
# is this the only mistake?
# CH3-2 should be CH3_r1-2
for entry in training_depo.entries:
    flagged = training_depo.entries[entry]
    if 'CH3-2' not in flagged.label:
        continue
    print(flagged.index, flagged)
    print(flagged.item.products[0])

In [None]:
# of course not, CH2(23) should be CH2-2
for entry in training_depo.entries:
    flagged = training_depo.entries[entry]
    if 'CH2(23)' not in flagged.label:
        continue
    print(flagged.index, flagged)
    print(flagged.item.reactants[1])

In [None]:
# list the entries whose label still contains the mechanism-style name CH3(18)
for entry in training_depo.entries:
    flagged = training_depo.entries[entry]
    if 'CH3(18)' not in flagged.label:
        continue
    print(flagged.index, flagged)
    print(flagged.item.products[0])

In [None]:
# Leftover inspection: `entry` is whatever key the most recently run loop left behind
training_depo.entries[entry]

In [None]:
# Leftover exploration: list the attribute names available on the training depository
dir(training_depo)

In [None]:
# Save the results
# NOTE(review): training_dir is the family's training folder inside
# rmgpy.settings['database.directory'], not a separate location, so reactions.py
# is written there. The dictionary save below is left commented out.
training_dir = os.path.join(rmgpy.settings['database.directory'], 'kinetics', 'families', family, 'training')
# training_depo.save_dictionary(os.path.join(training_dir, 'dictionary.txt'))
training_depo.save(os.path.join(training_dir, 'reactions.py'))