In [1]:
import itertools
import logging

from rmgpy.data.rmg import RMGDatabase
from rmgpy.data.kinetics.depository import KineticsDepository
from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary
from rmgpy.rmg.model import Species
from rmgpy import settings

from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction


## Load the library reactions (lib_rxn)

In [2]:
# Reaction libraries whose entries are converted to training reactions below.
libraries = [
    'vinylCPD_H',
    'C3',
    'C10H11',
    'Fulvene_H',
    'naphthalene_H',
]

# Load the RMG database, requesting 'all' kinetics families and depositories
# but only the reaction libraries listed above.
database = RMGDatabase()
database.load(
    settings['database.directory'],
    kineticsFamilies='all',
    reactionLibraries=libraries,
    kineticsDepositories='all'
)

## Generate the family reactions (fam_rxn), replace the species, and build reactionDict

In [3]:
def _unmatched_species(lib_species, fam_species):
    """Return the members of fam_species left without an isomorphic partner.

    Each species in lib_species consumes at most one isomorphic species from
    a copy of fam_species; whatever is left over is returned.
    """
    remaining = list(fam_species)
    for lib_spc in lib_species:
        for fam_spc in remaining:
            if lib_spc.isIsomorphic(fam_spc):
                remaining.remove(fam_spc)
                break
    return remaining


# Maps a family (fam_rxn.family) to the list of library reactions that RMG
# reproduced with exactly one family reaction.
reactionDict = {}
for libraryName in libraries:
    kineticLibrary = database.kinetics.libraries[libraryName]
    for entry in kineticLibrary.entries.values():
        lib_rxn = entry.item
        lib_rxn.kinetics = entry.data
        lib_rxn.index = entry.index

        # Let's make RMG try to generate this reaction from the families!
        # Every combination of reactant resonance structures is tried once.
        # (Indexing each species with `i % len(spc.molecule)` only covers all
        # combinations when the structure counts are coprime; otherwise some
        # combinations are repeated and others never visited.)
        reactant_structures = [spc.molecule for spc in lib_rxn.reactants]
        fam_rxn_list = []
        for rxts_mol in itertools.product(*reactant_structures):
            pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]
            fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(
                reactants=list(rxts_mol), products=pdts_mol))

        # Only an unambiguous match is converted.  With no match the reaction
        # needs chemistry that no family provides; with several matches the
        # training reaction has to be created manually.  Both cases are skipped.
        if len(fam_rxn_list) != 1:
            continue
        fam_rxn = fam_rxn_list[0]

        # danger: the fam_rxn may have switched the reactants with products.
        # It was generated for lib_rxn's reactants and products, so it matches
        # lib_rxn; only the direction still has to be determined.
        leftover_reactants = _unmatched_species(lib_rxn.reactants, fam_rxn.reactants)
        leftover_products = _unmatched_species(lib_rxn.products, fam_rxn.products)
        forward = len(leftover_reactants) == 0 and len(leftover_products) == 0

        # find the labeled atoms using family and reactants & products from fam_rxn
        addAtomLabelsForReaction(fam_rxn, database)

        # species replacement so that labeledAtoms is restored
        if forward:
            lib_rxn.reactants = fam_rxn.reactants
            lib_rxn.products = fam_rxn.products
        else:
            lib_rxn.reactants = fam_rxn.products
            lib_rxn.products = fam_rxn.reactants

        reactionDict.setdefault(fam_rxn.family, []).append(lib_rxn)

In [None]:
# NOTE(review): the executed cell right after this one contains the same loop;
# running both calls saveTrainingReaction twice per family -- confirm whether
# this unexecuted copy should be kept.
for familyName in reactionDict:
    print('Adding training reactions for family: ' + familyName)
    kineticFamily = database.kinetics.families[familyName]

    # Look for the family's existing training depository, if there is one.
    trainingDatabase = None
    for depository in kineticFamily.depositories:
        if depository.label.endswith('training'):
            trainingDatabase = depository
            break

    reactions = reactionDict[familyName]

    # A family without a training depository has no previous reactions;
    # report 0 instead of dereferencing None.
    if trainingDatabase is None:
        previous_count = 0
    else:
        previous_count = len(trainingDatabase.entries.values())
    print('reactions.py previously has {} rxns. Now adding {} new rxns'.format(
        previous_count, len(reactions)))

    kineticFamily.saveTrainingReaction(reactions)

In [4]:
# For every family that received library reactions, report how many training
# reactions it had before and hand the new ones to saveTrainingReaction.
for familyName in reactionDict:
    print('Adding training reactions for family: ' + familyName)
    kineticFamily = database.kinetics.families[familyName]

    # Look for the family's existing training depository, if there is one.
    trainingDatabase = None
    for depository in kineticFamily.depositories:
        if depository.label.endswith('training'):
            trainingDatabase = depository
            break

    reactions = reactionDict[familyName]

    # A family without a training depository has no previous reactions;
    # report 0 instead of dereferencing None.
    if trainingDatabase is None:
        previous_count = 0
    else:
        previous_count = len(trainingDatabase.entries.values())
    print('reactions.py previously has {} rxns. Now adding {} new rxns'.format(
        previous_count, len(reactions)))

    kineticFamily.saveTrainingReaction(reactions)

Adding training reactions for family: R_Addition_MultipleBond
reactions.py previously has 9 rxns. Now adding 38 new rxns
Adding training reactions for family: Intra_R_Add_Exocyclic
reactions.py previously has 0 rxns. Now adding 2 new rxns
Adding training reactions for family: Intra_R_Add_Endocyclic
reactions.py previously has 0 rxns. Now adding 12 new rxns
Adding training reactions for family: intra_H_migration
reactions.py previously has 4 rxns. Now adding 11 new rxns
Adding training reactions for family: R_Recombination
reactions.py previously has 9 rxns. Now adding 1 new rxns
Adding training reactions for family: H_shift_cyclopentadiene
reactions.py previously has 0 rxns. Now adding 2 new rxns


# How saveTrainingReaction works
## get speciesDict

### Load the existing species as an initial speciesDict

In [None]:
import os
from rmgpy.data.base import Database

# Training directory of the R_Addition_MultipleBond family in the RMG database.
training_path = os.path.join(
    settings['database.directory'],
    'kinetics',
    'families',
    'R_Addition_MultipleBond',
    'training'
)
dictionary_file = os.path.join(training_path, 'dictionary.txt')

# Start speciesDict from the species already listed in the training dictionary.
speciesDict = Database().getSpecies(dictionary_file)

### For one family, check the uniqueness of each species in the lib_rxns

In [None]:
familyName = 'R_Addition_MultipleBond'
print('Adding training reactions for family: ' + familyName)
kineticFamily = database.kinetics.families[familyName]
reactions = reactionDict[familyName]


def _find_existing_species(spec, known_species):
    """Return the first species in known_species matching spec, else None.

    A candidate matches when it has the same formula, carries every atom label
    that spec carries, and is isomorphic to spec with those labeled atoms
    mapped onto each other.
    """
    mol = spec.molecule[0]
    formula = mol.getFormula()
    for known_label in known_species:
        candidate = known_species[known_label]
        candidate_mol = candidate.molecule[0]
        if candidate_mol.getFormula() != formula:
            continue
        labeled = mol.getLabeledAtoms()
        candidate_labeled = candidate_mol.getLabeledAtoms()
        try:
            initial_map = dict(
                (labeled[atom_label], candidate_labeled[atom_label])
                for atom_label in labeled
            )
        except KeyError:
            # atom labels did not match, therefore not a match
            continue
        if mol.isIsomorphic(candidate_mol, initial_map):
            return candidate
    return None


def _unused_label(formula, known_species):
    """Return formula, or formula-2, formula-3, ... if formula is already a key."""
    if formula not in known_species:
        return formula
    suffix = 2
    while (formula + '-{}'.format(suffix)) in known_species:
        suffix += 1
    return formula + '-{}'.format(suffix)


for rxn in reactions:
    for spec in (rxn.reactants + rxn.products):
        existing = _find_existing_species(spec, speciesDict)
        if existing is not None:
            # Reuse the label of the species that is already in the dictionary.
            spec.label = existing.label
            continue
        # no isomorphic existing species found: register it under a new label
        spec.label = _unused_label(spec.molecule[0].getFormula(), speciesDict)
        speciesDict[spec.label] = spec

## save to files

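Save the reactions from reactionDict to reactions.py and the species from speciesDict to dictionary.txt. Note that the cells below write to the test files `reactions_test.py` and `directory_test.txt` rather than to those files.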

In [None]:
# Open the family's test training file in append mode.  The handle stays open
# across cells; the cell that writes the entries closes it.
training_file_path = os.path.join(
    settings['database.directory'],
    'kinetics',
    'families',
    kineticFamily.label,
    'training',
    'reactions_test.py'
)
training_file = open(training_file_path, 'a')

# Put two newlines between any existing content and the appended entries.
training_file.write("\n\n")

In [None]:
# Find the family's training depository and the largest reaction index in it.
# `logging` and `KineticsDepository` are used by the fallback branch and are
# imported in the notebook's import cell; without those imports the branch
# raised NameError.
for depository in kineticFamily.depositories:
    if depository.label.endswith('training'):
        break
else:
    # No training depository yet: create an empty one and register it.
    logging.info('Could not find training depository in family {0}.'.format(kineticFamily.label))
    logging.info('Starting a new one')
    depository = KineticsDepository()
    kineticFamily.depositories.append(depository)

trainingDatabase = depository

# New entries are numbered after the current maximum (0 for an empty depository).
indices = [entry.index for entry in trainingDatabase.entries.values()]
maxIndex = max(indices) if indices else 0

In [None]:
# save reactions.py
from rmgpy.data.base import Entry

# Wrap each reaction in an Entry numbered after the current maximum index and
# append it to the open training file.  The try/finally closes the handle
# opened in the earlier cell even when saving an entry fails.
try:
    for i, reaction in enumerate(reactions):
        entry = Entry(
            index = maxIndex + i + 1,
            label = str(reaction),
            item = reaction,
            data = reaction.kinetics,
            reference = None,
            referenceType = '',
            shortDesc = unicode(''),
            longDesc = unicode(''),
            rank = 3,
            )
        print(reaction)
        kineticFamily.saveEntry(training_file, entry)
finally:
    training_file.close()

In [None]:
# save dictionary.txt
# NOTE(review): the output file is named 'directory_test.txt' rather than
# 'dictionary...' -- confirm the name is intended.
directory_test_file = os.path.join(training_path, 'directory_test.txt')

# Write one adjacency list per species, each followed by a newline.
with open(directory_test_file, 'w') as f:
    for label, species in speciesDict.items():
        adjacency_list = species.molecule[0].toAdjacencyList(label=label, removeH=False)
        f.write(adjacency_list)
        f.write('\n')