Final version of SMARTS

Firstly, I need to import all of the necessary libraries

In [1]:
from rdkit import rdBase
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdFMCS
from rdkit.Chem import rdRascalMCES
import pandas as pd

Second, I need to load the data from our CSV file.

I will preprocess it as follows:
- I will only keep enzymes that are promiscuous or moderately promiscuous (rows labelled 'specific' or 'no-reaction' are dropped)
- rows with a missing product are dropped, and so are rows whose product is -1 (I don't think there are any, but the check is there as a precaution)
- if more than one product is listed, I will only keep the longest one (assuming that the reaction alters the main core of the substrate, which remains the longest product)
- I will group the dataframe by enzyme name and join all substrates and all products of each enzyme into one string each, separated by ';'

In [2]:
# Load the raw database; the CSV uses ';' as its field separator.
df = pd.read_csv("Cytochrome_P450_database.csv", delimiter=";")
df2 = df.copy()
# Keep only rows that actually have a product recorded.
df2_altered = df2[df2['Product SMILES'].notna()]
# Drop enzymes labelled 'specific' and rows labelled 'no-reaction'.
df2_altered_2 = df2_altered[df2_altered['promiscuity'] != 'specific']
working_df = df2_altered_2[df2_altered_2['promiscuity'] != 'no-reaction']
# Precaution: drop rows whose product is the placeholder -1.
# The trailing .copy() makes working_df an independent frame, so the later
# in-place column assignment does not write into a slice of df2.
working_df = working_df[working_df['Product SMILES'] != -1].copy()

def select_longest_product(products):
    """Return the longest entry of a ';'-separated string of product SMILES.

    When several entries share the maximum length, the first one wins.
    """
    return max(products.split(';'), key=len)

# Replace every product entry by its longest ';'-separated component
longest_products = working_df['Product SMILES'].map(select_longest_product)
working_df['Product SMILES'] = longest_products

In [3]:
# Collapse working_df to one row per enzyme: all of its substrates and all of
# its products are joined into ';'-separated strings.
rows = []
for protein, group_df in working_df.groupby('protein'):
    substrate = ';'.join(group_df['Substrate SMILES'])
    product = ';'.join(group_df['Product SMILES'].astype(str))
    rows.append([protein, substrate, product])

promiscuous_df = pd.DataFrame(rows, columns=['protein', 'Substrate SMILES', 'Product SMILES'])

Now the dataframe that I will be searching in is ready.

The first step of the process is to find the substrates and products in the dataframe by the name of the enzyme, which can be done with the following function:

In [4]:
def subs_prods_smiles(protein_name, df_t):
    """Look up the substrates and products recorded for one enzyme.

    Parameters:
        protein_name: value to look for in the 'protein' column.
        df_t: dataframe with the columns 'protein', 'Substrate SMILES' and
            'Product SMILES'; the two SMILES columns hold ';'-separated strings.

    Returns:
        A tuple (substrates, products) of two lists of SMILES strings.

    Raises:
        ValueError: if no row matches protein_name (previously this surfaced
            as an UnboundLocalError from the return statement).
    """
    matches = df_t[df_t['protein'] == protein_name]
    if matches.empty:
        raise ValueError(f"Protein {protein_name!r} was not found in the dataframe")
    # If the name occurs more than once, the last matching row is used,
    # which is what the original row-by-row scan did.
    row = matches.iloc[-1]
    sub_smiles = row['Substrate SMILES'].split(';')
    prod_smiles = row['Product SMILES'].split(';')
    return sub_smiles, prod_smiles

Next step is to create groups from the substrates and products, this will be done based on the **chemical reaction** happening, using the following functions:

In [5]:
# Helper that reports which atoms of a molecule lie outside a substructure match
def get_changing_index(molecule, pattern):
    """
    Return the set of atom indexes of ``molecule`` that ``pattern`` does not cover.

    Substrate and product are indexed independently, so the changing atom cannot be
    looked up through a shared index. Instead, the atoms hit by ``pattern`` are
    subtracted from the atoms obtained by matching ``molecule`` against itself.
    The result is a set and may be empty or contain several indexes.
    """
    atoms_in_molecule = set(molecule.GetSubstructMatch(molecule))
    atoms_in_pattern = set(molecule.GetSubstructMatch(pattern))
    return atoms_in_molecule - atoms_in_pattern

In [6]:
def set_pattern(structure1,structure2):
    """
    Find the atoms of ``structure2`` that are not covered by ``structure1``.

    Parameters:
        structure1: SMILES string used as the substructure pattern.
        structure2: SMILES string of the molecule that is searched.

    ``structure1`` is first matched directly against ``structure2``. If it does not
    match (e.g. the pattern is larger than the molecule), the maximum common
    substructure of the two, found with rdFMCS, is used as the pattern instead.

    Returns:
        The set produced by get_changing_index(): atom indexes of ``structure2``
        outside the matched pattern.

    Raises:
        ValueError: if either SMILES cannot be parsed, or if even the MCS does not
            match the molecule. ValueError is a subclass of Exception, so callers
            catching the previously raised bare Exception keep working.
    """
    pattern = Chem.MolFromSmiles(structure1)
    molecule = Chem.MolFromSmiles(structure2)
    # MolFromSmiles returns None for an unparsable SMILES; fail with a clear message
    # instead of an unrelated error further down.
    if pattern is None:
        raise ValueError(f"Could not parse SMILES: {structure1!r}")
    if molecule is None:
        raise ValueError(f"Could not parse SMILES: {structure2!r}")

    # No direct hit: fall back to the maximum common substructure as the pattern
    if not molecule.GetSubstructMatch(pattern):
        MCS_pattern = rdFMCS.FindMCS([molecule,pattern])
        pattern = Chem.MolFromSmarts(MCS_pattern.smartsString)
        if pattern is None or not molecule.GetSubstructMatch(pattern):
            raise ValueError("Hit atoms still zero after using MCS!")
    return get_changing_index(molecule, pattern)

In [7]:
def chemical_reaction(subs,prod):
    """
    Describe the atom that differs between a substrate and a product SMILES.

    set_pattern() is called in both directions: with the substrate as the pattern it
    yields the product atoms the substrate does not cover, and vice versa.
    Returns a tuple (substrate_list, product_list). Each list is either
    [atom symbol, explicit valence] for one changed atom, or [] when that side has
    no uncovered atom (the atom was added or removed by the reaction).
    """
    def describe_changed_atom(smiles, changed_indexes):
        # An empty set means there is no changed atom on this side
        if not changed_indexes:
            return []
        # Only one atom is reported: the first in the set's iteration order
        mol = Chem.MolFromSmiles(smiles)
        atom = mol.GetAtomWithIdx(list(changed_indexes)[0])
        return [atom.GetSymbol(), atom.GetExplicitValence()]

    changed_in_product = set_pattern(subs,prod)
    changed_in_substrate = set_pattern(prod,subs)
    substrate_list = describe_changed_atom(subs, changed_in_substrate)
    product_list = describe_changed_atom(prod, changed_in_product)
    return substrate_list, product_list

In [8]:
def create_groups(input_subs,input_prods):
    """ Group substrate/product pairs by the chemical change between them.

    Takes a list of substrate structures and a list of product structures of the same length.
    For each pair, chemical_reaction() gives the changed atom and valence on both sides;
    the two lists are concatenated into a tuple that serves as the reaction key.
    Output is a dictionary, key is the reaction and value is a list of pair indexes,
    eg. {('O', 2): [0, 1, 2], ('C', 4, 'C', 4): [3, 4, 5]}
    """
    reaction_groups = {}
    for position, substrate in enumerate(input_subs):
        substrate_change, product_change = chemical_reaction(substrate, input_prods[position])
        reaction_key = tuple(substrate_change + product_change)
        # Start a new group on first sight of a reaction, then record the pair index
        reaction_groups.setdefault(reaction_key, []).append(position)
    return reaction_groups

The third step is to possibly split the created groups. The groups are initially created based only on the chemical reaction; they do not take into consideration what kinds of molecules they contain. If we did not split them, the generated MCS would be more general, whereas we want it to be specific.

In [9]:
def find_max_value(combinations_list):
    """
    Pick the combination with the highest similarity.

    Input is a list of combinations: [[index1, index2, X3, similarity], [index1, index22, X3, similarity]],
    where the similarity is the last item of each inner list.
    Output is a tuple (maximum similarity, the combination it belongs to);
    on ties the earliest combination in the list is returned.
    """
    def similarity_of(combination):
        return combination[-1]

    best_combination = max(combinations_list, key=similarity_of)
    return similarity_of(best_combination), best_combination

In [10]:
def create_group(input_combination, input_dictionary):
    """
    Put the two structures of a combination into the same group.

    First input is a list of 4 values, representing a combination of structures: [index1, index2, X3, similarity],
    second input is a dictionary consisting of groups created in this step: {'group1': [0,1,2], 'group2': [3]}
    Every existing group that already holds one of the two indexes receives the other one;
    if no group holds either index, a new group with both is created.
    The input dictionary is modified in place and also returned.
    """
    first, second, _, _ = input_combination
    joined_existing = False
    for members in input_dictionary.values():
        # Groups that contain neither index are left untouched
        if first not in members and second not in members:
            continue
        for index in (first, second):
            if index not in members:
                members.append(index)
        joined_existing = True
    if not joined_existing:
        new_key = f"group{len(input_dictionary)+1}"
        input_dictionary[new_key] = [first, second]
    return input_dictionary

In [11]:
def merge_groups(input_dictionary):
    """ Used to avoid duplicate groups.
    Through the process, two groups such as [0,1,2] and [2,1,0] can be created, they represent the same structures so having two of them is unnecessary and wrong.
    Input is a dictionary in the following form: {'group1': [0,1,2], 'group2': [2,1,0], 'group3': [3]}
    Output is a new dictionary that does not have duplicate values: {'group1': [0,1,2], 'group2': [3]}
    Each output group is sorted, and groups are numbered in the order in which they first
    appear in the input. Only groups with exactly the same members are merged.
    """
    # Sorting makes [0,1,2] and [2,1,0] compare equal; tuples are hashable
    normalised_groups = [tuple(sorted(members)) for members in input_dictionary.values()]
    # dict.fromkeys removes duplicates but, unlike set(), keeps first-seen order,
    # so the group numbering does not depend on hash order
    unique_groups = dict.fromkeys(normalised_groups)
    return {f"group{number}": list(members) for number, members in enumerate(unique_groups, start=1)}

In [12]:
from itertools import combinations
def MCS_groups(input, substrate_structures, input_threshold):
    """ 
    Split one reaction group into sub-groups of structurally similar substrates.

    First input is a list of indexes to be evaluated, second input is a list of all structures for a specific enzyme.
    Third input is the threshold that will be used for similarity comparison.
    Creates all possible pairs of the given indexes and finds the MCES for each pair with Rascal.
    Saved in the form of list of lists: [[0, 1, 23, 0.987], [0, 2, 18, 0.765], [1, 2, 12, 0.543]]
    (index, index, number of matched atoms, similarity); pairs without a result are recorded as [.., .., 0, 0].
    The pairs are then visited from the most to the least similar: if the similarity is >= threshold the
    two indexes are grouped with create_group(); otherwise each index that is not in any group yet
    gets a group of its own.
    Uses merge_groups() to avoid duplicates and outputs a dictionary: {'group1': [0,1,2], 'group2': [3]}
    """
    # Parse every structure once instead of once per pair
    molecules = {index: Chem.MolFromSmiles(substrate_structures[index]) for index in input}

    # It loops through all pairs and uses Rascal MCES to find the MCS
    MCS_combinations = []
    for first, second in combinations(input, 2):
        mces = rdRascalMCES.FindMCES(molecules[first], molecules[second])
        # An empty result means Rascal found no MCES for the pair; record zeroes
        if mces:
            size_and_similarity = [len(mces[0].atomMatches()), mces[0].similarity]
        else:
            size_and_similarity = [0, 0]
        MCS_combinations.append([first, second] + size_and_similarity)

    def next_group_name():
        # Groups are only ever added, never removed, so len + 1 is always an unused name.
        # The previous "len + 2" for the second index skipped a number, which let a later
        # insertion overwrite an existing singleton group and silently drop a structure.
        return f"group{len(new_groups)+1}"

    def grouped_indexes():
        return {index for members in new_groups.values() for index in members}

    new_groups = {}
    # A stable descending sort visits the pairs in the same order as repeatedly
    # extracting the first maximum and removing it
    for combination in sorted(MCS_combinations, key=lambda entry: entry[-1], reverse=True):
        if combination[-1] >= input_threshold:
            # Similar enough: the two structures belong together
            new_groups = create_group(combination, new_groups)
            continue
        # Below the threshold the two structures must stay separate; any of them
        # that has not been recorded yet gets a group of its own
        already_grouped = grouped_indexes()
        for index in combination[:2]:
            if index not in already_grouped:
                new_groups[next_group_name()] = [index]

    # An index that never took part in a pair (single-index input) still keeps a group
    already_grouped = grouped_indexes()
    for index in input:
        if index not in already_grouped:
            new_groups[next_group_name()] = [index]
            already_grouped.add(index)

    return merge_groups(new_groups)

In [13]:
def split_groups(input,substrate_structures, threshold):
    """
    Refine the reaction groups by structural similarity.

    First input is a dictionary of chemical reactions created in a previous step: {('O', 1): [0, 1, 2, 3], ('O', 2): [4]}
    Second input is a list of all substrate structures: ['HCOOH','C**',....]
    Third input is the threshold that will be used for similarity comparisons
    A reaction with exactly one index is copied unchanged under its original key.
    Every other reaction is passed to MCS_groups() and each resulting group is stored under the
    reaction key extended by the group name.
    Output is a dictionary of split groups: {('O', 1, 'group1'): [0, 1, 2], ('O', 1, 'group2'): [3], ('O', 2): [4]}
    """
    split_dictionary = {}
    for reaction, indexes in input.items(): # reaction, indexes = ('O', 1), [0, 1, 2, 3]
        # Nothing to split when the reaction has a single structure
        if len(indexes) == 1:
            split_dictionary[reaction] = indexes
            continue
        sub_groups = MCS_groups(indexes, substrate_structures, threshold) # {'group1': [0, 1, 2], 'group2': [3]}
        for group_name, members in sub_groups.items():
            split_dictionary[(*reaction, group_name)] = members
    return split_dictionary

After the groups are finalized, all that's left is finding the MCS for the substrates and the products and joining them into reactions:

In [14]:
def finding_MCS(input,grouped_dict):
    """
    Fourth step of the whole process, finds one SMARTS-like structure for each group.

    First input is a list of structures (SMILES): ['HCOOH','C**',....],
    Second input is the dictionary created in the previous step: {('O', 1, 'group1'): [0, 1, 2],('O', 1, 'group2'):  [3], ('O', 2, 'group1'): [4, 5, 6]},
    If there is just one index, the SMARTS of that structure is used. If there are more, the MCES of the
    FIRST TWO structures of the group is found with Rascal; further members are not considered.
    Output is a dictionary: {('O', 1, 'group1'): 'MCSs1',('O', 1, 'group2'):  'MCSs2', ('O', 2, 'group1'): 'MCSs3'}.

    Raises IndexError if a group is empty or if Rascal returns no MCES for the first two structures.

    NOTE(review): '[#6]'/'[#8]' are rewritten to 'C'/'O' for readability. In SMARTS an upper-case
    symbol denotes an aliphatic atom, while '[#6]' matches any carbon - confirm that this is
    intended for aromatic atoms.
    """
    d_out = {}
    # It iterates over each reaction-indexes pair
    for change, indexes in grouped_dict.items():
        molecules = [Chem.MolFromSmiles(input[x]) for x in indexes]
        # A single structure is its own "common" structure
        if len(indexes) == 1:
            structure = Chem.MolToSmarts(molecules[0])
            d_out[change] = structure.replace('[#6]', 'C').replace('[#8]', 'O')
            continue
        # With multiple structures, the first two are compared using Rascal MCES
        results = rdRascalMCES.FindMCES(molecules[0], molecules[1])
        if not results:
            # Same exception type as the former results[0] lookup, but with a usable message
            raise IndexError(f"Rascal found no MCES for the first two structures of group {change!r}")
        # '[#8-]' must become '[O-]': a bare 'O-' would read as an oxygen followed by a
        # single-bond symbol, not as a negatively charged oxygen
        d_out[change] = (
            results[0].smartsString
            .replace('[#6]', 'C')
            .replace('[#8]', 'O')
            .replace('[#8-]', '[O-]')
        )
    return d_out

The following function joins the substrate and product MCS and creates a list of reactions.

In [15]:
# Final step of the generation: pair every substrate MCS with its product MCS
def reactions(subs_structures, prod_structures):
    """
    Last step, creates reactions.

    First input is a dictionary of substrate groups and corresponding MCS structures: {('O', 1, 'group1'): 'MCSs1',('O', 1, 'group2'):  'MCSs2', ('O', 2, 'group1'): 'MCSs3'}
    Second input is a dictionary of product groups and corresponding MCS structures: {('O', 1, 'group1'): 'MCSp1',('O', 1, 'group2'):  'MCSp2', ('O', 2, 'group1'): 'MCSp3'}
    The values are paired by their position in the two dictionaries (not by key),
    and pairing stops at the end of the shorter dictionary.
    Output is a list of reactions: [['MCSs1>>MCSp1'], ['MCSs2>>MCSp2'], ['MCSs3>>MCSp3']]
    """
    paired_structures = zip(subs_structures.values(), prod_structures.values())
    return [[substrate + '>>' + product] for substrate, product in paired_structures]

This is where the generative part ends; the output of the last step is a list of reactions.

I will connect it all into one function called **find_reactions()**

In [16]:
def find_reactions(name_of_enzyme, df_t, threshold):
    """
    Run the whole pipeline for one enzyme and return its list of reactions.

    name_of_enzyme is looked up in df_t with subs_prods_smiles(); the substrate/product pairs are
    grouped by reaction (create_groups), the groups are split by substrate similarity using
    threshold (split_groups), a structure is derived per group for substrates and for products
    (finding_MCS) and both are joined into reaction strings (reactions).
    """
    substrates, products = subs_prods_smiles(name_of_enzyme, df_t)
    reaction_groups = create_groups(substrates, products)
    # The same final groups (built from the substrates) are used for both sides
    final_groups = split_groups(reaction_groups, substrates, threshold)
    substrate_structures = finding_MCS(substrates, final_groups)
    product_structures = finding_MCS(products, final_groups)
    return reactions(substrate_structures, product_structures)

In [20]:
# Run the full pipeline for enzyme 'O77809' with a similarity threshold of 0.75;
# the returned list of reactions is shown as the cell output below.
find_reactions('O77809',promiscuous_df, 0.75)



[['CCC.CCC(-OO)-C=CC=CCC.CCCCC(-O)=O>>CCC.CCC(=O)-C=CC=CCC.CCCCC(-O)=O'],
 ['C-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C(-[#8-])=O>>C(-C(-[#8-])=O)-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-[#6@@H]1-[#6@H](-C-C)-O-1'],
 ['C-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C-C(-[#8-])=O>>C-C-[#6@@H]1-O-[#6@@H]-1-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C-C(-[#8-])=O'],
 ['CCCCCC=CCC=CCC=CCC=CCCCC(-O)=O>>C(-CCC)-CC1C(-CC=CCC=CCC=CCCCC(-O)=O)-O1'],
 ['C12CCC3(-C)-C(-O)-CCC3C1CCc1:c:c(-O):c:c:c-2:1>>C1(-CCC2(-C)-C(-O)-CCC2C1CC)-c1:c:c:c(-O):c(-O):c:1'],
 ['[#6H](=O)/C=C(-C)/C=C/C=C(-C)/C=C/C1=C(-C)-C-C-C-C-1(-C)-C>>C-C(/C=C/C1=C(-C)-C-C-C-C-1(-C)-C)=C\\C=C\\C(-C)=C\\C(-[#8-])=O'],
 ['C1-[#6@@]2(-[#6@H]3-C-C-[#6@]4(-[#6@H](-[#6@@H]-3-C-C=C-2-C-[#6@H](-C-1)-O)-C-C-[#6@@H]-4-[#6@H](-C)-C-C-C-C(-C)-C)-C)-C>>C1-[#6@@H](-[#6@@]2(-[#6@@H](-C-1)-[#6@@H]1-C-C=C3-C-[#6@@H](-O)-C-C-[#6@]-3(-C)-[#6@H]-1-C-C-2)-C)-[#6@H](-C)-C-C-C-C(-C)(-C)-O'],
 ['CCCCCC=CCC=CCC=CCC=CCCCC(-O)=O>>O=C(-CCCC=CCC=CCC=CCC=CCCCCC)-O'],
 ['C-

To show how it works, I first called the function once with all the debugging prints; I have since changed the code so that it only prints the output.

In [18]:
# Generate and print the reactions for enzyme 'I1GQE7' (similarity threshold 0.75)
reactions_i1gqe7 = find_reactions('I1GQE7',promiscuous_df, 0.75)
print(reactions_i1gqe7)

[['C12CCC3C(-CCC4(-C)-C(-CCC3-4)-C(-C)-C(-O)-C(-O)-C(-C)-C(-C)-C)-C1(-C)-CCCC2>>C1(-CCC2C3CC(=O)-C4CCCCC4(-C)-C3CCC1-2-C)-C(-C)-C(-O)-C(-O)-C(-C)-C(-C)-C'], ['C12CCC3C(-CCC4(-C)-C(-CCC3-4)-C(-C)-C(-O)-C(-O)-C(-C)-C(-C)-C)-C1(-C)-CCCC2>>C1(-C(-C(-C(-C(-C(-C)-C)-C)-O)-O)-C)-C2(-CCC3C(-CC(-C4CCCCC3-4-C)-O)-C2CC1)-C'], ['C1(-C(-C(-C(-C(-C(-C)-C)-C)-O)-O)-C)-C2(-CCC3C(-CC(-C4CCCCC3-4-C)-O)-C2CC1)-C>>C1(-CCC2C3CC(=O)-C4CCCCC4(-C)-C3CCC1-2-C)-C(-C)-C(-O)-C(-O)-C(-C)-C(-C)-C']]


In [21]:
# Generate and print the reactions for enzyme 'B8QHP1' (similarity threshold 0.75)
reactions_b8qhp1 = find_reactions('B8QHP1',promiscuous_df, 0.75)
print(reactions_b8qhp1)

[['C-[#0]-C(-[#8-])=O>>O-C-[#0]-C(-[#8-])=O'], ['CCCCCC=CCC=CCCCCCCCC(-O)=O>>C(=CCC=CCCC)-CCCCCCCC(=O)-O.C(-C)-O'], ['C(-[#0]-C(-[#8-])=O)-C>>C(=O)(-[#8-])-[#0]-C(-C)-O']]


In [22]:
# Generate and print the reactions for enzyme 'O77809' (similarity threshold 0.75);
# same enzyme and threshold as the earlier unassigned find_reactions call
reactions_o77809 = find_reactions('O77809',promiscuous_df, 0.75)
print(reactions_o77809)



[['CCC.CCC(-OO)-C=CC=CCC.CCCCC(-O)=O>>CCC.CCC(=O)-C=CC=CCC.CCCCC(-O)=O'], ['C-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C(-[#8-])=O>>C(-C(-[#8-])=O)-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-[#6@@H]1-[#6@H](-C-C)-O-1'], ['C-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C-C(-[#8-])=O>>C-C-[#6@@H]1-O-[#6@@H]-1-C/C=C\\C/C=C\\C/C=C\\C/C=C\\C-C-C-C(-[#8-])=O'], ['CCCCCC=CCC=CCC=CCC=CCCCC(-O)=O>>C(-CCC)-CC1C(-CC=CCC=CCC=CCCCC(-O)=O)-O1'], ['C12CCC3(-C)-C(-O)-CCC3C1CCc1:c:c(-O):c:c:c-2:1>>C1(-CCC2(-C)-C(-O)-CCC2C1CC)-c1:c:c:c(-O):c(-O):c:1'], ['[#6H](=O)/C=C(-C)/C=C/C=C(-C)/C=C/C1=C(-C)-C-C-C-C-1(-C)-C>>C-C(/C=C/C1=C(-C)-C-C-C-C-1(-C)-C)=C\\C=C\\C(-C)=C\\C(-[#8-])=O'], ['C1-[#6@@]2(-[#6@H]3-C-C-[#6@]4(-[#6@H](-[#6@@H]-3-C-C=C-2-C-[#6@H](-C-1)-O)-C-C-[#6@@H]-4-[#6@H](-C)-C-C-C-C(-C)-C)-C)-C>>C1-[#6@@H](-[#6@@]2(-[#6@@H](-C-1)-[#6@@H]1-C-C=C3-C-[#6@@H](-O)-C-C-[#6@]-3(-C)-[#6@H]-1-C-C-2)-C)-[#6@H](-C)-C-C-C-C(-C)(-C)-O'], ['CCCCCC=CCC=CCC=CCC=CCCCC(-O)=O>>O=C(-CCCC=CCC=CCC=CCC=CCCCCC)-O'], ['C-C(=C\\C-

In [23]:
# Generate and print the reactions for enzyme 'O46658' (similarity threshold 0.75)
reactions_o46658 = find_reactions('O46658',promiscuous_df, 0.75)
print(reactions_o46658)

[['C(-CCCCCCCC)-CCC(-O)=O>>CCCCCCCCCCCC(-O)=O'], ['C12CC(-O)-CCC1(-C)-C1CCC3(-C)-C(-CCC3C1C(-O)-C2)-C(-C)-CCCC(-C)-C>>C12(-C(-C3C(-CC1)-C1(-C(-CC(-CC1)-O)-CC3O)-C)-CCC2C(-CCCC(-C)(-C)-O)-C)-C']]


In [24]:
# Generate and print the reactions for enzyme 'E9QY26' (similarity threshold 0.75)
reactions_e9qy26 = find_reactions('E9QY26',promiscuous_df, 0.75)
print(reactions_e9qy26)

[['C1C2(-C3CCC4(-C(-C=3-CCC2C(-C(-C1)-O)(-C)-C)(-CCC4C(-C)-CCC)-CO)-C)-C.C(-C)-C>>C1C2(-C3CCC4(-C(-C=3-CCC2C(-C(-C1)-O)(-C)-C)(-CCC4C(-C)-CCC)-C=O)-C)-C.C(-C)-C'], ['C12-C(-[#6@]3(-C(-C(-C-C-3)-[#0])(-C)-C-C-1)-C-O)-C-C-C1-C-2(-C-C-C-C-1)-C>>C-C12-C-C-C3-C(-C-C-C4-C-C-C-C-C-3-4-C)-[#6@@]-1(-C-C-C-2-[#0])-C=O'], ['C1-[#6@@]2(-C3-C-C-[#6@]4(-[#6@](-C=3-C-C-[#6@H]-2-C(-[#6@H](-C-1)-O)(-C)-C)(-C-C-[#6@@H]-4-[#6@H](-C)-C-C-C(-C(-C)-C)=C)-C=O)-C)-C>>C1-[#6@@]2(-C3-C-C-[#6@]4(-C(-C=3-C-C-[#6@H]-2-C(-[#6@H](-C-1)-O)(-C)-C)=C-C-[#6@@H]-4-[#6@H](-C)-C-C-C(-C(-C)-C)=C)-C)-C'], ['C-C12-C-C-C3-C(-C-C-C4-C-C-C-C-C-3-4-C)-[#6@@]-1(-C-C-C-2-[#0])-C=O>>C12-C(-C3-C(-C(-C-C=3)-[#0])(-C)-C-C-1)-C-C-C1-C-2(-C-C-C-C-1)-C'], ['O-[#6@@H]1-C(-[#6@H]2-[#6@@](-C3=C(-[#6@]4(-[#6@@](-[#6@H](-C-C-4)-[#6@@H](-C-C-C=C(-C)-C)-C)(-C-C-3)-C)-C=O)-C-C-2)(-C-C-1)-C)(-C)-C>>C1-[#6@@H](-[#6@@]2(-C(=C-1)-C1=C(-C-C-2)-[#6@@]2(-C)-C-C-[#6@H](-O)-C(-C)(-C)-[#6@@H]-2-C-C-1)-C)-[#6@H](-C)-C-C-C=C(-C)-C'], ['C12-C(-[#6@]3(-C(-C(-C

In [25]:
# Generate and print the reactions for enzyme 'B8QHP3' (similarity threshold 0.75)
reactions_b8qhp3 = find_reactions('B8QHP3',promiscuous_df, 0.75)
print(reactions_b8qhp3)

[['CCCCCCC=CCCCCCCCC(-O)=O>>C(-CCC)-CCCCC(-O)=O.CCCCCCCO'], ['C-[#0]-C(-[#8-])=O>>O-C-[#0]-C(-[#8-])=O']]


In [26]:
# Generate and print the reactions for enzyme 'I1RJR2' (similarity threshold 0.75)
reactions_i1rjr2 = find_reactions('I1RJR2',promiscuous_df, 0.75)
print(reactions_i1rjr2)

[['C1C2(-C3CCC4(-C(-C=3-CCC2C(-C(-C1)-O)(-C)-C)(-CCC4C(-C)-CCC)-CO)-C)-C.C(-C)-C>>C1C2(-C3CCC4(-C(-C=3-CCC2C(-C(-C1)-O)(-C)-C)(-CCC4C(-C)-CCC)-C=O)-C)-C.C(-C)-C'], ['C12-C(-[#6@]3(-C(-C(-C-C-3)-[#0])(-C)-C-C-1)-C-O)-C-C-C1-C-2(-C-C-C-C-1)-C>>C-C12-C-C-C3-C(-C-C-C4-C-C-C-C-C-3-4-C)-[#6@@]-1(-C-C-C-2-[#0])-C=O'], ['C1-[#6@@]2(-C3-C-C-[#6@]4(-[#6@](-C=3-C-C-[#6@H]-2-C(-[#6@H](-C-1)-O)(-C)-C)(-C-C-[#6@@H]-4-[#6@H](-C)-C-C-C(-C(-C)-C)=C)-C=O)-C)-C>>C1-[#6@@]2(-C3-C-C-[#6@]4(-C(-C=3-C-C-[#6@H]-2-C(-[#6@H](-C-1)-O)(-C)-C)=C-C-[#6@@H]-4-[#6@H](-C)-C-C-C(-C(-C)-C)=C)-C)-C'], ['C-C12-C-C-C3-C(-C-C-C4-C-C-C-C-C-3-4-C)-[#6@@]-1(-C-C-C-2-[#0])-C=O>>C12-C(-C3-C(-C(-C-C=3)-[#0])(-C)-C-C-1)-C-C-C1-C-2(-C-C-C-C-1)-C'], ['O-[#6@@H]1-C(-[#6@H]2-[#6@@](-C3=C(-[#6@]4(-[#6@@](-[#6@H](-C-C-4)-[#6@@H](-C-C-C=C(-C)-C)-C)(-C-C-3)-C)-C=O)-C-C-2)(-C-C-1)-C)(-C)-C>>C1-[#6@@H](-[#6@@]2(-C(=C-1)-C1=C(-C-C-2)-[#6@@]2(-C)-C-C-[#6@H](-O)-C(-C)(-C)-[#6@@H]-2-C-C-1)-C)-[#6@H](-C)-C-C-C=C(-C)-C'], ['C12-C(-[#6@]3(-C(-C(-C

In [27]:
# Generate and print the reactions for enzyme 'Q09128' (similarity threshold 0.75)
reactions_q09128 = find_reactions('Q09128',promiscuous_df, 0.75)
print(reactions_q09128)

[['C1CCC2C(-CCCC1-2-C)=CC=C1CC(-O)-CC(-O)-C1=C.C(-C)-CC=O>>C1(-CCC2C(-CCCC1-2-C)=CC=C1CC(-O)-CC(-O)-C1=C)-C(-C)-CCO'], ['C1(-CCC2C(-CCCC1-2-C)=CC=C1CC(-O)-CCC1=C)-C(-C)-C.CO>>C1CCC2C(-CCCC1-2-C)=CC=C1CC(-O)-CCC1=C.C(-C)-CC=O']]


This can be done for any and every enzyme in the dataset.