In [22]:
from __future__ import print_function

from rdkit import Chem
from rdkit.Chem import AllChem

import gzip, cPickle
import copy
import progressbar

In [14]:
## Alkenes
# Naming: the _M suffix is the Markovnikov variant, _N the anti-Markovnikov one.
# Templates whose reactant side contains [Si:1] only match a substrate in which
# one alkene carbon has been relabelled as silicon; the product side writes that
# atom back as carbon. Templates written with [C:1]=[C:2] match the plain alkene.

_make_rxn = AllChem.ReactionFromSmarts

# Hydrohalogenation: the halogen ends up on the marked carbon (M) or on its partner (N).
HX_M = _make_rxn('[Si:1]=[C:2].[Cl,Br,I:3]>>[C:1]([*:3])[C:2]')
HX_N = _make_rxn('[Si:1]=[C:2].[Br:3]>>[C:2]([Br:3])[C:1]')

# Hydration: same pattern with the oxygen of water.
HOH_M = _make_rxn('[Si:1]=[C:2].[OH2:3]>>[C:1]([O:3])[C:2]')
HOH_N = _make_rxn('[Si:1]=[C:2].[OH2:3]>>[C:2]([O:3])[C:1]')

# Alcohol addition: the C-O fragment of the alcohol is attached through oxygen.
ROH_M = _make_rxn('[Si:1]=[C:2].[C:4][O:3]>>[C:1]([O:3][C:4])[C:2]')
ROH_N = _make_rxn('[Si:1]=[C:2].[C:4][O:3]>>[C:2]([O:3][C:4])[C:1]')

# Borane addition: boron goes on the unmarked carbon.
BH3 = _make_rxn('[Si:1]=[C:2].[B:3]>>[C:2]([B:3])[C:1]')

# Hydrogenation: the double bond becomes a single bond.
RED = _make_rxn('[C:1]=[C:2].[H][H]>>[C:1][C:2]')

# Dibromocarbene addition: closes a three-membered ring carrying two bromines.
CARB_BROMO = _make_rxn('[C:1]=[C:2].Br[C:3](Br)Br>>[C:1]1[C:3](Br)(Br)[C:2]1')

# Halogenation, with (X2_H2O) and without (X2) an oxygen on the marked carbon.
X2_H2O = _make_rxn('[Si:1]=[C:2].[Cl,Br,I:3][Cl,Br,I:4]>>[C:1](O)[C:2]([*:3])')
X2 = _make_rxn('[C:1]=[C:2].[Cl,Br,I:3][Cl,Br,I:4]>>[C:1]([*:3])[C:2]([*:4])')

# Epoxidation: single-reactant template, bridges both carbons with one oxygen.
EPOXI = _make_rxn('[C:1]=[C:2]>>[C:1]1O[C:2]1')

# Dihydroxylation: single-reactant template, one oxygen on each carbon.
OX = _make_rxn('[C:1]=[C:2]>>[C:1](O)[C:2](O)')

# Ozonolysis: both templates are identical and split the C=C into two C=O
# fragments; they are kept under separate names for the two work-up variants.
OZONE_OX = _make_rxn('[C:1]=[C:2].O~O~O>>[C:1](=O).[C:2](=O)')
OZONE_RED = _make_rxn('[C:1]=[C:2].O~O~O>>[C:1](=O).[C:2](=O)')

# Adds an oxygen to a C=O carbon that does not have exactly three connections.
OX_ALDEHYDE = _make_rxn('[C!D3:1]=[O:2]>>[C:1](O)=[O:2]')

# Joins two marked alkenes; the first keeps its double bond in the product.
POLY = _make_rxn('[Si:1]=[C:2].[Si:3]=[C:4]>>[C:1]=[C:2][C:3][C:4]')


# Reactions that need the [Si]-marked alkene as substrate.
# Each entry is (name, template, reagents, sub_reagents):
#   reagents     - SMILES tried one at a time as the second reactant; each is
#                  recorded on the reactant side of the output line
#   sub_reagents - SMILES recorded between the two '>' of the output line;
#                  '' means nothing is recorded there
alkene_MN_reactions_list = [

    ('HX_M', HX_M, ['Cl', 'Br', 'I'], ['']),
    ('HX_N', HX_N, ['Br'], ['CCOOCC', 'CC(=O)OOC(=O)C']),

    ('HOH_M', HOH_M, ['O'], ['OS(O)(=O)=O', '[Hg+2].CC([O-])=O.CC([O-])=O.[Na+].[BH4-]']),
    ('HOH_N', HOH_N, ['O'], ['B.C1CCOC1.OO.[Na+].[OH-]']),

    # Fixed: the mercury salt previously listed a single acetate, which left the
    # agent string with a net +1 charge; it now matches the two-acetate form
    # already used for HOH_M.
    ('ROH_M', ROH_M, ['CO', 'CCO', 'CCCO'], ['[Hg+2].CC([O-])=O.CC([O-])=O.[Na+].[BH4-]']),
    ('ROH_N', ROH_N, ['CO', 'CCO', 'CCCO'], ['B.C1CCOC1.OO.[K+].[OH-]', 'B.C1CCOC1.OO.[Na+].[OH-]']),

    ('BH3', BH3, ['B'], ['C1CCOC1']),

    ('X2_H2O', X2_H2O, ['ClCl', 'BrBr', 'II'], ['O']),
    # POLY is special-cased by the enumeration loop: the template is run on two
    # copies of the alkene, and the reagents below are only recorded.
    ('POLY', POLY, ['OS(O)(=O)=O', 'Cl'], ['']),
]

# Reactions applied to the unmarked alkene.
# Tuple layout: (name, template, reagents, sub_reagents). A reagent of ''
# means the template is run on the alkene alone.
alkene_reactions_list = [
    (
        'RED', RED,
        ['[H][H]'],
        ['[Pt]', '[Pd]', '[Ni]', ''],
    ),
    (
        'CARB_BROMO', CARB_BROMO,
        ['BrC(Br)Br'],
        ['[Na+].[OH-]'],
    ),
    (
        'X2', X2,
        ['ClCl', 'BrBr', 'II'],
        ['', 'ClCCl'],
    ),
    (
        'EPOXI', EPOXI,
        [''],
        ['CC(=O)OO', 'CCC(=O)OO', 'CCCC(=O)OO', 'OOC(=O)c1cccc(Cl)c1'],
    ),
    (
        'OX', OX,
        [''],
        ['O=[Os](=O)(=O)=O', '[K+].[O-][Mn](=O)(=O)=O'],
    ),
    (
        'OZONE_OX', OZONE_OX,
        ['O=[O+][O-]'],
        ['OO'],
    ),
    (
        'OZONE_RED', OZONE_RED,
        ['O=[O+][O-]'],
        ['CSC', 'NC(=S)N', '[Zn].O'],
    ),
]

In [26]:
def cano(smiles):
    """Return the canonical SMILES that RDKit writes for `smiles`.

    The input is parsed with Chem.MolFromSmiles and written back with
    Chem.MolToSmiles. Every string, including an empty one, goes through
    RDKit; nothing is special-cased here.

    Raises:
        ValueError: if RDKit cannot parse `smiles` (MolFromSmiles returned
            None). Without this check the None would be handed to
            MolToSmiles and fail with an error that does not name the input.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError('cannot parse SMILES: {!r}'.format(smiles))
    return Chem.MolToSmiles(mol)

def block(ch, smiles):
    """Return `ch` followed by the canonical form of `smiles`.

    An empty `smiles` yields an empty string, so no dangling separator is
    produced and the canonicaliser is not called.
    """
    if smiles == '':
        return ''
    return ch + cano(smiles)

In [21]:
# Load the alkene SMILES collection from a gzip-compressed pickle.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load this from a trusted source.
with gzip.open('data/subst/alkene.pkl.gz', 'rb') as f:
    alkene_list = cPickle.load(f)

# Number of alkenes; also used as the progress-bar maximum in the enumeration cell.
length = len(alkene_list)
print(length)

7781


In [23]:
# Enumerate every (alkene, reaction, reagent, sub_reagent) combination and store
# one reaction line per combination in `rxns`, formatted as
#   <alkene>[.<reagent>]><sub_reagent>><product>[.<product>...]
rxns = []
skipped_alkenes = []  # inputs that failed to parse or contain no C=C match

bar = progressbar.ProgressBar(max_value=length)

# Work that does not depend on the alkene is done once, up front, instead of
# being repeated inside the innermost loops.
double_bond = Chem.MolFromSmiles('C=C')
reagent_mols = {}    # reagent SMILES -> parsed molecule
reagent_blocks = {}  # reagent SMILES -> '.' + canonical SMILES ('' when no reagent)
agent_smiles = {}    # sub_reagent SMILES -> canonical SMILES
for reaction in alkene_MN_reactions_list + alkene_reactions_list:
    for reagent in reaction[2]:
        reagent_blocks[reagent] = block('.', reagent)
        if reagent != '':
            reagent_mols[reagent] = Chem.MolFromSmiles(reagent)
    for sub_reagent in reaction[3]:
        agent_smiles[sub_reagent] = cano(sub_reagent)


def _enumerate_reactions(substrate, substrate_smi, reactions):
    """Run every reaction/reagent pair on `substrate` and append lines to `rxns`.

    substrate      -- molecule handed to the templates (marked or unmarked alkene)
    substrate_smi  -- canonical SMILES written on the reactant side
    reactions      -- list of (name, template, reagents, sub_reagents) tuples

    Only the first product set returned by RunReactants is used. Combinations
    for which the template yields no product set are skipped.
    """
    for name, template, reagents, sub_reagents in reactions:
        for reagent in reagents:
            if name == 'POLY':
                # The template pairs the alkene with itself; the listed
                # reagent is only recorded, never matched.
                reactants = (substrate, substrate)
            elif reagent == '':
                reactants = (substrate,)
            else:
                reactants = (substrate, reagent_mols[reagent])

            product_sets = template.RunReactants(reactants)
            if len(product_sets) == 0:
                # Template did not match; there is nothing to record.
                continue
            products = product_sets[0]

            if name == 'OZONE_OX':
                # Oxidative work-up: push each fragment through OX_ALDEHYDE and
                # keep the fragment unchanged when that template does not match.
                oxidised = []
                for product in products:
                    oxid_product = OX_ALDEHYDE.RunReactants((product,))
                    oxidised.append(oxid_product[0][0] if len(oxid_product) != 0 else product)
                products = oxidised

            product_smi = '.'.join([Chem.MolToSmiles(product) for product in products])
            prefix = substrate_smi + reagent_blocks[reagent] + '>'
            for sub_reagent in sub_reagents:
                rxns.append(prefix + agent_smiles[sub_reagent] + '>' + product_smi)


# choose alkene
for i, alkene_smi in enumerate(alkene_list):
    alkene = Chem.MolFromSmiles(alkene_smi)
    matches = alkene.GetSubstructMatches(double_bond) if alkene is not None else ()
    if len(matches) == 0:
        # Unparsable SMILES or no double bond: remember it and move on rather
        # than aborting the whole run.
        skipped_alkenes.append(alkene_smi)
        bar.update(i + 1)
        continue

    # mark [Si] on a copy: of the first matched C=C pair, the carbon with fewer
    # hydrogens is relabelled (the first carbon when the counts are equal).
    alkene_MN = copy.deepcopy(alkene)
    C_1, C_2 = matches[0]
    if alkene_MN.GetAtomWithIdx(C_1).GetTotalNumHs() > alkene_MN.GetAtomWithIdx(C_2).GetTotalNumHs():
        marked_idx = C_2
    else:
        marked_idx = C_1
    alkene_MN.GetAtomWithIdx(marked_idx).SetAtomicNum(14)

    alkene_canonical = cano(alkene_smi)

    # MN reactions use the marked copy, general reactions the original molecule.
    _enumerate_reactions(alkene_MN, alkene_canonical, alkene_MN_reactions_list)
    _enumerate_reactions(alkene, alkene_canonical, alkene_reactions_list)

    # i is zero-based; report the number of alkenes completed.
    bar.update(i + 1)

bar.finish()

 99% (7779 of 7781) |###################### | Elapsed Time: 0:01:30 ETA: 0:00:00

In [24]:
# Total number of generated reaction lines. With the two reaction tables as
# defined in this notebook each alkene contributes 23 + 21 = 44 lines
# (one per reagent / sub_reagent combination).
print(len(rxns))

342364
