# Linking molecule fragments via reaction SMIRKS
Anjali Dixit

Purpose: 
Generate molecules containing  combinations of various molecular fragments or substructures (i.e. substructures containing particular SMIRKS). Here, I experiment with using reaction SMIRKS to link substructures to generate libraries of molecules.

Here: Link molecules using a specific reaction scheme for library creation (reductive amination). This file shows a reductive amination utilizing SDF files from Enamine.


In [None]:
# Import OpenEye stuff
import openeye.oechem as oechem
import openeye.oedepict as oedepict
import openeye.oemolprop as oemolprop
from IPython.display import display
import openeye.oeomega as oeomega


# Add utility function for depiction
def depict(mol, width=500, height=200):
    """Render a molecule as an inline PNG for display in the notebook.

    Parameters
    ----------
    mol
        OpenEye molecule to draw. It is handed to ``OEPrepareDepiction``
        before rendering (presumably updating its 2D layout in place --
        confirm against the OEDepict documentation).
    width, height
        Image size passed to ``OE2DMolDisplayOptions``.

    Returns
    -------
    IPython.display.Image
        Image wrapping the PNG data produced by ``OERenderMolecule``.
    """
    from IPython.display import Image
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    oedepict.OEPrepareDepiction(mol, dopt)
    opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()
    # Pass the stream contents straight through: under Python 3 the buffer is
    # a bytes object, and "".join(<bytes>) raises TypeError (it iterates ints).
    return Image(data=ofs.str())


# Try out reactions


In [None]:
# Test by linking two molecules: a carbonyl carbon (reactant 0, here an
# aldehyde) reacts with a primary amine (reactant 1, H-N-H) to form a bond
# between the C and N atoms, dropping the C=O oxygen and one N-H hydrogen.
#
# The amine pattern must describe a *primary* amine (two mapped hydrogens and
# one carbon neighbour). A pattern such as [C:3][N:4][C:7] needs a nitrogen
# with two carbon neighbours, so methylamine ('CN') would never match and the
# loop below would yield no products.
libgen = oechem.OELibraryGen("[C:1](=[O:2]).[N:4]([H:5])([H:6])[#6:7]>>[C:1][N:4]([H:5])[#6:7]")
# Removing the =O leaves the carbon under-valent; let the toolkit fix up
# hydrogen counts on the product.
libgen.SetValenceCorrection(True)

mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, 'C(C=O)')  # acetaldehyde
libgen.SetStartingMaterial(mol, 0)
mol.Clear()
oechem.OESmilesToMol(mol, 'CN')  # methylamine
libgen.SetStartingMaterial(mol, 1)

# Keep independent copies of the products so they can be depicted afterwards.
mols = []
for product in libgen.GetProducts():
    print("product smiles= %s" % oechem.OEMolToSmiles(product))
    mols.append(oechem.OEMol(product))
    
# Depict result
#depict(mols[0])

## Proceed to library generation

### First, build building block libraries. Use SDF files of building blocks 

Raw SDF Files can also be found on Enamine's company website. 

In [None]:
# Build two building block libraries for linking


# Build overall set of reactants
# Load one set of building blocks: primary amines.
# A building block is kept (as a SMILES string) when its molecular weight is
# <= 200, or when its SMILES contains Br or Cl regardless of weight.
# To troubleshoot, print `smiles` inside the loop.
istream = oechem.oemolistream()
if not istream.open('Enamine_Primary_Amines_30459cmpds_wSmiles.sdf'):
    # Fail loudly: otherwise a missing file just produces an empty library.
    raise OSError("Unable to open 'Enamine_Primary_Amines_30459cmpds_wSmiles.sdf' for reading")

primary_amines = []
for oemol in istream.GetOEGraphMols():
    # Generate the SMILES once; it is both the halogen test and the stored value.
    smiles = oechem.OEMolToSmiles(oemol)
    if (oechem.OECalculateMolecularWeight(oemol) <= 200
            or "Br" in smiles or "Cl" in smiles):
        primary_amines.append(smiles)

print(len(primary_amines))

istream.close()


In [None]:
# Load second set of building blocks: aldehydes.
# A building block is kept (as a SMILES string) when its molecular weight is
# <= 200, or when its SMILES contains Br or Cl regardless of weight.
# To troubleshoot, print `smiles` inside the loop.
istream = oechem.oemolistream()
if not istream.open('Enamine_Aldehydes_5798cmpds_wSmiles.sdf'):
    # Fail loudly: otherwise a missing file just produces an empty library.
    raise OSError("Unable to open 'Enamine_Aldehydes_5798cmpds_wSmiles.sdf' for reading")

aldehydes = []
for oemol in istream.GetOEGraphMols():
    # Generate the SMILES once; it is both the halogen test and the stored value.
    smiles = oechem.OEMolToSmiles(oemol)
    if (oechem.OECalculateMolecularWeight(oemol) <= 200
            or "Br" in smiles or "Cl" in smiles):
        aldehydes.append(smiles)

print(len(aldehydes))

istream.close()


## Next step: Begin linking our aldehydes and primary amines

### First configure our reaction

Filter according to the Rule of 3: MW <= 300, XLogP <= 3, HBA and HBD groups <= 3 (based on Lipinski rules). (Filtering still cannot handle molecules containing selenium or silicon, salts, or molecules whose functional groups have uncharacterized LogP values.)

In [None]:
# Enumerate aldehyde x primary-amine pairs through the reductive-amination
# SMIRKS and keep the products that pass the Rule-of-3 style filters.
# The order of the reactants in the SMIRKS must match the starting-material
# indices used below: reactant 0 is the carbonyl partner, reactant 1 the amine.

libgen = oechem.OELibraryGen("[C:1](=[O:2]).[N:4]([H:5])([H:6])[#6:7]>>[C:1][N:4]([H:5])[#6:7]")
libgen.SetValenceCorrection(True)

# Conformer generator, configured for a single conformer per molecule; it is
# applied to `products` in the conformer-generation cell.
omega = oeomega.OEOmega()
omega.SetMaxConfs(1)
omega.SetStrictStereo(False)

# Output stream for the final library; written to and closed in the
# conformer-generation cell.
ofs = oechem.oemolostream('Enamine_ReductiveAminations.sdf')
ofs.SetFormat(oechem.OEFormat_SDF)

# Only the first N building blocks of each list are enumerated.
MAX_ALDEHYDES = 1500
MAX_AMINES = 1500


def passes_ro3_filters(product):
    """Return True when ``product`` should be kept in the library.

    A product is kept when it has <= 3 Lipinski acceptors, <= 3 Lipinski
    donors and XLogP <= 3, and additionally either
      * MW <= 300 and <= 3 rotatable bonds, or
      * its SMILES contains Br or Cl (halogenated products are exempt from
        the MW limit; as written they also bypass the rotatable-bond limit).

    Products for which XLogP raises RuntimeError are reported and rejected.
    """
    if (oemolprop.OEGetLipinskiAcceptorCount(product) > 3
            or oemolprop.OEGetLipinskiDonorCount(product) > 3):
        return False

    within_size_limits = (oechem.OECalculateMolecularWeight(product) <= 300
                          and oemolprop.OEGetRotatableBondCount(product) <= 3)
    if not within_size_limits:
        smiles = oechem.OEMolToSmiles(product)
        if "Br" not in smiles and "Cl" not in smiles:
            return False

    try:
        return oemolprop.OEGetXLogP(product) <= 3
    except RuntimeError:
        print("Error: product=", oechem.OEMolToSmiles(product))
        return False


products = []
mol = oechem.OEGraphMol()

for idx_ald, ald in enumerate(aldehydes[:MAX_ALDEHYDES]):
    mol.Clear()
    oechem.OESmilesToMol(mol, ald)
    libgen.SetStartingMaterial(mol, 0)

    for amine in primary_amines[:MAX_AMINES]:
        mol.Clear()
        oechem.OESmilesToMol(mol, amine)
        libgen.SetStartingMaterial(mol, 1)

        for product in libgen.GetProducts():
            if not passes_ro3_filters(product):
                continue

            # Record provenance on the product itself (not on a leftover
            # variable from an earlier cell) before copying it.
            oechem.OESetSDData(product, "Aldehyde Reactant", ald)
            oechem.OESetSDData(product, "Primary Amine Reactant", amine)
            # Keep the title on one line: it becomes the first header line of
            # the SDF record, so an embedded newline would not fit that field.
            product.SetTitle("Smiles: %s | Aldehyde: %s, Amine: %s"
                             % (oechem.OEMolToSmiles(product), ald, amine))

            products.append(oechem.OEMol(product))

    mol.Clear()
    print(f'{idx_ald}')  # progress: index of the aldehyde just processed

print('Filtered based on RO3\n')
print('Number of Compounds:' + str(len(products)))

#make sure it worked and stored information 

#for product in products:
#    print('\n'+ oechem.OEMolToSmiles(product)) 
   

# Generate a conformer for each molecule and write out to another SDF file

In [None]:
# Generate a conformer for every filtered product with Omega and write it to
# the SDF stream opened in the enumeration cell. The stream there is named
# `ofs`; writing to any other name raises NameError before anything is saved.
for oemol in products:
    omega(oemol)
    oechem.OEWriteMolecule(ofs, oemol)
ofs.close()
    