In [1]:
#!/usr/bin/env python3
from openeye.oechem import *
from openeye.oemedchem import *
from openeye.oedepict import *
import pandas as pd
import collections as clct

In [2]:
# Bemis Murcko Fragmentation Example 1
# Parse one molecule and print every Bemis-Murcko region: its role name(s)
# followed by the SMILES of the atoms belonging to that region.
mol = OEGraphMol()
OESmilesToMol(mol, "CCOc1ccc(cc1)CC(OC)c2ccccc2CC(=O)N")

adjustHCount = True
for frag in OEGetBemisMurcko(mol):
    # Copy just this region's atoms out of the parent molecule.
    fragment = OEGraphMol()
    OESubsetMol(fragment, mol, frag, adjustHCount)

    # Role names of the region are joined with "." when there are several.
    role_names = [r.GetName() for r in frag.GetRoles()]
    role_label = ".".join(role_names)
    print(role_label, OEMolToSmiles(fragment))

Framework c1ccc(cc1)CCc2ccccc2
Ring c1ccccc1.c1ccccc1
Linker CC
Sidechain CCO.CC(=O)N.CO


In [3]:
# Bemis Murcko Fragmentation Example 2: taking only rings
mol = OEGraphMol()
# Raw string literal: the backslashes are part of the SMILES itself. In a normal
# string "\C" is an invalid escape sequence, which Python warns about (and plans
# to reject); the raw form yields exactly the same characters without the warning.
OESmilesToMol(mol, r"CN(C)C\C=C\C(=O)NC1=C(O[C@H]2CCOC2)C=C2N=CN=C(NC3=CC(Cl)=C(F)C=C3)C2=C1")

adjustHCount = True
for frag in OEGetBemisMurcko(mol):
    # Extract the atoms of this region into their own molecule.
    fragment = OEGraphMol()
    OESubsetMol(fragment, mol, frag, adjustHCount)
    # Only report regions carrying the "Ring" role; all other regions are skipped.
    for role in frag.GetRoles():
        if role.GetName() == "Ring":
            print(role.GetName())
            print(OEMolToSmiles(fragment))

Ring
c1ccccc1.c1ccc2c(c1)cncn2.C1CCOC1


In [4]:
# Load the table of FDA-approved kinase inhibitors and pull out its
# "SMILES" column as a plain Python list.
df = pd.read_csv("USFDA_approved_PKIs_SMILES.csv")
PKI_SMILES = df["SMILES"].tolist()

In [5]:
# Collect the SMILES of every ring system found in the kinase inhibitors.
# For each input molecule, the Bemis-Murcko region with the "Ring" role is
# written out as SMILES and split on "." into its individual components.
ring_list = []
adjustHCount = True  # loop-invariant, so set once up front

for SMILES in PKI_SMILES:
    # Non-string entries (e.g. the float NaN pandas uses for an empty CSV field)
    # cannot be parsed as SMILES; report and skip them instead of crashing.
    if not isinstance(SMILES, str):
        print("Skipping non-string SMILES entry:", repr(SMILES))
        continue

    mol = OEGraphMol()
    # OESmilesToMol reports failure through its return value; do not fragment
    # a molecule that could not be parsed.
    if not OESmilesToMol(mol, SMILES):
        print("Skipping unparseable SMILES:", SMILES)
        continue

    for frag in OEGetBemisMurcko(mol):
        fragment = OEGraphMol()
        OESubsetMol(fragment, mol, frag, adjustHCount)

        for role in frag.GetRoles():
            if role.GetName() == "Ring":
                rings_SMILES = OEMolToSmiles(fragment)
                # "".split(".") returns [""], so drop empty pieces to keep
                # blank entries out of the tally.
                ring_list.extend(
                    r_smiles for r_smiles in rings_SMILES.split(".") if r_smiles
                )

print(ring_list)

['c1ccccc1', 'c1ccc2c(c1)cncn2', 'C1CCOC1', 'c1ccc2c(c1)c3c([nH]2)Cc4cc(ccc4C3)N5CCC(CC5)N6CCOCC6', 'c1ccccc1', 'c1ccncc1', 'c1ccc2c(c1)c[nH]n2', 'c1ccccc1', 'c1ccc2c(c1)cccn2', 'C1CNCCN1', 'c1ccccc1', 'c1ccc(cc1)N2CCC(CC2)N3CCNCC3', 'c1cncnc1', 'c1ccccc1', 'c1ccccc1', 'c1ccc2c(c1)cccn2', 'C1CC1', 'c1ccccc1', 'c1ccc(cc1)C2CCNCC2', 'c1cncnc1', 'c1ccccc1', 'c1ccccc1', 'C1CCN[C@@H](C1)C2CNC2', 'c1ccccc1', 'c1cc(cnc1)c2cnn(c2)C3CCNCC3', 'c1ccccc1', 'c1ccc(cc1)c2c(scn2)c3ccncn3', 'c1ccccc1', 'c1cncnc1N2CCNCC2', 'c1cscn1', 'c1ccccc1', 'c1ccc2c(c1)cncn2', 'C1CCCCC1', 'C1CCC/C=C/C=C/C=C/CC[C@@H]2CCC[C@@H](O2)CCN3CCCC[C@H]3COCCCC/C=C/CC1', 'c1ccccc1', 'c1ccc2c(c1)cncn2', 'C1COCCN1', 'c1ccccc1', 'c1ccc(cc1)c2c3cncnc3n(n2)[C@@H]4CCCNC4', 'c1ccccc1', 'c1ccccc1', 'c1cc(cnc1)c2ccncn2', 'C1CNCCN1', 'c1ccccc1', 'c1ccccc1', 'c1cc(oc1)c2ccc3c(c2)cncn3', 'c1ccccc1', 'c1ccc2c(c1)cccn2', 'C1CC1', 'c1ccccc1', 'c1ccc2c(c1)c3c4c(c5c6ccccc6n7c5c3n2[C@H]8CCC[C@@H]7O8)CNC4', 'c1ccccc1', 'c1ccncc1', 'c1ccc2c(c1)c

In [6]:
# Tally how many times each ring SMILES occurs in ring_list.
ring_cnt = clct.Counter(ring_list)
print(ring_cnt)

Counter({'c1ccccc1': 36, 'C1CNCCN1': 4, 'c1ccc2c(c1)cncn2': 4, 'c1ccncc1': 4, 'c1ccc2c(c1)cccn2': 4, 'c1cncnc1': 3, 'C1CCC/C=C/C=C/C=C/CC[C@@H]2CCC[C@@H](O2)CCN3CCCC[C@H]3COCCCC/C=C/CC1': 3, 'C1CCCCC1': 3, 'c1cc(cnc1)c2ccncn2': 2, 'c1cc(cnc1)N2CCNCC2': 2, 'C1CCNCC1': 2, 'C1CC1': 2, 'c1ccc2c(c1)c[nH]n2': 2, 'c1ccc(cc1)c2c(scn2)c3ccncn3': 1, 'c1ccc(cc1)N2CN(CC3=CNCC=C32)C4CC4': 1, 'c1ccc(cc1)c2c3cncnc3n(n2)[C@@H]4CCCNC4': 1, 'C1COCCN1': 1, 'c1cscn1': 1, 'c1c[nH]c2c1cncn2': 1, 'c1cncnc1N2CCNCC2': 1, 'c1cc(cnc1)c2cnn(c2)C3CCNCC3': 1, 'C1CCN[C@@H](C1)C2CNC2': 1, 'c1ccc2c(c1)CC=N2': 1, 'c1ccc2c(c1)CCN2': 1, 'c1ccc2c(c1)c3c([nH]2)Cc4cc(ccc4C3)N5CCC(CC5)N6CCOCC6': 1, 'c1ccc2c(c1)c(c[nH]2)c3ccncn3': 1, 'c1ccc(cc1)C2CCNCC2': 1, 'c1ccc(cc1)c2cc3cc[nH]c3nc2': 1, 'c1c2c(ncn1)N(CC=C2)C3CCCC3': 1, 'c1cn(c2c1cncn2)C3CCCC3': 1, 'c1c[nH]c2c1c(ncn2)c3c[nH]nc3': 1, 'c1ccc(cc1)N2CCC(CC2)N3CCNCC3': 1, 'c1ccc(cc1)n2ccnc2': 1, 'c1cc(oc1)c2ccc3c(c2)cncn3': 1, 'c1cc2nccn2nc1': 1, 'C1CCOC1': 1, 'c1ccc2c(c1)c3c4c

In [7]:
# Pad the tally with a zero-count placeholder ("C") so that the number of
# entries fills the 2D depiction grid evenly.
ring_cnt["C"] = 0

# Ring SMILES ordered from most to least frequent; ties keep the order in
# which they were first counted.
ring_cnt_sorted = [ring for ring, _ in ring_cnt.most_common()]
print(len(ring_cnt_sorted))

for smiles in ring_cnt_sorted:
    print(smiles, ring_cnt[smiles])

40
c1ccccc1 36
C1CNCCN1 4
c1ccc2c(c1)cncn2 4
c1ccncc1 4
c1ccc2c(c1)cccn2 4
c1cncnc1 3
C1CCC/C=C/C=C/C=C/CC[C@@H]2CCC[C@@H](O2)CCN3CCCC[C@H]3COCCCC/C=C/CC1 3
C1CCCCC1 3
c1cc(cnc1)c2ccncn2 2
c1cc(cnc1)N2CCNCC2 2
C1CCNCC1 2
C1CC1 2
c1ccc2c(c1)c[nH]n2 2
c1ccc(cc1)c2c(scn2)c3ccncn3 1
c1ccc(cc1)N2CN(CC3=CNCC=C32)C4CC4 1
c1ccc(cc1)c2c3cncnc3n(n2)[C@@H]4CCCNC4 1
C1COCCN1 1
c1cscn1 1
c1c[nH]c2c1cncn2 1
c1cncnc1N2CCNCC2 1
c1cc(cnc1)c2cnn(c2)C3CCNCC3 1
C1CCN[C@@H](C1)C2CNC2 1
c1ccc2c(c1)CC=N2 1
c1ccc2c(c1)CCN2 1
c1ccc2c(c1)c3c([nH]2)Cc4cc(ccc4C3)N5CCC(CC5)N6CCOCC6 1
c1ccc2c(c1)c(c[nH]2)c3ccncn3 1
c1ccc(cc1)C2CCNCC2 1
c1ccc(cc1)c2cc3cc[nH]c3nc2 1
c1c2c(ncn1)N(CC=C2)C3CCCC3 1
c1cn(c2c1cncn2)C3CCCC3 1
c1c[nH]c2c1c(ncn2)c3c[nH]nc3 1
c1ccc(cc1)N2CCC(CC2)N3CCNCC3 1
c1ccc(cc1)n2ccnc2 1
c1cc(oc1)c2ccc3c(c2)cncn3 1
c1cc2nccn2nc1 1
C1CCOC1 1
c1ccc2c(c1)c3c4c(c5c6ccccc6n7c5c3n2[C@H]8CCC[C@@H]7O8)CNC4 1
C1CCCC1 1
c1cc[nH]c1 1
C 0


In [8]:
# Depict all rings
# Renders every entry of ring_cnt_sorted into one PNG, laid out on a grid with
# five columns, and labels each depiction with its frequency from ring_cnt.
font = OEFont()
font.SetSize(40)
font.SetAlignment(OEAlignment_Center)

# Build a depiction-ready molecule for each ring SMILES.
molecules = []
for smi in ring_cnt_sorted:
    mol = OEGraphMol()
    OESmilesToMol(mol, smi)
    OEPrepareDepiction(mol)
    molecules.append(mol)

# Derive the number of rows from the number of entries (ceiling division)
# instead of hard-coding it, so the grid always has room for every molecule.
# With 40 entries this gives 8 rows and a 5000 x 8000 image.
cols = 5
rows = max(1, (len(molecules) + cols - 1) // cols)
image = OEImage(5000, 1000 * rows)
grid = OEImageGrid(image, rows, cols)

opts = OE2DMolDisplayOptions(grid.GetCellWidth(), grid.GetCellHeight(), OEScale_AutoScale)

# Use one common scale (the smallest any molecule needs) for all depictions.
minscale = min((OEGetMoleculeScale(mol, opts) for mol in molecules), default=float("inf"))
if molecules:
    opts.SetScale(minscale)

# Pair cells with molecules via zip: trailing cells without a molecule are
# simply left blank rather than indexing past the end of `molecules`.
for cell, smi, mol in zip(grid.GetCells(), ring_cnt_sorted, molecules):
    disp = OE2DMolDisplay(mol, opts)
    OERenderMolecule(cell, disp)

    data = "freq = " + str(ring_cnt[smi])
    offset = 200
    cell.DrawText(OE2DPoint(350, offset), data, font)

OEWriteImage("all_rings.png", image)

True

In [9]:
# Keep only the rings whose count is greater than one.
ring_cnt_multiple = {}
for ring_smiles, freq in ring_cnt.items():
    if freq > 1:
        ring_cnt_multiple[ring_smiles] = freq

# Zero-count placeholder ("C") appended so the 2D depiction grid is filled evenly.
ring_cnt_multiple["C"] = 0

# Order the ring SMILES from most to least frequent.
ring_cnt_multiple_sorted = sorted(
    ring_cnt_multiple,
    key=lambda ring_smiles: ring_cnt_multiple[ring_smiles],
    reverse=True,
)

for smiles in ring_cnt_multiple_sorted:
    print(smiles, ring_cnt_multiple[smiles])

c1ccccc1 36
c1ccc2c(c1)cncn2 4
c1ccc2c(c1)cccn2 4
c1ccncc1 4
C1CNCCN1 4
C1CCCCC1 3
c1cncnc1 3
C1CCC/C=C/C=C/C=C/CC[C@@H]2CCC[C@@H](O2)CCN3CCCC[C@H]3COCCCC/C=C/CC1 3
C1CCNCC1 2
c1ccc2c(c1)c[nH]n2 2
C1CC1 2
c1cc(cnc1)c2ccncn2 2
c1cc(cnc1)N2CCNCC2 2
C 0


In [10]:
# Depict frequent rings seen more than once
# Renders every entry of ring_cnt_multiple_sorted into one PNG, laid out on a
# grid with seven columns, and labels each depiction with its frequency from
# ring_cnt_multiple.
smiles_of_frequent_rings = list(ring_cnt_multiple_sorted)

print(smiles_of_frequent_rings)

font = OEFont()
font.SetSize(60)
font.SetAlignment(OEAlignment_Center)

# Build a depiction-ready molecule for each ring SMILES.
molecules = []
for smi in smiles_of_frequent_rings:
    mol = OEGraphMol()
    OESmilesToMol(mol, smi)
    OEPrepareDepiction(mol)
    molecules.append(mol)

# Derive the number of rows from the number of entries (ceiling division)
# instead of hard-coding it, so the grid always has room for every molecule.
# With 14 entries this gives 2 rows and a 5000 x 2000 image.
cols = 7
rows = max(1, (len(molecules) + cols - 1) // cols)
image = OEImage(5000, 1000 * rows)
grid = OEImageGrid(image, rows, cols)

opts = OE2DMolDisplayOptions(grid.GetCellWidth(), grid.GetCellHeight(), OEScale_AutoScale)

# Use one common scale (the smallest any molecule needs) for all depictions.
minscale = min((OEGetMoleculeScale(mol, opts) for mol in molecules), default=float("inf"))
if molecules:
    opts.SetScale(minscale)

# Pair cells with molecules via zip: trailing cells without a molecule are
# simply left blank rather than indexing past the end of `molecules`.
for cell, smi, mol in zip(grid.GetCells(), smiles_of_frequent_rings, molecules):
    disp = OE2DMolDisplay(mol, opts)
    OERenderMolecule(cell, disp)

    data = "freq = " + str(ring_cnt_multiple[smi])
    offset = 200
    cell.DrawText(OE2DPoint(350, offset), data, font)

OEWriteImage("frequent_rings.png", image)

['c1ccccc1', 'c1ccc2c(c1)cncn2', 'c1ccc2c(c1)cccn2', 'c1ccncc1', 'C1CNCCN1', 'C1CCCCC1', 'c1cncnc1', 'C1CCC/C=C/C=C/C=C/CC[C@@H]2CCC[C@@H](O2)CCN3CCCC[C@H]3COCCCC/C=C/CC1', 'C1CCNCC1', 'c1ccc2c(c1)c[nH]n2', 'C1CC1', 'c1cc(cnc1)c2ccncn2', 'c1cc(cnc1)N2CCNCC2', 'C']


True