In [2]:
import sklearn as sk
import rdkit as rd
from rdkit.Chem import AllChem
from rdkit.Chem import Draw




In [3]:
help(sk)

Help on package sklearn:

NAME
    sklearn

DESCRIPTION
    Machine learning module for Python
    
    sklearn is a Python module integrating classical machine
    learning algorithms in the tightly-knit world of scientific Python
    packages (numpy, scipy, matplotlib).
    
    It aims to provide simple and efficient solutions to learning problems
    that are accessible to everybody and reusable in various contexts:
    machine-learning as a versatile tool for science and engineering.
    
    See http://scikit-learn.org for complete documentation.

PACKAGE CONTENTS
    __check_build (package)
    _build_utils (package)
    _config
    _isotonic
    base
    calibration
    cluster (package)
    compose (package)
    conftest
    covariance (package)
    cross_decomposition (package)
    datasets (package)
    decomposition (package)
    discriminant_analysis
    dummy
    ensemble (package)
    exceptions
    experimental (package)
    externals (package)
    feature_extraction (p

In [4]:
help(rd.Chem)

Help on package rdkit.Chem in rdkit:

NAME
    rdkit.Chem - A module for molecules and stuff

DESCRIPTION
    see Chem/index.html in the doc tree for documentation

PACKAGE CONTENTS
    AllChem
    AtomPairs (package)
    BRICS
    BuildFragmentCatalog
    ChemUtils (package)
    ChemicalFeatures
    ChemicalForceFields
    Crippen
    DSViewer
    Descriptors
    Descriptors3D
    Draw (package)
    EState (package)
    EnumerateHeterocycles
    EnumerateStereoisomers
    FastSDMolSupplier
    FeatFinderCLI
    FeatMaps (package)
    Features (package)
    FilterCatalog
    Fingerprints (package)
    Fraggle (package)
    FragmentCatalog
    FragmentMatcher
    Fragments
    FunctionalGroups
    GraphDescriptors
    Graphs
    Lipinski
    MACCSkeys
    MCS
    MolCatalog
    MolDb (package)
    MolKey (package)
    MolStandardize (package)
    MolSurf
    PandasTools
    Pharm2D (package)
    Pharm3D (package)
    PropertyMol
    PyMol
    QED
    Randomize
    Recap
    ReducedGraph

In [5]:
suppl = rd.Chem.SDMolSupplier('../data/raw/ChemDivFull.sdf')

In [6]:
print(suppl)

<rdkit.Chem.rdmolfiles.SDMolSupplier object at 0xa1d9a4c30>


In [7]:
# Extract only the 960 tested molecules
plates = ["CDIV%04d"%p for p in range(1,121,10)]
print(len(plates),plates)

12 ['CDIV0001', 'CDIV0011', 'CDIV0021', 'CDIV0031', 'CDIV0041', 'CDIV0051', 'CDIV0061', 'CDIV0071', 'CDIV0081', 'CDIV0091', 'CDIV0101', 'CDIV0111']


In [8]:
tested = [x for x in suppl if x.GetProp("BATCH_PLATE") in plates]
print(len(suppl),len(tested))

50000 960


In [9]:
for mol in tested:
    if mol is None: continue
    if mol.GetProp("BATCH_WELL")=="A02":
        print(mol.GetNumAtoms(),mol.GetProp("_Name"),mol.GetProp("BATCH_PLATE"),mol.GetProp("BATCH_WELL"))

11  CDIV0001 A02
24  CDIV0011 A02
26  CDIV0021 A02
23  CDIV0031 A02
23  CDIV0041 A02
28  CDIV0051 A02
24  CDIV0061 A02
20  CDIV0071 A02
24  CDIV0081 A02
23  CDIV0091 A02
29  CDIV0101 A02
20  CDIV0111 A02


In [10]:
for x in mol.GetPropNames(includePrivate=True,includeComputed=True):
    print(x,mol.GetProp(x))

__computedProps [numArom,_StereochemDone,]
_Name 
_MolFileInfo   -ISIS-  01311913252D
_MolFileComments 
numArom 3
_StereochemDone 1
compound_Corp_Reg_Number STF-085941
rotatable_bonds 6.000000000000000e+000
BATCH_NUMBER 1
BATCH_MW 3.824600000000000e+002
BATCH_VENDOR_CATALOG 6024-0185
BATCH_PLATE CDIV0111
BATCH_WELL H11
LogP 5.249700000000000e+000
LogS -6.007000000000000e+000
H_Acceptors 4
H_Donors 1
AnlTst_Display NMR
BatAnl_Link D:\Spectra\FirstSide\10K\6024-0185.emf
VENDOR_DISPLAY ChemDiv


In [11]:
AllChem.Compute2DCoords(mol)

0

In [12]:
print(mol)

<rdkit.Chem.rdchem.Mol object at 0xa1d9d4a30>


In [42]:
m2=rd.Chem.AddHs(mol)
AllChem.EmbedMolecule(m2)
AllChem.MMFFOptimizeMolecule(m2)
m2=rd.Chem.RemoveHs(m2)
img=Draw.MolsToGridImage([mol,m2],molsPerRow=2,subImgSize=(200,200),legends=["2d","3d"])
img.show()

In [14]:
print(rd.Chem.MolToMolBlock(tested[1]))


     RDKit          2D

 20 22  0  0  0  0  0  0  0  0999 V2000
    0.5917   -1.2875    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0125   -0.4667    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    1.5917   -1.2875    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0125   -2.0792    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    2.1167   -0.4042    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9250   -0.7750    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1167   -2.1375    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9250   -1.7750    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.6250   -3.0167    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    1.5917    0.4458    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.5542    2.1417    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0417    3.0250    0.0000 F   0  0  0  0  0  0  0  0  0  0  0  0
    0.5917    0.4083    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1167    1.2958    0

In [30]:
def getMol(mols,plate,well):
    mol=[x for x in mols if x.GetProp("BATCH_PLATE")=="CDIV%04d"%plate and x.GetProp("BATCH_WELL")==well]
    assert(len(mol)==1)
    return mol[0]

In [38]:
# Compare some molecules which give the same response
m91d2=getMol(tested,91,'D02')
m91e2=getMol(tested,91,"E02")
img=Draw.MolsToGridImage([m91d2,m91e2],molsPerRow=2,subImgSize=(200,200),legends=["91E2","91D2"])
img.show()

In [44]:
from rdkit.Chem import rdFMCS
res=rdFMCS.FindMCS([m91d2,m91e2])
res.smartsString

'[#6]12:[#6](:[#7]:[#6]3:[#7](:[#6]:1=[#8]):[#6]:[#6]:[#6]:[#6]:3):[#7](:[#6](:[#6](:[#6]:2)-[#6](=[#8])-[#7]-[#6])=[#7])-[#6]-[#6]-[#6]-[#8]-[#6]'

In [45]:
res

<rdkit.Chem.rdFMCS.MCSResult at 0xa22abfd00>

In [47]:
# Check chemical features
from rdkit import Chem
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
import os
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)
feats = [factory.GetFeaturesForMol(x) for x in [m91d2,m91e2]]

In [51]:
len(feats[1])

13

In [53]:
help(feats[0][0])

Help on MolChemicalFeature in module rdkit.Chem.rdMolChemicalFeatures object:

class MolChemicalFeature(Boost.Python.instance)
 |  Class to represent a chemical feature.
 |  These chemical features may or may not have been derived from molecule object;
 |  i.e. it is possible to have a chemical feature that was created just from its type
 |  and location.
 |  
 |  Method resolution order:
 |      MolChemicalFeature
 |      Boost.Python.instance
 |      builtins.object
 |  
 |  Static methods defined here:
 |  
 |  ClearCache(...)
 |      ClearCache( (MolChemicalFeature)arg1) -> None :
 |          Clears the cache used to store position information.
 |      
 |          C++ signature :
 |              void ClearCache(RDKit::MolChemicalFeature {lvalue})
 |  
 |  GetActiveConformer(...)
 |      GetActiveConformer( (MolChemicalFeature)arg1) -> int :
 |          Gets the conformer to use.
 |      
 |          C++ signature :
 |              int GetActiveConformer(RDKit::MolChemicalFeature {

In [61]:
for fs in feats:
    print("")
    for f in fs:
        print(f.GetFamily(),f.GetType(),f.GetAtomIds())


Donor SingleAtomDonor (11,)
Donor SingleAtomDonor (19,)
Acceptor SingleAtomAcceptor (2,)
Acceptor SingleAtomAcceptor (13,)
Acceptor SingleAtomAcceptor (15,)
Acceptor SingleAtomAcceptor (21,)
Aromatic Arom6 (0, 1, 5, 7, 8, 2)
Aromatic Arom6 (0, 1, 9, 4, 6, 3)
Aromatic Arom6 (7, 8, 14, 17, 16, 12)
Hydrophobe ThreeWayAttach (1,)
Hydrophobe ThreeWayAttach (4,)
Hydrophobe ChainTwoWayAttach (20,)

Donor SingleAtomDonor (13,)
Donor SingleAtomDonor (19,)
Acceptor SingleAtomAcceptor (2,)
Acceptor SingleAtomAcceptor (16,)
Acceptor SingleAtomAcceptor (17,)
Acceptor SingleAtomAcceptor (21,)
Aromatic Arom6 (0, 1, 6, 3, 8, 2)
Aromatic Arom6 (0, 1, 9, 5, 7, 4)
Aromatic Arom6 (3, 8, 12, 15, 14, 10)
Hydrophobe ThreeWayAttach (1,)
Hydrophobe ThreeWayAttach (5,)
Hydrophobe ThreeWayAttach (14,)
Hydrophobe ChainTwoWayAttach (20,)


In [62]:
# Generate pharmacophore fingerprints 
from rdkit import Chem
from rdkit.Chem import ChemicalFeatures
fdefName = 'data/MinimalFeatures.fdef'
featFactory = ChemicalFeatures.BuildFeatureFactory(fdefName)

OSError: File: data/MinimalFeatures.fdef could not be opened.

In [None]:
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
sigFactory = SigFactory(featFactory,minPointCount=2,maxPointCount=3)
sigFactory.SetBins([(0,2),(2,5),(5,8)])
sigFactory.Init()
sigFactory.GetSigSize()