#! wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
#! chmod +x Miniconda3-latest-Linux-x86_64.sh
#! bash ./Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local

# Download a pinned Miniconda installer (the py37_4.8.3 build, per the file name) and make it executable.
! wget -c https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
! chmod +x Miniconda3-py37_4.8.3-Linux-x86_64.sh
# Run the installer with /usr/local as its target prefix.
! bash ./Miniconda3-py37_4.8.3-Linux-x86_64.sh -b -f -p /usr/local

# Install RDKit from the rdkit conda channel.
! conda install -q -y -c rdkit rdkit 

import sys
# Make the Python 3.7 site-packages directory under /usr/local importable from this kernel.
sys.path.append('/usr/local/lib/python3.7/site-packages/')
# Install Keras Tuner straight from its GitHub repository.
!pip install git+https://github.com/keras-team/keras-tuner.git

In [None]:
import warnings , os, re
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import tensorflow as tf

import tensorflow.keras as keras
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from keras.utils import np_utils

from kerastuner.tuners import RandomSearch, BayesianOptimization
from kerastuner.engine.hypermodel import HyperModel
from kerastuner.engine.hyperparameters import HyperParameters, Choice

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem, Draw, rdDistGeom
from rdkit.Chem.Draw import IPythonConsole
from rdkit import RDLogger
RDLogger.logger().setLevel(RDLogger.CRITICAL)

Create molecule

In [None]:
# Parse a SMILES string into an RDKit molecule (alanine, going by the variable name).
ala = Chem.MolFromSmiles( 'N[C@@H](C)C(O)=O' )
# A bare name as the last line of a cell makes the notebook display the molecule.
ala

In [None]:
# Write the molecule back out as a SMILES string.
Chem.MolToSmiles(ala)

In [None]:
# Write the molecule out as an MDL mol block (text).
Chem.MolToMolBlock(ala)

In [None]:
# Write the molecule out as an InChI string.
Chem.MolToInchi(ala)

3D Coordinates - Geometry optimization

In [None]:
# Re-create the molecule, add explicit hydrogens, and embed it to obtain 3D coordinates.
ala = Chem.MolFromSmiles( 'N[C@@H](C)C(O)=O' )
alaH = Chem.AddHs(ala)
rdDistGeom.EmbedMolecule(alaH)
# Display the hydrogen-added, embedded molecule.
alaH

In [None]:
# Build a UFF force field for the embedded molecule and print the energy it
# reports before and after running a UFF geometry optimization on the molecule.
ff = AllChem.UFFGetMoleculeForceField(alaH)
print( "Before=", ff.CalcEnergy() )
AllChem.UFFOptimizeMolecule(alaH)
print( "After=", ff.CalcEnergy() )
# Display the molecule after optimization.
alaH

Descriptors, Fingerprints

In [None]:
from rdkit.Chem import Descriptors
from rdkit.Chem.Fingerprints import FingerprintMols

In [None]:
# Parse a second molecule (phenylalanine, going by the variable name) and print
# four descriptors for it: molecular weight, logP, ring count and TPSA.
phe = Chem.MolFromSmiles('N[C@H](C(O)=O)Cc1ccccc1')
print( Descriptors.MolWt(phe), Descriptors.MolLogP(phe), Descriptors.RingCount(phe), Descriptors.TPSA(phe) )
# Display the molecule.
phe

Database

In [None]:
def mol_from_smiles( row ) :
    """Build an RDKit molecule from one row of the activity table.

    Args:
        row: Mapping-like record (e.g. a pandas Series) providing a
            'smiles' entry and a 'pChembl_Value' entry.

    Returns:
        The parsed molecule, with the activity stored on it as the string
        property 'pChembl_Value'.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    smiles = row[ 'smiles' ]
    m = Chem.MolFromSmiles( smiles )
    if m is None:
        # MolFromSmiles reports a parse failure by returning None; fail here
        # with the offending input instead of an AttributeError on SetProp.
        raise ValueError( 'Could not parse SMILES: %r' % (smiles,) )
    # Molecule properties set via SetProp are strings, hence the str() conversion.
    m.SetProp( 'pChembl_Value', str(row[ 'pChembl_Value' ]) )
    return m


# Read a space-delimited, header-less table from the URL into two named
# columns: the SMILES string and the pChEMBL activity value.
url_smi = 'https://raw.githubusercontent.com/chchae/VSLecture/master/data/egfr/ChEMBL27_EGFR_aminopyridine.smi'
db_smi = pd.read_table( url_smi, delimiter=' ', names=['smiles', 'pChembl_Value'], header=None, )
# Convert every row into an RDKit molecule carrying its activity as a property.
mols_egfr = [ mol_from_smiles(row) for _, row in db_smi.iterrows() ]

print( '%d-molecules read...' % len(mols_egfr) )
# Preview the first seven molecules in a grid, four per row.
Draw.MolsToGridImage( mols_egfr[:7], molsPerRow=4, subImgSize=(300,300) )

Substructure search

In [None]:
# Build the substructure query from a SMILES string and give it 2D coordinates;
# later cells reuse `query` as a search pattern and as a depiction template.
smiles = 'c1cc(ccc1Nc1ncc(Cl)c(Oc2ccccc2)n1)'
query = Chem.MolFromSmiles( smiles )
AllChem.Compute2DCoords(query)
# Display the query.
query

In [None]:
# Keep only the library molecules that contain the query as a substructure.
result = [ x for x in mols_egfr if x.HasSubstructMatch(query) ]
print( "Found = ", len( result ), " from ", len( mols_egfr ) )
# Re-depict every hit using the query's 2D coordinates as the template.
for m in result:
    AllChem.GenerateDepictionMatching2DStructure( m, query )

# The highlight lists must line up one-to-one with the molecules actually
# drawn, so both are derived from the same ten-molecule slice.
result_shown = result[:10]
Draw.MolsToGridImage( result_shown, molsPerRow=3, subImgSize=(300,300), highlightAtomLists=[ m.GetSubstructMatch(query) for m in result_shown ]  )

MCS (Maximum Common Substructure)

In [None]:
from rdkit.Chem import rdFMCS

# Work on the first 20 library molecules only.
mols_sub = mols_egfr[:20]
# Find their maximum common substructure and turn the resulting SMARTS string
# into a query molecule.
mcsf = rdFMCS.FindMCS( mols_sub )
mcs = Chem.MolFromSmarts( mcsf.smartsString )
# Display the common substructure.
mcs

In [None]:
# Draw the first ten molecules with their MCS atoms highlighted. The highlight
# lists are built from the same slice so there is exactly one per drawn molecule.
mols_shown = mols_sub[:10]
Draw.MolsToGridImage( mols_shown, molsPerRow=3, subImgSize=(250,250), highlightAtomLists=[ m.GetSubstructMatch(mcs) for m in mols_shown ] )

R-Group decomposition

In [None]:
from rdkit.Chem import rdRGroupDecomposition as rdRGD

# Use the hand-written query as the decomposition core. An MCS search over the
# whole library used to run here, but its result was overwritten by this very
# assignment before being used, so the (potentially very slow) search is gone.
mcs = query

# Decompose every library molecule against the core; with asSmiles=True the
# core and R-groups come back as SMILES strings.
matched,unmatched = rdRGD.RGroupDecompose([mcs],mols_egfr[:],asSmiles=True)
print( len(matched), len(unmatched) )
print( matched[:3] )
# Display the core of the fifth decomposition (assumes at least five entries in `matched`).
Chem.MolFromSmiles( matched[4]['Core'])

Fingerprints

In [None]:
from rdkit.Chem.Fingerprints import FingerprintMols

ala = Chem.MolFromSmiles( 'CC(C(=O)O)N' )
# `bi` is filled in by the fingerprint call and is passed on to the bit drawing below.
bi = {}
# Morgan fingerprint with radius 2, folded into a 1024-bit vector.
fp = AllChem.GetMorganFingerprintAsBitVect( ala, 2, nBits=1024, bitInfo=bi )
#fp_arr = np.zeros( (1, ) )
#DataStructs.ConvertToNumpyArray( fp, fp_arr )
#np.nonzero( fp_arr )
#list( fp.GetOnBits() )

# One (molecule, bit id, bit info) tuple per set bit, drawn with the bit id as legend.
mfps = [ ( ala, x, bi ) for x in fp.GetOnBits() ]
Draw.DrawMorganBits( mfps, molsPerRow=4, subImgSize=(150,150), legends=[ str(x) for x in fp.GetOnBits() ] )

Fingerprint similarity search

In [None]:
# Fingerprint every library molecule; the second one serves as the reference.
fps = list( map( FingerprintMols.FingerprintMol, mols_egfr ) )
fp_ref = fps[1]

# Similarity of the reference to each fingerprint (itself included), printed
# as an array with two decimals.
val = [ DataStructs.FingerprintSimilarity( fp_ref, fp ) for fp in fps ]
print( np.array_str( np.asarray(val), precision=2 ) )

Fingerprint similarity density map

In [None]:
from rdkit.Chem.Draw import SimilarityMaps

# Compare the first two molecules of the 20-molecule subset.
ref = mols_sub[0]
mol = mols_sub[1]
# Similarity map of `mol` against `ref`, using the Morgan fingerprint function.
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint( ref, mol, SimilarityMaps.GetMorganFingerprint )

# Show both molecules side by side for reference.
Draw.MolsToGridImage( [ref, mol], molsPerRow=3, subImgSize=(300,300) )

Fingerprint descriptors map - charge and logP

In [None]:
from rdkit.Chem import rdMolDescriptors

# Molecule whose per-atom partial charges are mapped below.
m = mols_sub[0]
#m = Chem.MolFromSmiles( 'Cc1c(Nc2nccc(c3cccnc3)n2)cc(NC(c4ccc(CN5CCN(C)CC5)cc4)=O)cc1' )

# Compute Gasteiger charges; they are read back per atom from '_GasteigerCharge'.
AllChem.ComputeGasteigerCharges( m )

# Collect the charges by walking the atoms in order instead of indexing them.
charges = [ atom.GetDoubleProp('_GasteigerCharge') for atom in m.GetAtoms() ]
print( "Charges = ", charges )
SimilarityMaps.GetSimilarityMapFromWeights(m, charges, colorMap='jet', contourLines=10)

In [None]:
# Per-atom Crippen contributions for m, unpacked below as (logP, MR) pairs.
logpmr = rdMolDescriptors._CalcCrippenContribs(m)
print( "(LogP, MR) = ", logpmr )
# The weights are per-atom values of m, so they have to be drawn on m itself,
# not on `mol`, which is a different molecule with its own atom ordering.
SimilarityMaps.GetSimilarityMapFromWeights(m, [logp for logp, _ in logpmr], colorMap='jet', contourLines=10)

SMIRKS - Chemical Reactions

In [None]:
# Reaction template: a C(=O) carbon bearing a singly-connected oxygen, plus a
# nitrogen carrying at least one hydrogen, give C(=O)-N (the OD1 oxygen is not
# mapped into the product).
rxn = AllChem.ReactionFromSmarts('[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]')
# Display the reaction.
rxn

In [None]:
# Two reactants for the reaction template defined in the previous cell.
rx1 = Chem.MolFromSmiles('C(COC(=O)O)C(=O)O')
rx2 = Chem.MolFromSmiles('NC')
# Each entry of `ps` is one tuple of product molecules.
ps = rxn.RunReactants((rx1,rx2) )
print( len(ps) )
# Draw both reactants and the first product of the first two product sets
# (assumes the reaction produced at least two product sets).
Draw.MolsToGridImage( [ rx1, rx2, ps[0][0], ps[1][0] ], molsPerRow=2, subImgSize=(300,300) )

RECAP (Retrosynthetic Combinatorial Analysis Procedure) fragmentation

In [None]:
from rdkit.Chem import Recap
# Molecule to fragment; the commented line below is an alternative input.
m = Chem.MolFromSmiles('c1ccccc1OCCOC(=O)CC')
# m = Chem.MolFromSmiles( 'Cc1c(Nc2nccc(c3cccnc3)n2)cc(NC(c4ccc(CN5CCN(C)CC5)cc4)=O)cc1' )
# Decompose into a RECAP hierarchy and list the keys of the root's children.
hierarch = Recap.RecapDecompose(m)
print( hierarch.children.keys() )
# Display the molecule held by the root node.
hierarch.mol

In [None]:
# Python 3 print call; the statement form is a SyntaxError under Python 3.
print( 'test' )