In [None]:
# Disabled earlier variant that used the unpinned "latest" Miniconda installer.
#! wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
#! chmod +x Miniconda3-latest-Linux-x86_64.sh
# ! bash ./Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local

# Download the pinned Miniconda installer (py37, 4.8.3), make it executable,
# and run it with /usr/local as the install prefix (-p).
! wget -c https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
! chmod +x Miniconda3-py37_4.8.3-Linux-x86_64.sh
! bash ./Miniconda3-py37_4.8.3-Linux-x86_64.sh -b -f -p /usr/local

# Install RDKit from the "rdkit" conda channel.
! conda install -q -y -c rdkit rdkit 

# Add the conda site-packages directory to the import path of this kernel.
# NOTE(review): the path hard-codes python3.7 — presumably it has to match the
# py37 installer chosen above; confirm if the installer version is changed.
import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')
# Install keras-tuner straight from its GitHub repository (no version pin).
!pip install git+https://github.com/keras-team/keras-tuner.git

In [None]:
# General numerics / plotting / utilities.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import warnings , os, re
# Set the TensorFlow C++ log-level variable before tensorflow is imported below.
# NOTE(review): '3' is presumably the most restrictive logging level — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

import tensorflow.keras as keras
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
# NOTE(review): this import comes from the standalone `keras` package while the
# lines above use `tensorflow.keras` — confirm both are meant to be mixed.
from keras.utils import np_utils

# Keras Tuner search strategies and hyper-parameter helpers.
from kerastuner.tuners import RandomSearch, BayesianOptimization
from kerastuner.engine.hypermodel import HyperModel
from kerastuner.engine.hyperparameters import HyperParameters, Choice

In [None]:
# RDKit modules used by the cells below (core chemistry, drawing, 3D embedding).
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem, Draw, rdDistGeom
from rdkit.Chem.Draw import IPythonConsole
from rdkit import RDLogger
# Raise the RDKit logger threshold to CRITICAL so lower-severity messages are hidden.
RDLogger.logger().setLevel(RDLogger.CRITICAL)

Create molecule

In [None]:
# Build a molecule object from a SMILES string; the bare name on the last line
# makes the notebook display it.
ala = Chem.MolFromSmiles( 'CC(C(=O)O)N' )
ala

In [None]:
# Show `ala` serialised as a MOL block.
Chem.MolToMolBlock(ala)

In [None]:
# Show `ala` written back out as a SMILES string.
Chem.MolToSmiles(ala)

In [None]:
# Show `ala` written out as an InChI string.
Chem.MolToInchi(ala)

3D Coordinates

In [None]:
# Add explicit hydrogens to `ala`, then generate 3D coordinates for the result.
alaH = Chem.AddHs(ala)
# Distance-geometry embedding is stochastic; a fixed seed makes the coordinates
# (and the energies printed in the next cell) reproducible on re-run.
conf_id = rdDistGeom.EmbedMolecule(alaH, randomSeed=42)
# EmbedMolecule returns the new conformer id, or -1 when embedding failed.
if conf_id < 0:
    raise RuntimeError("3D embedding failed for alaH")
alaH

In [None]:
# Report the UFF energy of `alaH` before and after a UFF geometry optimisation.
ff = AllChem.UFFGetMoleculeForceField(alaH)
energy_before = ff.CalcEnergy()
print( "Before=", energy_before )

# Optimise the molecule in place, then re-evaluate with the same force-field object.
AllChem.UFFOptimizeMolecule(alaH)
energy_after = ff.CalcEnergy()
print( "After=", energy_after )

alaH

Descriptors

In [None]:
from rdkit.Chem import Descriptors

In [None]:
# Evaluate a fixed set of descriptor functions on `alaH` and display them as a tuple,
# in the order: molecular weight, logP, ring count, TPSA.
descriptor_fns = (
    Descriptors.MolWt,
    Descriptors.MolLogP,
    Descriptors.RingCount,
    Descriptors.TPSA,
)
tuple( fn(alaH) for fn in descriptor_fns )

Database

In [None]:
# Authenticate the Colab user first ...
from google.colab import auth
auth.authenticate_user()

# ... then mount Google Drive under ./MyDrive (relative to the working directory).
# The SDF path used in the next cell is resolved below this mount point.
from google.colab import drive
drive.mount('./MyDrive')

In [None]:
# Read the EGFR aminopyridine set from the mounted Drive folder.
# (Presumably the non-Colab location is '../data/egfr/ChEMBL27_EGFR_aminopyridine.sdf'.)
sdf_path = './MyDrive/My Drive/Colab Notebooks/data/egfr/ChEMBL27_EGFR_aminopyridine.sdf'
db_egfr = Chem.SDMolSupplier( sdf_path )

# Keep only the records that were parsed into a molecule (the supplier may yield None).
mols_egfr = []
for record in db_egfr:
    if record is None:
        continue
    mols_egfr.append( record )

n_read = len(mols_egfr)
print( '%d-molecules read...' % n_read )

# Preview the first ten molecules in a grid.
Draw.MolsToGridImage( mols_egfr[:10], molsPerRow=4, subImgSize=(300,300) )

Substructure search

In [None]:
# Build the substructure query from SMILES and give it 2D coordinates; the
# search cell below passes `query` as the template for depiction matching.
smiles = 'COc1cc(ccc1Nc1ncc(Cl)c(Oc2ccccc2)n1)N1CCNCC1'
query = Chem.MolFromSmiles( smiles )
AllChem.Compute2DCoords(query)
query

In [None]:
# Substructure screen of the SDF records against `query`.
# The supplier can yield None for records it could not parse (the load cell
# filters them for the same reason), so skip those before calling methods on them.
result = [ x for x in db_egfr if x is not None and x.HasSubstructMatch(query) ]
print( "Found = ", len( result ), " from ", len( db_egfr ) )

# Re-depict every hit so that its matched part follows the query's 2D layout.
for m in result:
    AllChem.GenerateDepictionMatching2DStructure( m, query )

# Draw at most the first 20 hits; the highlight list is built from the same slice
# so that it lines up one-to-one with the molecules actually drawn.
shown = result[:20]
Draw.MolsToGridImage( shown, molsPerRow=4, subImgSize=(300,300), highlightAtomLists=[ hit.GetSubstructMatch(query) for hit in shown ]  )

Fingerprint similarity search

In [None]:
from rdkit.Chem.Fingerprints import FingerprintMols

# Fingerprint every loaded molecule and score each one against the second entry.
fps = list( map( FingerprintMols.FingerprintMol, mols_egfr ) )
fp_ref = fps[1]
fvals = [ DataStructs.FingerprintSimilarity( fp_ref, candidate ) for candidate in fps ]

# Compact two-decimal printout of all similarity values.
similarity_arr = np.array(fvals)
print( np.array_str( similarity_arr, precision=2 ) )

Maximum common substructure (MCS)

In [None]:
from rdkit.Chem import rdFMCS

# Find the maximum common substructure of the first ten molecules and turn the
# resulting SMARTS string into a query molecule for display / matching.
mols_sub = mols_egfr[:10]
mcsf = rdFMCS.FindMCS( mols_sub )
mcs = Chem.MolFromSmarts( mcsf.smartsString )
mcs

In [None]:
# Highlight, in each molecule of the subset, the atoms that match the MCS query.
mcs_matches = [ mol_i.GetSubstructMatch(mcs) for mol_i in mols_sub ]
Draw.MolsToGridImage(
    mols_sub,
    molsPerRow=5,
    subImgSize=(200,200),
    highlightAtomLists=mcs_matches,
)

In [None]:
from rdkit.Chem import rdRGroupDecomposition as rdRGD

# Recompute the MCS, this time over the whole loaded set, and use it as the core.
# Note that this rebinds `mcsf` / `mcs` from the previous MCS cell.
mcsf = rdFMCS.FindMCS( mols_egfr )
mcs = Chem.MolFromSmarts( mcsf.smartsString )

# Decompose the first 50 molecules around that core, returning SMILES strings.
rgd_input = mols_egfr[:50]
matched, unmatched = rdRGD.RGroupDecompose( [mcs], rgd_input, asSmiles=True )
print( len(matched), len(unmatched) )
print( matched[:3] )

# Display the core of the first decomposed molecule.
first_core_smiles = matched[0]['Core']
Chem.MolFromSmiles( first_core_smiles )

Morgan fingerprint of the MCS core

In [None]:
# Round-trip the MCS query through SMILES to get an ordinary molecule, then
# compute its Morgan fingerprint (radius 2) while recording which atom
# environments set each bit.
m = Chem.MolFromSmiles( Chem.MolToSmiles(mcs) )
if m is None:
    # MolFromSmiles returns None when the string cannot be parsed; fail with a clear message.
    raise ValueError("SMILES generated from the MCS query could not be parsed into a molecule")
bi = {}
fp = AllChem.GetMorganFingerprintAsBitVect(m, radius=2, bitInfo=bi)
print( bi )

# Bit 1819 was hard-coded for one particular MCS; it only exists in `bi` if that
# environment is present. Fall back to the lowest set bit so the cell still
# renders an example instead of raising KeyError on a different data set.
bit_id = 1819 if 1819 in bi else min(bi)
Draw.DrawMorganBit(m, bit_id, bi)

Morgan fingerprint bits: alanine vs. cysteine

In [None]:
from rdkit.Chem.Fingerprints import FingerprintMols

# 1024-bit Morgan fingerprint (radius 2) for alanine, keeping the bit -> environment map.
ala = Chem.MolFromSmiles( 'CC(C(=O)O)N' )
bi = {}
fp = AllChem.GetMorganFingerprintAsBitVect( ala, 2, nBits=1024, bitInfo=bi )

# One drawing per set bit, each labelled with its bit index.
ala_on_bits = list( fp.GetOnBits() )
mfps = [ ( ala, bit, bi ) for bit in ala_on_bits ]
ala_legends = list( map( str, ala_on_bits ) )
Draw.DrawMorganBits( mfps, molsPerRow=4, subImgSize=(150,150), legends=ala_legends )

In [None]:
# Same fingerprint settings as for alanine, applied to cysteine.
cys = Chem.MolFromSmiles('SCC(C(=O)O)N')
bi2 = {}
fp2 = AllChem.GetMorganFingerprintAsBitVect( cys, 2, nBits=1024, bitInfo=bi2 )

# One drawing per set bit, each labelled with its bit index.
cys_on_bits = list( fp2.GetOnBits() )
mfps2 = [ ( cys, bit, bi2 ) for bit in cys_on_bits ]
cys_legends = list( map( str, cys_on_bits ) )
Draw.DrawMorganBits( mfps2, molsPerRow=4, subImgSize=(150,150), legends=cys_legends )

In [None]:
# Tanimoto similarity by hand: |shared on-bits| / |union of on-bits|,
# followed by the library value for comparison.
bits_ala = set( fp.GetOnBits() )
bits_cys = set( fp2.GetOnBits() )
common = bits_ala.intersection( bits_cys )
combined = bits_ala.union( bits_cys )

print( len(common), ' : ', common )
print( len(combined), ' : ', combined )

manual_tanimoto = len(common) / len(combined)
print( manual_tanimoto )
print( DataStructs.TanimotoSimilarity( fp, fp2 ) )

Fingerprint similarity map

In [None]:
from rdkit.Chem.Draw import SimilarityMaps

# Use the first two molecules of the MCS subset: `ref` is the reference, `mol`
# is the molecule the similarity map is drawn for.
ref = mols_sub[0]
mol = mols_sub[1]
# Morgan-fingerprint similarity map; the call returns the figure and a maximum weight.
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint( ref, mol, SimilarityMaps.GetMorganFingerprint )

# Plain depictions of both molecules for comparison with the map.
Draw.MolsToGridImage( [ref, mol], molsPerRow=3, subImgSize=(300,300) )

In [None]:
import numpy as np

# Fingerprint every loaded molecule and take the second one as the reference.
fps = list( map( FingerprintMols.FingerprintMol, mols_egfr ) )
fp_ref = fps[1]

# Similarity of the reference to every fingerprint (including itself).
val = [ DataStructs.FingerprintSimilarity( fp_ref, candidate ) for candidate in fps ]
print( np.array_str( np.array(val), precision=2 ) )

In [None]:
from rdkit.Chem import rdMolDescriptors

# Per-atom properties of the first molecule of the MCS subset.
m = mols_sub[0]

# Gasteiger charges are stored on each atom as the '_GasteigerCharge' property.
AllChem.ComputeGasteigerCharges(m)
charges = [ atom.GetDoubleProp('_GasteigerCharge') for atom in m.GetAtoms() ]

# Crippen contributions come back as one pair per atom (printed below as LogP, MR).
logp = rdMolDescriptors._CalcCrippenContribs(m)
print( "Charges = ", np.array_str( np.array(charges), precision=2 ) )
print( "(LogP, MR) = ", np.array_str( np.array(logp), precision=2 ) )

_ = SimilarityMaps.GetSimilarityMapFromWeights(m, charges, colorMap='jet', contourLines=10)
# Fix: the logP weights were computed for `m`, so they must be drawn on `m`.
# The original drew them on `mol`, a different molecule left over from an
# earlier cell, which mismatches weights and atoms.
logp_weights = [ logp_part for logp_part, _mr_part in logp ]
_ = SimilarityMaps.GetSimilarityMapFromWeights(m, logp_weights, colorMap='jet', contourLines=10)

Reactions

In [None]:
# Reaction template: a carbonyl carbon bearing a terminal (degree-1) oxygen plus
# a nitrogen that carries at least one hydrogen -> the carbonyl bonded to that nitrogen.
rxn = AllChem.ReactionFromSmarts('[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]')
rxn

In [None]:
# Apply `rxn` to an acid-containing reactant and methylamine.
rx1 = Chem.MolFromSmiles('C(COC(=O)O)C(=O)O')
rx2 = Chem.MolFromSmiles('NC')
reactants = (rx1, rx2)
ps = rxn.RunReactants( reactants )
print( len(ps) )

# Show both reactants followed by the first product of the first two product sets.
product_a = ps[0][0]
product_b = ps[1][0]
Draw.MolsToGridImage( [ rx1, rx2, product_a, product_b ], molsPerRow=2, subImgSize=(300,300) )

RECAP

In [None]:
from rdkit.Chem import Recap
# Run RECAP decomposition on an example molecule, print the keys of the root
# node's children, and display the molecule held by the root node.
# Note: this rebinds the notebook-level name `m`.
m = Chem.MolFromSmiles('c1ccccc1OCCOC(=O)CC')
hierarch = Recap.RecapDecompose(m)
print( hierarch.children.keys() )
hierarch.mol

In [None]:
# Run the nvidia-smi shell command and show its output in the notebook.
!nvidia-smi
