#Substructure matching based on Murcko Scaffold

#Importing the libraries

In [None]:
from rdkit import Chem
from rdkit.Chem import Scaffolds
from rdkit.Chem import rdMolHash
from rdkit.Chem import Draw

#List of molecules as SMILES

In [None]:
# Input molecules, written as SMILES strings.
mList = [
    'CCCC1=NN(C2=C1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)C)OCC)C',
    'CCCC1=NC(=C2N1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)CC)OCC)C',
]

#Converting SMILES to mols

In [None]:
# Parse every SMILES string in mList into an RDKit molecule, keeping order.
mMols = [Chem.MolFromSmiles(smiles) for smiles in mList]

#Compute the Murcko scaffold MolHash of each molecule in the list

In [None]:
# One hash string per molecule, in the same order as mMols.
murckoHashList = [
    rdMolHash.MolHash(mol, rdMolHash.HashFunction.MurckoScaffold)
    for mol in mMols
]

#Identify the most frequent Murcko hash (MolHash) out of the list of Murcko hashes identified

In [None]:
def mostFreq(list):
    """Return the element that occurs most often in *list*.

    Ties go to the element whose first occurrence comes earliest, which is
    the same result ``max(list, key=list.count)`` produces.

    Args:
        list: Iterable of items to tally.  The name shadows the builtin but
            is kept so existing keyword callers keep working.

    Returns:
        The most frequent item.

    Raises:
        ValueError: If *list* contains no items.
    """
    from collections import Counter

    # Snapshot the input once: this sidesteps the shadowed builtin name and
    # lets one-shot iterables be counted as well as sequences.
    items = tuple(list)
    if not items:
        raise ValueError("mostFreq() arg is an empty sequence")

    try:
        # Single O(n) tally instead of calling .count() once per element.
        counts = Counter(items)
    except TypeError:
        # Unhashable items (e.g. lists) cannot be dict keys; fall back to
        # equality-based counting.
        return max(items, key=items.count)

    # Counter keeps first-insertion order and max() returns the first
    # maximum it meets, so ties still resolve to the earliest element.
    return max(counts, key=counts.__getitem__)

In [None]:
# Pick the hash that appears most often in murckoHashList.
mostFreq_murckoHash = mostFreq(murckoHashList)

In [None]:
# NOTE(review): this rebinds mostFreq_murckoHash to a hard-coded SMILES string,
# replacing whatever mostFreq(murckoHashList) produced -- confirm the override
# is intentional.
mostFreq_murckoHash = "c1cc([SH4]N2CCNCC2)cc(-c2nn3cncc3c[nH]2)c1"

#Converting the most frequent Murcko hash to the mol format

In [None]:
# Build a molecule from the selected hash string so it can be used as a
# substructure query.
mostFreq_murckoHash_mol = Chem.MolFromSmiles(mostFreq_murckoHash)

In [None]:
# Parse the hash string again and pass the fresh molecule to SanitizeMol.
# The result is not assigned -- presumably this is only a validity check.
Chem.SanitizeMol(Chem.MolFromSmiles(mostFreq_murckoHash))

In [None]:
# Bare expression: evaluates the list of hashes so it appears as cell output.
murckoHashList

In [None]:
# Bare expression: evaluates the scaffold molecule so it appears as cell output.
mostFreq_murckoHash_mol

#Getting the substructure match of the Murcko-hash-based mol in each of the molecules

In [None]:
# For each molecule, query it with the scaffold molecule and keep the match
# result, in the same order as mMols.
highlight_mostFreq_murckoHash = [
    mol.GetSubstructMatch(mostFreq_murckoHash_mol)
    for mol in mMols
]

#Generating a grid image to view the mols

In [None]:
# Grid image of the molecules: each is captioned with its Murcko hash and the
# atoms from the scaffold match are passed in as highlights.
Draw.MolsToGridImage(
    mMols,
    legends=list(murckoHashList),
    highlightAtomLists=highlight_mostFreq_murckoHash,
    subImgSize=(250, 250),
    useSVG=True,
)

#Substructure matching based on maximum common substructure (MCS)

In [None]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import rdFMCS

#List of molecules as SMILES

In [None]:
# Input molecules for the MCS example, written as SMILES strings.
mList = [
    'CCCC1=NN(C2=C1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)C)OCC)C',
    'CCCC1=NC(=C2N1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)CC)OCC)C',
]

#Converting SMILES to mols

In [None]:
# Turn each SMILES string from mList into an RDKit molecule, order preserved.
mMols = [Chem.MolFromSmiles(smiles) for smiles in mList]

#Finding the maximum common substructure among the list of molecules

In [None]:
# Run the MCS search over all molecules in mMols with the matching options
# spelled out one per line.
commonMCS = rdFMCS.FindMCS(
    mMols,
    maximizeBonds=True,
    threshold=1,
    matchValences=False,
    ringMatchesRingOnly=False,
    completeRingsOnly=False,
    matchChiralTag=False,
)

#Convert the maximum common substructure to mol format

In [None]:
# Build a query molecule from the SMARTS string stored on the MCS result.
commonMCS_Mol = Chem.MolFromSmarts(commonMCS.smartsString)

#Getting the substructure match of the MCS-based mol in each of the molecules

In [None]:
# For each molecule, query it with the MCS molecule and keep the match result,
# in the same order as mMols.
highlight_MCS = [
    mol.GetSubstructMatch(commonMCS_Mol)
    for mol in mMols
]

#Generating a grid image to view the mols

In [None]:
# Grid image of the molecules with the atoms from the MCS match passed in as
# highlights.
Draw.MolsToGridImage(
    mMols,
    highlightAtomLists=highlight_MCS,
    subImgSize=(500, 500),
    useSVG=True,
)