### Author - Ajaya Kumar Sahoo

#### Compute pairwise chemical similarity (Tanimoto coefficient) using ECFP4 and MACCS fingerprints

In [1]:
import itertools

import rdkit
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import MACCSkeys
from rdkit.Chem.Fingerprints import FingerprintMols

In [4]:
# Load the molecules from an SDF file.
# Point `fin` at the SDF file on your machine.

fin = 'input.sdf'  # path of the input SDF file

# The supplier is iterated again further down to compute the fingerprints.
edcs = Chem.SDMolSupplier(fin)

# Report how many records the supplier exposes.
summary = 'Number of molecules {}'.format(len(edcs))
print(summary)


In [5]:
# Derive the output path from the input path: 'input.sdf' -> 'input_tani_out.tsv'.
out_path = fin.replace('.sdf', '_tani_out.tsv')
if out_path == fin:
    # `fin` does not contain '.sdf' (e.g. '.SDF' or '.sd'), so the replace was a
    # no-op; opening that path with 'w' would truncate the input file itself.
    out_path = fin + '_tani_out.tsv'

# The handle is intentionally left open here: the pairwise-similarity loop
# further down writes the data rows and closes it.
fout = open(out_path, 'w')  # output file

# Header row; columns are tab-separated.
fout.write('\t'.join(['id1', 'id2', 'ECFP4', 'MACCS']) + '\n')



In [6]:
# Calculation of fingerprints.
#
# figprnts_ecfp4 / figprnts_maccs map a molecule title (its '_Name' property)
# to the corresponding fingerprint; molnames lists the titles of the molecules
# that were fingerprinted.
# NOTE: records sharing the same title overwrite each other in the dicts.

figprnts_ecfp4 = {}
figprnts_maccs = {}
molnames = []

for idx, mol in enumerate(edcs):
    # Test for None *before* touching the molecule: a None entry has no
    # properties to read, so it can only be reported by its position.
    if mol is None:
        print('Error: could not read molecule at index {}'.format(idx))
        continue

    t1 = mol.GetProp('_Name')
    # Only names that actually get fingerprints are recorded, so every entry
    # of molnames is a valid key of both dictionaries.
    molnames.append(t1)
    figprnts_ecfp4[t1] = AllChem.GetMorganFingerprint(mol, 2)  # ECFP4
    figprnts_maccs[t1] = MACCSkeys.GenMACCSKeys(mol)  # MACCS



In [7]:
# Calculation of the Tanimoto coefficient for every unordered pair of molecules.
#
# One tab-separated row (id1, id2, ECFP4 similarity, MACCS similarity) is
# written to `fout` per pair.

try:
    # combinations() over the sorted names yields each pair exactly once, with
    # the first name preceding the second in sorted order.
    for i, j in itertools.combinations(sorted(molnames), 2):
        tani_ecfp4 = DataStructs.TanimotoSimilarity(figprnts_ecfp4[i], figprnts_ecfp4[j])
        tani_maccs = DataStructs.TanimotoSimilarity(figprnts_maccs[i], figprnts_maccs[j])

        tani = [str(value) for value in (i, j, tani_ecfp4, tani_maccs)]

        fout.write('\t'.join(tani) + '\n')  # saving the output file
finally:
    # Close the output even if a lookup or similarity call fails, so rows
    # written so far are flushed and the handle is not leaked.
    fout.close()