In [40]:
import pandas as pd
import seaborn as sns
from rdkit import rdBase, Chem
from rdkit.Chem import AllChem, Descriptors
import py3Dmol

In [3]:
# Load the Platinum dataset from its SDF file. Entries that the supplier
# hands back as None are dropped, so `mols` holds only usable molecules.
suppl = Chem.SDMolSupplier('./sdf/platinum_dataset_2017_01.sdf')
mols = list(filter(lambda entry: entry is not None, suppl))
len(mols)

4548

In [6]:
# RDKit's descriptor registry: a list of (name, function) pairs.
# Display how many descriptors are available.
desc_list = Descriptors.descList
len(desc_list)

200

In [8]:
# Peek at the first three entries to see the (name, function) layout.
desc_list[:3]

[('MaxEStateIndex',
  <function rdkit.Chem.EState.EState.MaxEStateIndex(mol, force=1)>),
 ('MinEStateIndex',
  <function rdkit.Chem.EState.EState.MinEStateIndex(mol, force=1)>),
 ('MaxAbsEStateIndex',
  <function rdkit.Chem.EState.EState.MaxAbsEStateIndex(mol, force=1)>)]

In [20]:
# Compare the Lipinski-style and the generic H-bond acceptor/donor counts
# for a single molecule, printed in the order: Lipinski HBA, Lipinski HBD,
# generic HBA, generic HBD.
xxx = 2
for count_fn in (
    Chem.rdMolDescriptors.CalcNumLipinskiHBA,
    Chem.rdMolDescriptors.CalcNumLipinskiHBD,
    Chem.rdMolDescriptors.CalcNumHBA,
    Chem.rdMolDescriptors.CalcNumHBD,
):
    print(count_fn(mols[xxx]))

8
2
6
2


In [28]:
# Lookup table: descriptor name -> function that computes it for a molecule.
# dict() consumes the (name, function) pairs directly, which avoids the
# hand-written loop and does not leave loop variables behind in the
# notebook's global namespace.
calc = dict(desc_list)

# Names of the four descriptors that Lipinski's Rule of Five is based on.
lipinski_list = ['NumHDonors', 'NumHAcceptors', 'MolWt', 'MolLogP']

# Helper functions for checking Lipinski's Rule of Five
def calc_lipinski(mol):
    """Compute the Rule-of-Five descriptors for one molecule.

    Args:
        mol: The molecule passed to each descriptor function.

    Returns:
        dict: One entry per name in ``lipinski_list``, mapping the
        descriptor name to the value returned by ``calc[name](mol)``.
    """
    return {name: calc[name](mol) for name in lipinski_list}

def check_lipinski(dic):
    """Tell whether a descriptor dict satisfies all four Rule-of-Five limits.

    Args:
        dic: Mapping with the keys 'MolWt', 'MolLogP', 'NumHDonors' and
            'NumHAcceptors'.

    Returns:
        bool: True only if every value is at or below its limit
        (MolWt <= 500, MolLogP <= 5, NumHDonors <= 5, NumHAcceptors <= 10).
    """
    # Checked in this order; all() stops at the first limit that is exceeded.
    upper_limits = (
        ('MolWt', 500),
        ('MolLogP', 5),
        ('NumHDonors', 5),
        ('NumHAcceptors', 10),
    )
    return all(dic[key] <= limit for key, limit in upper_limits)


def rule_of_five(mol):
    """Return ``mol`` if it satisfies the Rule of Five, otherwise ``None``.

    Args:
        mol: The molecule to evaluate.

    Returns:
        The same ``mol`` object when ``check_lipinski`` accepts its
        descriptors; ``None`` when it does not.
    """
    passes = check_lipinski(calc_lipinski(mol))
    return mol if passes else None



In [29]:
# Split the dataset into molecules that satisfy the Rule of Five and
# molecules that violate it.
lipinski_mols, bad_mols = [], []
for m in mols:
    # rule_of_five gives back the molecule on success and None otherwise,
    # so its truthiness selects the destination list.
    (lipinski_mols if rule_of_five(m) else bad_mols).append(m)

len(lipinski_mols), len(bad_mols)

(3574, 974)

In [38]:
# Spot-check one molecule: show its descriptor values, then the verdict.
xxx = 1
props = calc_lipinski(mols[xxx])
print(props)
print(check_lipinski(props))

{'NumHDonors': 0, 'NumHAcceptors': 7, 'MolWt': 439.5190000000002, 'MolLogP': 3.816200000000002}
True


In [45]:
# Show the first 12 rule-violating molecules in a 3 x 4 grid of py3Dmol viewers.
v = py3Dmol.view(width=800, height=600, linked=False, viewergrid=(3,4))
# (row, column) position of each panel, filled row by row.
grid = [divmod(slot, 4) for slot in range(12)]
for mol, cell in zip(bad_mols[:12], grid):
    # Each molecule goes into its own panel as a mol block.
    v.addModel(Chem.MolToMolBlock(mol), 'sdf', viewer=cell)
v.setStyle({'stick': {}})
v.setBackgroundColor('#EEEEEE')
v.zoomTo()
v.show()