In [5]:
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski
import numpy as np
import pandas as pd

In [6]:
def get_moldata(smiles):
    '''Parse every SMILES string in ``smiles`` with ``Chem.MolFromSmiles``.

    The result is a list with one entry per input, in input order, holding
    whatever ``Chem.MolFromSmiles`` returned for that string (no filtering
    or validation is applied here).
    '''
    return [Chem.MolFromSmiles(smi) for smi in smiles]

In [7]:
def get_lip(mols):
    '''Tabulate Lipinski descriptors for a collection of RDKit molecules.

    Each molecule yields one row whose columns are, in order: molecular
    weight ('MW'), 'LogP', hydrogen-bond acceptor count ('NumHAcceptors')
    and hydrogen-bond donor count ('NumHDonors').

    All values are routed through a single NumPy array before the DataFrame
    is built, so the four columns share one common dtype.
    '''
    column_names = ['MW', 'LogP', 'NumHAcceptors', 'NumHDonors']

    # One [MW, LogP, HBA, HBD] record per molecule, computed in that order.
    rows = [
        [
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
        ]
        for mol in mols
    ]

    # Explicit (n, 4) shape so an empty input still produces a 0 x 4 table.
    table = np.array(rows).reshape(len(rows), 4)
    return pd.DataFrame(data=table, columns=column_names)

In [31]:
def convert_to_pIC50(std_val, molar_per_unit=1e-9):
    '''Convert IC50 values to pIC50 (-log10 of the concentration in molar).

    Parameters
    ----------
    std_val : iterable of numbers
        IC50 values, all expressed in the same concentration unit.
    molar_per_unit : float, optional
        Factor converting one unit of ``std_val`` into molar. The default
        ``1e-9`` treats the inputs as nanomolar (the previously hard-coded
        behaviour); pass e.g. ``1e-6`` for micromolar inputs.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'pIC50'`` with one row per input value, in
        input order.

    Notes
    -----
    Inputs are not validated: a value of 0 or below is passed straight to
    ``np.log10`` and yields a non-finite result.
    '''
    pIC50 = [-np.log10(molar_per_unit * value) for value in std_val]
    return pd.DataFrame({'pIC50': pIC50})

In [19]:
# Example inputs for the descriptor functions: three SMILES strings.
# NOTE(review): the third entry ends in a bracketed '[O]' with no explicit H;
# confirm that is intended rather than a plain 'O' (the displayed output row
# for this entry reports NumHDonors = 0).
smiles = ['C12=NC=NC=C1C=CC=C2', 'C1(C=CC=C2)=C2C=NN1CC3=NC=CS3', 'C[C@H](C(N(C)C)=O)[O]']

In [20]:
# Parse the example SMILES strings into molecule objects, one per input string.
mols = get_moldata(smiles)

In [21]:
# Last expression in the cell, so the returned descriptor DataFrame is rendered as the cell output.
get_lip(mols)

Unnamed: 0,MW,LogP,NumHAcceptors,NumHDonors
0,130.15,1.6298,2.0,0.0
1,215.281,2.5411,4.0,0.0
2,116.14,-0.1064,1.0,0.0
