In [2]:
from rdkit import Chem
# Read one SMILES string per line from the prepared input file. Blank lines
# (e.g. a trailing empty line) are skipped so that an empty string is never
# handed to the downstream filters as if it were a molecule.
with open("sampled_SMILES_preped.smi") as smi_file:
    smis_preped = [line.rstrip() for line in smi_file if line.strip()]

In [3]:
# Display how many SMILES strings were read from the input file.
len(smis_preped)

54132

In [4]:
# Filter 1: keep only the SMILES that satisfy Lipinski's rule of five (Ro5)
# and are not flagged as PAINS.
from adme_pred import ADME

smis_filtered_1 = []
for smi in smis_preped:
    profile = ADME(smi)
    # Reject on the Ro5 check first; PAINS is only evaluated for Ro5 passers.
    if not profile.druglikeness_lipinski():
        continue
    if profile.pains():
        continue
    smis_filtered_1.append(smi)

In [5]:
# Display how many SMILES strings remain after the Ro5 / PAINS filter.
len(smis_filtered_1) 

38860

In [6]:
# Filter 2: keep molecules with QED > 0.3 and SA score < 5.
# The survivors are stored as RDKit Mol objects (not SMILES strings).
from rdkit.Chem.QED import qed
from tqdm import tqdm
import sascorer

QED_MIN = 0.3      # exclusive lower bound on the QED score
SA_SCORE_MAX = 5   # exclusive upper bound on the sascorer score

mols_filtered_2 = []
for smi in tqdm(smis_filtered_1):
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles returns None for a SMILES it cannot parse; skip it
        # instead of letting qed() fail on a None molecule.
        continue
    # The SA score is only computed when the QED threshold is already met.
    if qed(mol) > QED_MIN and sascorer.calculateScore(mol) < SA_SCORE_MAX:
        mols_filtered_2.append(mol)

100%|██████████| 38860/38860 [03:25<00:00, 189.20it/s]


In [7]:
# Display how many molecules remain after the QED / SA score filter.
len(mols_filtered_2)

37732

In [20]:
# Write every molecule in mols_filtered_2 to one multi-record SDF file,
# naming the records molecule1, molecule2, ... in list order.
# NOTE(review): the recorded output of this cell reports 21465 molecules while
# len(mols_filtered_2) is shown as 37732 elsewhere, and the execution count is
# out of sequence -- re-run after a kernel restart so the file matches.
with Chem.SDWriter("sampled_fordock.sdf") as writer:
    # The context manager closes the file even if a write fails part-way.
    for idx, mol in enumerate(tqdm(mols_filtered_2), start=1):
        mol.SetProp("_Name", f"molecule{idx}")
        writer.write(mol)

100%|██████████| 21465/21465 [00:03<00:00, 6629.59it/s]


In [8]:
import os

# Write each molecule in mols_filtered_2 to its own single-record SDF file,
# vl-202312/molecule<N>.sdf, with N counting from 1 in list order.
sdf_dir = "vl-202312"
# Make sure the output directory exists so the cell also works on a fresh
# checkout / fresh kernel run.
os.makedirs(sdf_dir, exist_ok=True)
for i, mol in enumerate(tqdm(mols_filtered_2)):
    m_name = "molecule" + str(i+1)
    mol.SetProp("_Name", m_name)
    with Chem.SDWriter(os.path.join(sdf_dir, m_name+".sdf")) as w:
        w.write(mol)

100%|██████████| 37732/37732 [01:59<00:00, 315.58it/s]


In [19]:
# Write the reference compound f5, with explicit hydrogens added, to f5.sdf.
# NOTE(review): no coordinate-generation call appears in this cell -- confirm
# that whatever consumes f5.sdf does not require 3D coordinates.
f5_smiles = "C(=O)(N1CCC(C(=O)N2CC=C(c3ccccc3)CC2)CC1)c1n[nH]c(-c2ncccc2)c1"
mol = Chem.MolFromSmiles(f5_smiles)
mol = Chem.AddHs(mol)
# Open the writer only once the molecule is ready, and let the context manager
# close it, so a failure above or during the write cannot leave an open file.
with Chem.SDWriter("f5.sdf") as writer:
    writer.write(mol)

In [16]:
# Filter 3: compute the fingerprint similarity between f5 and every molecule.
# The fingerprints are feature-based Morgan bit vectors with radius 3 and
# 1024 bits (FCFP6-like, as the output file name says) -- not ECFP4.
# No molecule is dropped here: each one is written out with its similarity
# stored in the "sims" property.
# NOTE(review): the recorded output of this cell reports 21465 molecules while
# len(mols_filtered_2) is shown as 37732 elsewhere -- re-run after a kernel
# restart so the file matches the current filter output.
from rdkit import DataStructs
from rdkit.Chem import AllChem

f5_mol = Chem.MolFromSmiles("C(=O)(N1CCC(C(=O)N2CC=C(c3ccccc3)CC2)CC1)c1n[nH]c(-c2ncccc2)c1")
f5_fp = AllChem.GetMorganFingerprintAsBitVect(f5_mol, 3, nBits=1024, useFeatures=True)

# The context manager closes the file even if a fingerprint or write fails.
with Chem.SDWriter("rock2_f3_fcfp6.sdf") as writer:
    for i, mol in enumerate(tqdm(mols_filtered_2)):
        mol_fp = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024, useFeatures=True)
        sims = DataStructs.FingerprintSimilarity(f5_fp, mol_fp)
        mol.SetProp("_Name", "molecule" + str(i+1))
        mol.SetDoubleProp("sims", sims)
        writer.write(mol)


100%|██████████| 21465/21465 [00:06<00:00, 3238.79it/s]
