In [1]:
import os
import rdkit.Chem as Chem
import rdkit.Chem.AllChem as AllChem
import re
import pandas as pd

In [2]:
def get_docking_score(mol: Chem.Mol, output_name: str = 'molecule_docked'):
    Chem.MolToMolFile(mol, f'molecule.mol')
    !obabel -imol molecule.mol -omol2 -O molecule.mol2
    os.remove(f'molecule.mol')
    
    !smina -r 6luq_preprocessed.pdb -l molecule.mol2 --autobox_ligand d2_ligand.pdb --autobox_add 8 --exhaustiveness 16 --out outputs/{output_name}.mol2
    
    output = !smina -r 6luq_preprocessed.pdb -l molecule_docked.mol2 --score_only
    score = float(re.findall(r'Affinity:\s*(\-?[\d\.]+)', '\n'.join(output))[0])
    return score

def optimize_conformation(mol, random_seed: int = -1):
    """Return a copy of ``mol`` with explicit hydrogens and an MMFF-relaxed 3D conformer.

    Args:
        mol: RDKit molecule (typically parsed from SMILES, without coordinates).
        random_seed: Seed forwarded to ``AllChem.EmbedMolecule``. The default
            of ``-1`` keeps RDKit's non-deterministic seeding; pass a fixed
            non-negative integer to get reproducible coordinates.

    Returns:
        The hydrogen-added molecule carrying one embedded, optimized conformer.

    Raises:
        ValueError: If RDKit cannot generate 3D coordinates for the molecule.
    """
    mol = Chem.AddHs(mol)  # Adds hydrogens to make optimization more accurate

    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    # Without this check the failure only shows up in the next call as an
    # opaque "Bad Conformer Id" error.
    conformer_id = AllChem.EmbedMolecule(mol, randomSeed=random_seed)  # Adds 3D positions
    if conformer_id < 0:
        raise ValueError(
            f'Could not embed 3D coordinates for molecule {Chem.MolToSmiles(mol)}'
        )

    # Improves the 3D positions using a force-field method. The return code
    # (convergence status) is deliberately ignored, as in the original.
    AllChem.MMFFOptimizeMolecule(mol)
    return mol

In [3]:
# Load the predictions and dock a random sample of 10 molecules.
# NOTE(review): the sample is unseeded, so each run docks a different subset.
df = pd.read_csv('../results/SVC_sonic_3-29-20/preds_20230927-113519/predictions.csv').sample(10)
df['mol'] = df['smiles'].apply(Chem.MolFromSmiles)
df['mol'] = df['mol'].apply(optimize_conformation)

# Store one score per molecule in a 'score' column. The previous
# `df.loc['score'] = ...` created/overwrote a *row* labelled 'score' and
# filled every column of it with the scalar, so no per-molecule score was kept.
# Building the column in one assignment also avoids mutating the frame while
# iterating over it.
df['score'] = [
    get_docking_score(mol, f'molecule_docked_{i}')
    for i, mol in df['mol'].items()
]

1 molecule converted
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using random seed: 1069026917

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rm

Unnamed: 0,idx,smiles,largest_ring,num_rings,qed,no_unwanted_frags,mol_wt,num_HBA,num_HBD,logP,num_rotatable_bonds,tpsa,bridgehead_atoms,spiro_atoms,mol,docking_score
812,4817,CCC=CNCCC1=C[NH1]C2=CC=CC=C12,6,2,0.734765,True,214.146999,1,2,3.2237,5,27.82,0,0,<rdkit.Chem.rdchem.Mol object at 0x7f7d82def520>,-7.91974
959,5739,O=C(NCCCOC1=CC=CN=C1)C2=CC=CC=C2Br,6,2,0.825816,True,334.03169,3,1,3.043,6,51.22,0,0,<rdkit.Chem.rdchem.Mol object at 0x7f7d82def460>,-8.47309
1050,6297,C=C(NC1=CC=CC=C1)F,6,1,0.617431,True,137.064077,1,1,2.5392,2,12.03,0,0,<rdkit.Chem.rdchem.Mol object at 0x7f7d82def6a0>,-6.12358
156,929,OC1=CC2=CC=C(CNCC3CCC(C(C)=O)CC3)C=C2C=C1,6,3,0.87622,True,311.188529,3,2,4.0304,5,49.33,0,0,<rdkit.Chem.rdchem.Mol object at 0x7f7d82def760>,-10.0507
916,5387,ClC1=CC(Cl)=CC(CC2=NN3C=C(Cl)C=C(Cl)C3=N2)=C1,6,3,0.657837,True,344.939408,3,0,4.9337,2,30.19,0,0,<rdkit.Chem.rdchem.Mol object at 0x7f7d82def7c0>,-7.7225
