In [17]:
%load_ext autoreload
%autoreload 2

# Import the RDKit submodules explicitly: a bare `import rdkit` does not
# guarantee that `rdkit.Chem`, `rdkit.Chem.AllChem` or `rdkit.Chem.Draw` are
# loaded, so attribute access on them can fail on a fresh kernel.
import rdkit
import rdkit.Chem.AllChem
from rdkit.Chem.Draw import IPythonConsole

# -1 is meant to remove the cap on how many molecule properties are rendered
# (see the RDKit IPythonConsole documentation).
IPythonConsole.ipython_maxProperties = -1

import dgym as dg

# load all data
print('load data')
path = '../../dgym-data'

# NOTE(review): `deck` is reassigned by the following cell (Mpro set), so this
# load of the DSi-Poised library looks like a leftover — confirm it is needed.
deck = dg.MoleculeCollection.from_sdf(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
load data


In [26]:
%load_ext autoreload
%autoreload 2

# Import the RDKit submodules explicitly: a bare `import rdkit` does not
# guarantee that `rdkit.Chem`, `rdkit.Chem.AllChem` or `rdkit.Chem.Draw` are
# loaded, so attribute access on them can fail on a fresh kernel.
import rdkit
import rdkit.Chem.AllChem
from rdkit.Chem.Draw import IPythonConsole

# -1 is meant to remove the cap on how many molecule properties are rendered
# (see the RDKit IPythonConsole documentation).
IPythonConsole.ipython_maxProperties = -1

import dgym as dg

# load all data
print('load data')
path = '../../dgym-data'

# Molecules to dock; the reactant SMILES are read from the listed SDF fields.
deck = dg.MoleculeCollection.from_sdf(
    f'{path}/Mpro_combined_aminopyridine.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
load data


In [27]:
from meeko import MoleculePreparation
from meeko import PDBQTWriterLegacy

def get_pdbqt(mol):
    """
    Convert an RDKit molecule into a PDBQT string using Meeko.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Ligand to prepare. Hydrogens are added and 3D coordinates are
        generated on the molecule returned by ``AddHs``.

    Returns
    -------
    str or None
        PDBQT text of the first setup that Meeko writes successfully, or
        ``None`` when none of the setups could be written.

    Raises
    ------
    ValueError
        If RDKit fails to generate 3D coordinates for the molecule.
    """
    # Local imports make sure the submodules are actually loaded; a bare
    # `import rdkit` does not guarantee `rdkit.Chem.AllChem` exists.
    from rdkit import Chem
    from rdkit.Chem import AllChem

    # add hydrogens (without regard to pH)
    protonated_mol = Chem.AddHs(mol)

    # generate 3D coordinates for the ligand; EmbedMolecule reports failure
    # by returning -1 instead of raising, so check it explicitly.
    conformer_id = AllChem.EmbedMolecule(protonated_mol)
    if conformer_id < 0:
        raise ValueError(
            'RDKit could not embed 3D coordinates for molecule '
            f'{Chem.MolToSmiles(mol)}'
        )

    # initialize preparation
    preparator = MoleculePreparation(
        # keep_chorded_rings=True,
        # keep_equivalent_rings=True,
        rigid_macrocycles=True
    )
    mol_setups = preparator.prepare(protonated_mol)

    # return the first setup that serializes cleanly
    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            return pdbqt_string

    # no setup could be written; callers must handle this case
    return None

Create ligand PDBQTs.

In [28]:
import os
from tqdm.notebook import tqdm

out_dir = '../../dgym-data/out/ligands_temp/'

# The output directory may not exist yet (e.g. on a fresh checkout).
os.makedirs(out_dir, exist_ok=True)

paths = []    # PDBQT files that were actually written
failed = []   # (molecule name, reason) for ligands that could not be prepared

for mol in tqdm(deck[:]):
    try:
        pdbqt = get_pdbqt(mol.mol)
    except Exception as error:
        # One bad ligand should not abort the batch; it is reported below.
        failed.append((mol.name, str(error)))
        continue

    # get_pdbqt returns None when no setup could be written.
    if pdbqt is None:
        failed.append((mol.name, 'no PDBQT could be written'))
        continue

    # Use a dedicated name so the global `path` (data root) is not clobbered.
    ligand_path = os.path.join(out_dir, f'{mol.name}.pdbqt')
    with open(ligand_path, 'w') as file:
        file.write(pdbqt)

    # Only index files that exist on disk.
    paths.append(ligand_path)

# Space-separated index of ligand files.
ligands_txt = ' '.join(paths)
index_path = os.path.join(out_dir, 'ligands.txt')
with open(index_path, 'w') as file:
    file.write(ligands_txt)

if failed:
    print(f'{len(failed)} ligand(s) could not be prepared:')
    for name, reason in failed:
        print(f'  {name}: {reason}')

  0%|          | 0/158 [00:00<?, ?it/s]

Create the ligand index file (`ligands.txt`), the space-separated list of ligand PDBQT paths passed to `unidock --ligand_index`.

In [29]:
# import subprocess
# import tempfile

# with tempfile.TemporaryDirectory() as tempdir:
#     print(f"Temporary directory created at {tempdir}")

In [282]:
%%time
import os
import subprocess

results_dir = '../../dgym-data/out/docking_results'

# Make sure the output directory exists before docking writes into it.
os.makedirs(results_dir, exist_ok=True)

# Pass the arguments as a list so no shell is involved in parsing them.
command = [
    'unidock',
    '--receptor', '../../dgym-data/Mpro_prepped.pdbqt',
    '--ligand_index', '../../dgym-data/out/ligands_temp/ligands.txt',
    '--center_x', '9.812',
    '--center_y', '-0.257',
    '--center_z', '20.8485',
    '--size_x', '14.328',
    '--size_y', '8.85',
    '--size_z', '12.539',
    '--dir', results_dir,
    '--exhaustiveness', '128',
    '--max_step', '20',
    '--num_modes', '9',
    '--scoring', 'vinardo',
    '--refine_step', '3',
    '--seed', '5',
]
resp = subprocess.run(
    command,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    encoding='utf-8'
)

# Output is captured, so a failure would otherwise go unnoticed.
if resp.returncode != 0:
    raise RuntimeError(
        f'unidock exited with code {resp.returncode}:\n{resp.stderr}'
    )

CPU times: user 8.15 ms, sys: 157 µs, total: 8.31 ms
Wall time: 15.8 s


In [285]:
import numpy as np

# Number of distinct SMILES strings in the deck.
np.unique(deck.smiles).size

145

In [193]:
import numpy as np

def boltzmann_sum(energies, temperature=298.15):
    """
    Combine energies into a single value via a Boltzmann-weighted log-sum.

    Computes ``-kT * log(sum(exp(-E_i / kT)))`` using the log-sum-exp shift,
    so large-magnitude energies do not overflow or underflow ``exp``.

    Parameters
    ----------
    energies : array-like of float
        Energies in kcal/mol. All elements are summed, whatever the shape.
    temperature : float, optional
        Temperature in kelvin.

    Returns
    -------
    float
        The combined energy in kcal/mol. An empty input yields ``inf``
        (the value the unshifted formula produces for an empty sum).
    """
    kT = 0.0019872041 * temperature  # Boltzmann constant in kcal/(mol·K) multiplied by temperature in K
    energies = np.asarray(energies, dtype=float)  # Energies should be in kcal/mol

    if energies.size == 0:
        return np.inf

    # Shift by the lowest energy so the largest exponent is exactly 0.
    lowest = energies.min()
    if not np.isfinite(lowest):
        # inf / -inf / nan cannot be shifted; they are already the answer.
        return lowest

    return lowest - kT * np.log(np.sum(np.exp(-(energies - lowest) / kT)))

In [275]:
import pandas as pd
import re
import glob
from itertools import islice
from operator import itemgetter

# Compile once instead of rebuilding a lambda for every file.
float_pattern = re.compile(r'-?\d+\.\d+')
result_prefix = 'REMARK VINA RESULT'

records = []   # one {'smiles', 'aff_pred'} dict per parsed file
skipped = []   # files with no header or no parsable docking result

# Sort so the row order is reproducible between runs.
paths = sorted(glob.glob('../../dgym-data/out/docking_results/*.pdbqt'))

for path in paths:
    with open(path, 'r') as file:
        lines = file.readlines()

    # extract SMILES from the file: last space-separated token of line 7
    # (or of the last line when the file is shorter).
    # NOTE(review): assumes the writer puts the SMILES remark on line 7 — confirm.
    header = lines[:7]
    if not header:
        skipped.append(path)
        continue
    smiles = header[-1].split(' ')[-1].split('\n')[0]

    # extract affinities: first float on every result line
    mode_energies = []
    for line in lines:
        if not line.startswith(result_prefix):
            continue
        match = float_pattern.search(line)
        if match is not None:
            mode_energies.append(float(match.group()))

    if not mode_energies:
        skipped.append(path)
        continue

    # compute boltzmann sum over all reported modes
    records.append(
        {'smiles': smiles, 'aff_pred': boltzmann_sum(mode_energies)}
    )

# Explicit columns keep the schema stable even when nothing was parsed.
affinities = pd.DataFrame(records, columns=['smiles', 'aff_pred'])

if skipped:
    print(f'Skipped {len(skipped)} file(s) without parsable results:')
    for path in skipped:
        print(f'  {path}')

In [20]:
from dgym.envs.oracle import DockingOracle

# Docking settings handed to the oracle: search-box centre and size first,
# then the sampling and scoring options.
config = dict(
    center_x=9.812,
    center_y=-0.257,
    center_z=20.8485,
    size_x=14.328,
    size_y=8.85,
    size_z=12.539,
    exhaustiveness=128,
    max_step=20,
    num_modes=9,
    scoring='vinardo',
    refine_step=3,
    seed=5,
)

# Oracle named 'Mpro', docking against the prepared receptor file.
receptor_pdbqt = '../../dgym-data/Mpro_prepped.pdbqt'
docking_oracle = DockingOracle('Mpro', receptor_path=receptor_pdbqt, config=config)

In [21]:
%%time
# Call the docking oracle on a full slice of the deck and keep its output.
res = docking_oracle(deck[:])

CPU times: user 10.8 s, sys: 101 ms, total: 10.9 s
Wall time: 1min 11s


In [13]:
# Display the oracle output (presumably one score per molecule — confirm).
res

[-5.233395222354202,
 -4.493057555970839,
 -4.666335639692606,
 -5.947392078560946,
 -5.206392108091621,
 -5.612342649629879,
 -4.376370560938596,
 -4.303687445626837,
 -6.2169012296008725,
 -5.125914560236025,
 -4.392722905987674,
 -5.029570762879757,
 -5.617342210215726,
 -4.399588706631558,
 -5.559580098303922,
 -6.282028622303432,
 -5.632014945141764,
 -4.81391194890751,
 -4.848871244081563,
 -5.241299695637286,
 -4.870329004373337,
 -4.5221215549321405,
 -5.474712406477507,
 -5.245883278184889,
 -4.556111803948746,
 -5.597310931326232,
 -6.2306650964506645,
 -5.850630987807328,
 -4.441652475928307,
 -6.010661022186258,
 -5.112476868541908,
 -6.018535095717932,
 -5.308715705436513,
 -4.96617098102524,
 -5.299194989201773,
 -4.903365084429658,
 -5.465257062708307,
 -5.399444305772434,
 -4.487345830149069,
 -4.774931947386903,
 -4.766652483520855,
 -4.092627554160678,
 -5.020000925342745,
 -5.659292629521595,
 -4.802896181110186,
 -5.209578996220187,
 -5.1132216503348165,
 -4.9600876