# Plot the runtimes of DrugGym

In [1]:
%reload_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore') 

import rdkit
import dgym as dg

import torch
import pyarrow.parquet as pq

# Load all data.
# `path` is the single root directory for every dataset read in this notebook;
# every file below is resolved relative to it.
path = '../../dgym-data'

# Starting molecules, annotated with the reactants they were made from.
deck = dg.MoleculeCollection.load(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

# Reaction templates (SMARTS strings plus functional-group classes).
reactions = dg.ReactionCollection.from_json(
    path = f'{path}/All_Rxns_rxn_library_sorted.json',
    smarts_col = 'reaction_string',
    classes_col = 'functional_groups'
)

# Building blocks and their precomputed fingerprints.
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

# First column of sizes.parquet, converted to a tensor.
# Resolved through `path` (previously a duplicated hard-coded directory) so
# that changing `path` relocates every input consistently.
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

In [2]:
import os

def get_tcp_objectives():
    """
    Build the oracles used as objectives in this notebook.

    Lists ``{path}/dockstring_targets/``, takes the alphabetically first file
    whose name contains ``'conf'`` and the alphabetically first file whose name
    contains ``'target'``, parses the config file as ``key = value`` lines of
    floats, and uses the result (with the search box size overridden to
    22.5 on each axis) to configure a docking oracle. The target name is the
    part of the target filename before the first underscore.

    Relies on the notebook-level ``path`` variable for the data directory.

    NOTE(review): the config and target files are paired purely by their
    position in the two sorted lists -- this assumes the naming scheme keeps
    them aligned; confirm against the directory contents.

    Returns
    -------
    tuple
        ``(pIC50_oracle, log_P_oracle, log_S_oracle, constant_oracle,
        random_oracle)``, in that order.

    Raises
    ------
    FileNotFoundError
        If the directory contains no config file or no target file.
    ValueError
        If a non-blank config line has no ``=`` separator or a value that is
        not a float.
    """
    from dgym.envs.oracle import \
        ConstantOracle, RandomOracle, DockingOracle, CatBoostOracle, RDKitOracle

    dockstring_dir = f'{path}/dockstring_targets/'
    files = os.listdir(dockstring_dir)
    configs = sorted(f for f in files if 'conf' in f)
    targets = sorted(f for f in files if 'target' in f)
    if not configs or not targets:
        raise FileNotFoundError(
            f'Expected at least one config and one target file in {dockstring_dir!r}'
        )

    # Only the first config/target pair is used.
    idx = 0

    # Parse `key = value` lines. Stripping each line and splitting on the bare
    # '=' tolerates CRLF endings, whitespace-only lines and missing spaces
    # around the separator, all of which previously surfaced as IndexError.
    config_ = {}
    with open(dockstring_dir + configs[idx], 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            key, separator, value = line.partition('=')
            if not separator:
                raise ValueError(
                    f'Malformed line in {configs[idx]!r}: {line!r}'
                )
            config_[key.strip()] = float(value)

    target = targets[idx]
    name = target.split('_')[0]

    # Override whatever search box size the config file specified.
    config_.update({
        'size_x': 22.5,
        'size_y': 22.5,
        'size_z': 22.5,
    })

    # `config_` is unpacked last, so keys from the file take precedence over
    # the defaults listed before it.
    config = {
        'search_mode': 'detailed',
        'scoring': 'vina',
        'seed': 5,
        **config_
    }

    pIC50_oracle = DockingOracle(
        f'{name} pIC50',
        receptor_path=f'{path}/dockstring_targets/{name}_target.pdbqt',
        config=config
    )
    constant_oracle = ConstantOracle('Constant', constant=1)
    random_oracle = RandomOracle('Random')
    log_P_oracle = RDKitOracle('Log P', descriptor='MolLogP')
    log_S_oracle = CatBoostOracle(
        'Log S', path='../dgym/envs/models/aqsolcb.model')

    return pIC50_oracle, log_P_oracle, log_S_oracle, constant_oracle, random_oracle

In [5]:
from copy import deepcopy

from dgym.envs.utility import ClassicUtilityFunction, Policy

# Oracles, in the order returned by get_tcp_objectives().
(
    pIC50_oracle,
    log_P_oracle,
    log_S_oracle,
    constant_oracle,
    random_oracle,
) = get_tcp_objectives()

# One utility function per oracle, each with an ideal and an acceptable range.
pIC50_utility = ClassicUtilityFunction(
    pIC50_oracle,
    ideal=(9.5, 13),
    acceptable=(8, 13),
)
log_P_utility = ClassicUtilityFunction(
    log_P_oracle,
    ideal=(0.5, 1.85),
    acceptable=(-0.5, 3.5),
)
log_S_utility = ClassicUtilityFunction(
    log_S_oracle,
    ideal=(-3, 1),
    acceptable=(-4, 1),
)
constant_utility = ClassicUtilityFunction(
    constant_oracle,
    ideal=(3, 4),
    acceptable=(2, 5),
)
random_utility = ClassicUtilityFunction(
    random_oracle,
    ideal=(3, 4),
    acceptable=(2, 5),
)

# Assays: the three real oracles first, then a surrogate (sigma=1.0) of each,
# in the same order.
assays = [
    pIC50_oracle,
    log_P_oracle,
    log_S_oracle,
    *(
        oracle.surrogate(sigma=1.0)
        for oracle in (pIC50_oracle, log_P_oracle, log_S_oracle)
    ),
]

# The agent's policy weights potency at 0.8 and each ADMET property at 0.1.
utility_agent = Policy(
    utility_functions=[pIC50_utility, log_P_utility, log_S_utility],
    weights=[0.8, 0.1, 0.1],
)

# The environment's policy is an independent copy in which the Log P and
# Log S utilities treat their acceptable range as ideal, i.e. the environment
# tolerates merely-acceptable ADMET.
utility_env = deepcopy(utility_agent)
utility_env.utility_functions[1].ideal = utility_env.utility_functions[1].acceptable
utility_env.utility_functions[2].ideal = utility_env.utility_functions[2].acceptable

In [6]:
from dgym.envs.designer import Designer, Generator

# Designer used by the benchmark to ideate molecules via `designer.design(n)`.
# The Generator is built from the building blocks, their fingerprints and the
# `sizes` tensor loaded in the data-loading cell; the Designer additionally
# receives the reaction collection.
# NOTE(review): `sizes` is expected to still be the tensor read from
# sizes.parquet -- rerun the data-loading cell first if that name has been
# rebound since.
designer = Designer(
    Generator(building_blocks, fingerprints, sizes),
    reactions,
    cache = True
)

In [None]:
import time
from tqdm.auto import tqdm
import io
from contextlib import redirect_stdout

# Number of molecules requested per design call.
# Named `batch_sizes` so it does not rebind `sizes`, the tensor that was
# passed to `Generator` in an earlier cell.
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

# Trials are numbered 1..24 inclusive.
trials = range(1, 25)

# One record per (trial, size) for each stage. Initialised here so the cell
# runs on a fresh kernel; re-running the cell discards earlier measurements.
design_runtimes = []
score_runtimes = []

# Anything written to stdout inside the block is captured in `f` instead of
# being shown in the cell output.
f = io.StringIO()

with redirect_stdout(f):

    for trial in trials:
        for size in tqdm(batch_sizes):

            # Ideate molecules
            start = time.perf_counter()
            molecules = designer.design(size)
            # Elapsed = end - start (the operands were previously swapped,
            # which recorded every duration as a negative number).
            time_elapsed = time.perf_counter() - start
            design_runtimes.append({
                'size': size,
                'time': time_elapsed,
                'trial': trial
            })

            # Score molecules
            start = time.perf_counter()
            utility_agent(molecules)
            time_elapsed = time.perf_counter() - start
            score_runtimes.append({
                'size': size,
                'time': time_elapsed,
                'trial': trial
            })

  0%|          | 0/10 [00:00<?, ?it/s]

[15:09:42] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
[15:09:45] Can't kekulize mol.  Unkekulized atoms: 10 12 13
[15:09:45] Can't kekulize mol.  Unkekulized atoms: 12 14 18
[15:09:51] Can't kekulize mol.  Unkekulized atoms: 15 16 18
[15:09:51] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 19
[15:10:00] Can't kekulize mol.  Unkekulized atoms: 16 19 20 23 24 25 26
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 11 13 15
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 9 11 13
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 20 22 24
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 13 15 18
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 10 12 13
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 17
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 11 13 15
[15:10:12] Can't kekulize mol.  Unkekulized atoms: 11 13 15
[15:10:36] Can't kekulize mol.  Unkekulized atoms: 19 20 21
[15:10:36] Can't kekulize mol.  Unkekulized atoms: 9 11 12
[15:10:36] C

  0%|          | 0/10 [00:00<?, ?it/s]

[15:11:10] Can't kekulize mol.  Unkekulized atoms: 16 18 21
[15:11:12] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 17
[15:11:16] Can't kekulize mol.  Unkekulized atoms: 14 15 17
[15:11:16] Can't kekulize mol.  Unkekulized atoms: 14 16 17
[15:11:22] Can't kekulize mol.  Unkekulized atoms: 11 13 15
[15:11:32] Can't kekulize mol.  Unkekulized atoms: 14 16 18
[15:11:32] Can't kekulize mol.  Unkekulized atoms: 12 14 15
[15:11:32] Can't kekulize mol.  Unkekulized atoms: 13 15 17
[15:11:32] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 20
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 17 19 20
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 18
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 18 21 23 24 27 28 32
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 9 11 14
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 17 18 19
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 13 15 17
[15:11:46] Can't kekulize mol.  Unkekulized atoms: 13 15 17
[15:11:46] 

In [None]:
import seaborn as sns

sns.lineplot(size_runtimes, x=')