# Testing New Experiment API

Load data.

In [14]:
%reload_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore') 

import rdkit
import dgym as dg

# Root of the data directory; every dataset loaded below lives under it.
path = '../../dgym-data'

# Molecule deck, loaded with the three `reagsmi*` names as reactant names.
deck = dg.MoleculeCollection.load(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3'],
)

# Reaction collection built from the JSON reaction library.
reactions = dg.ReactionCollection.from_json(
    path=f'{path}/All_Rxns_rxn_library_sorted.json',
    smarts_col='reaction_string',
    classes_col='functional_groups',
)

# Building blocks and their fingerprints come from two sibling files.
building_blocks = dg.datasets.disk_loader(
    f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf'
)
fingerprints = dg.datasets.fingerprints(
    f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb'
)

import torch
import pyarrow.parquet as pq
# Read the first column of the sizes table from the same data directory as the
# other datasets (via `path`) rather than repeating the literal location.
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Load assays and utility functions.

In [15]:
import os

def get_tcp_objectives(data_path=None, target_index=0):
    """
    Build the oracles used as objectives for one dockstring target.

    The docking configuration is read from a config file under
    ``<data_path>/dockstring_targets/``; the ``size_x/y/z`` values it contains
    are overridden with 22.5 before the docking oracle is created.

    Parameters
    ----------
    data_path : str, optional
        Root data directory that contains ``dockstring_targets/``. When
        omitted, the notebook-level ``path`` variable is read at call time.
    target_index : int, optional
        Position of the target in the sorted listings of config and target
        files. Defaults to 0, the first target.

    Returns
    -------
    tuple
        ``(pIC50_oracle, log_P_oracle, log_S_oracle, constant_oracle,
        random_oracle)``.
    """
    if data_path is None:
        # Fall back to the notebook global so existing no-argument calls work.
        data_path = path

    dockstring_dir = f'{data_path}/dockstring_targets/'
    files = os.listdir(dockstring_dir)
    configs = sorted(f for f in files if 'conf' in f)
    targets = sorted(f for f in files if 'target' in f)

    # Parse `key = value` lines into floats, ignoring blank lines.
    config_ = {}
    with open(os.path.join(dockstring_dir, configs[target_index]), 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, _, value = line.partition('=')
            config_[key.strip()] = float(value)

    # NOTE(review): this assumes the sorted config and target listings line up
    # index-for-index — confirm against the directory contents.
    target = targets[target_index]
    name = target.split('_')[0]

    # Override whatever box size the config file specified.
    config_.update({
        'size_x': 22.5,
        'size_y': 22.5,
        'size_z': 22.5,
    })

    from dgym.envs.oracle import \
        ConstantOracle, RandomOracle, DockingOracle, CatBoostOracle, RDKitOracle

    config = {
        'search_mode': 'detailed',
        'scoring': 'vina',
        'seed': 5,
        **config_
    }

    pIC50_oracle = DockingOracle(
        f'{name} pIC50',
        receptor_path=f'{data_path}/dockstring_targets/{name}_target.pdbqt',
        config=config
    )
    constant_oracle = ConstantOracle('Constant', constant=1)
    random_oracle = RandomOracle('Random')
    log_P_oracle = RDKitOracle('Log P', descriptor='MolLogP')
    log_S_oracle = CatBoostOracle(
        'Log S', path='../dgym/envs/models/aqsolcb.model')

    return pIC50_oracle, log_P_oracle, log_S_oracle, constant_oracle, random_oracle

In [16]:
from dgym.envs.utility import ClassicUtilityFunction, MultipleUtilityFunction

# Unpack the five oracles in the order `get_tcp_objectives` returns them.
(
    pIC50_oracle,
    log_P_oracle,
    log_S_oracle,
    constant_oracle,
    random_oracle,
) = get_tcp_objectives()

# Wrap each oracle in a utility function with its ideal and acceptable ranges.
pIC50_utility = ClassicUtilityFunction(pIC50_oracle, ideal=(9.5, 13), acceptable=(8, 13))
log_P_utility = ClassicUtilityFunction(log_P_oracle, ideal=(0.5, 1.85), acceptable=(-0.5, 3.5))
log_S_utility = ClassicUtilityFunction(log_S_oracle, ideal=(-3, 1), acceptable=(-4, 1))
constant_utility = ClassicUtilityFunction(constant_oracle, ideal=(3, 4), acceptable=(2, 5))
random_utility = ClassicUtilityFunction(random_oracle, ideal=(3, 4), acceptable=(2, 5))

# Assemble assays and surrogate models
from dgym.envs.oracle import GaussianOracle
# The three oracles come first, followed by one GaussianOracle per oracle,
# named 'Noisy <oracle name>' and given its own `loc` and a shared `scale`.
assays = [pIC50_oracle, log_P_oracle, log_S_oracle]
assays += [
    GaussianOracle(f'Noisy {oracle.name}', loc=loc, scale=100)
    for oracle, loc in zip(
        (pIC50_oracle, log_P_oracle, log_S_oracle),
        (7.460298232446733, 2.223214738326521, -3.752548978069126),
    )
]

# Environment tolerates acceptable ADMET
from copy import deepcopy
# The agent combines the three utilities with weights 0.8 / 0.1 / 0.1.
utility_agent = MultipleUtilityFunction(
    utility_functions=[pIC50_utility, log_P_utility, log_S_utility],
    weights=[0.8, 0.1, 0.1],
)

# The environment gets an independent copy in which the Log P and Log S
# utilities (positions 1 and 2) use their acceptable range as the ideal one.
utility_env = deepcopy(utility_agent)
for admet_index in (1, 2):
    admet_utility = utility_env.utility_functions[admet_index]
    admet_utility.ideal = admet_utility.acceptable

Instantiate designer.

In [17]:
from dgym.envs.designer import Designer, Generator

# Build the generator from the building-block data first, then pass it to the
# designer together with the reaction collection, with caching enabled.
generator = Generator(building_blocks, fingerprints, sizes)
designer = Designer(generator, reactions, cache=True)

Pick 5 random starting hits.

In [18]:
import warnings
warnings.filterwarnings("ignore")

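# Randomly select a starting molecule that has exactly two reactants and for which `designer.match_reactions` returns a truthy result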
import random
def select_molecule(deck, max_attempts=10_000):
    """
    Draw a random molecule from `deck` that is usable as a starting point.

    A molecule qualifies when it has exactly two reactants and
    `designer.match_reactions` (the notebook-level designer) returns a truthy
    result for it. Candidates are drawn uniformly at random, with replacement,
    until one qualifies.

    Parameters
    ----------
    deck : sequence
        Indexable collection of molecules supporting `len`.
    max_attempts : int, optional
        Upper bound on the number of random draws. Replaces the previous
        unbounded recursion, which surfaced as a `RecursionError`.

    Returns
    -------
    The first randomly drawn molecule that qualifies.

    Raises
    ------
    ValueError
        If `deck` is empty.
    RuntimeError
        If no qualifying molecule is drawn within `max_attempts` draws.
    """
    if len(deck) == 0:
        raise ValueError('Cannot select a molecule from an empty deck.')

    for _attempt in range(max_attempts):
        candidate = deck[random.randint(0, len(deck) - 1)]
        # The reactant count is checked first, so the reaction matching only
        # runs for two-reactant molecules.
        if (len(candidate.reactants) == 2
                and designer.match_reactions(candidate)):
            return candidate

    raise RuntimeError(
        f'No suitable molecule found in {max_attempts} random draws.')

# Draw five starting molecules and wrap them in a collection.
initial_molecules = [select_molecule(deck) for _ in range(5)]
library = dg.MoleculeCollection(initial_molecules).update_annotations()

# Run every assay whose name does not contain 'Noisy' on the library and
# store each result on its molecule under the assay's name.
for assay in assays:
    if 'Noisy' in assay.name:
        continue
    results = assay(library)
    for molecule, result in zip(library, results):
        molecule.update_annotations({assay.name: result})

# Mark the whole starting library as tested at step 0.
library.set_status('Tested', step=0)

[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 
[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 
[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 
[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 
[18:14:34] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:14:34] product atom-mapping number 15 not found in reactants.
[18:14:34] product atom-mapping number 14 not found in reactants.
[18:14:34] product atom-mapping number 13 not found in reactants.
[18:14:34] reactant 1 has no mapped atoms.
[18:14:34] product atom-mapping number 2 not found in reactants.
[18:14:34] product atom-mapping number 3 not found in reactants.
[18:14:34] product atom

[]
unidock --receptor ../../dgym-data/dockstring_targets/ABL1_target.pdbqt --ligand_index /tmp/tmpcqnzl863/ligands.txt --dir /tmp/tmpcqnzl863 --search_mode detailed --scoring vina --seed 5 --center_x 15.851 --center_y 14.647 --center_z 3.904 --size_x 22.5 --size_y 22.5 --size_z 22.5
CompletedProcess(args='unidock --receptor ../../dgym-data/dockstring_targets/ABL1_target.pdbqt --ligand_index /tmp/tmpcqnzl863/ligands.txt --dir /tmp/tmpcqnzl863 --search_mode detailed --scoring vina --seed 5 --center_x 15.851 --center_y 14.647 --center_z 3.904 --size_x 22.5 --size_y 22.5 --size_z 22.5', returncode=0, stdout='Uni-Dock v1.1.0\n\nIf you used Uni-Dock in your work, please cite:               \n \nYu, Y., Cai, C., Wang, J., Bo, Z., Zhu, Z., & Zheng, H. (2023). \nUni-Dock: GPU-Accelerated Docking Enables Ultralarge Virtual Screening. \nJournal of Chemical Theory and Computation.                    \nhttps://doi.org/10.1021/acs.jctc.2c01145                       \n\nTang, S., Chen, R., Lin, M., L

Instantiate Environment and Agent.

In [19]:
from dgym.envs import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy

drug_env = DrugEnv(
    designer=designer,
    library=library,
    assays=assays,
    utility_function=utility_env,
)

# Batch sizes used throughout the sequence, named once instead of repeating
# the literals 8, 8 * 5 and 40.
BATCH_SIZE = 8
DESIGN_SIZE = 5  # the `size` parameter handed to each design step
SCORE_BATCH_SIZE = BATCH_SIZE * DESIGN_SIZE

# Derive the assay names from the oracles, the same way the `assays` list
# names its 'Noisy ...' entries, so they stay in sync if the target changes.
test_assay_names = [pIC50_oracle.name, log_S_oracle.name, log_P_oracle.name]
score_assay_names = [f'Noisy {assay_name}' for assay_name in test_assay_names]

# Construct sequence
design_grow = {
    'name': 'design',
    'batch_size': BATCH_SIZE,
    'parameters': {'strategy': 'grow', 'size': DESIGN_SIZE},
}
design_replace = {
    'name': 'design',
    'batch_size': BATCH_SIZE,
    'parameters': {'strategy': 'replace', 'size': DESIGN_SIZE, 'temperature': 0.2},
}
score = {
    'name': score_assay_names,
    'batch_size': SCORE_BATCH_SIZE,
    'parameters': {'batch_size': SCORE_BATCH_SIZE},
}
make = {'name': 'make', 'batch_size': BATCH_SIZE}
test = {'name': test_assay_names, 'batch_size': BATCH_SIZE}

# Not part of `sequence` below; kept so the name remains defined.
design_and_score = [design_replace, score]

sequence = [design_replace, score, design_grow, score, make, test]

drug_agent = SequentialDrugAgent(
    sequence=sequence,
    exploration_strategy=EpsilonGreedy(epsilon=0.25),
    utility_function=utility_agent,
)

In [20]:
def display_best(experiment, columns=('ABL1 pIC50', 'Log S', 'Log P'), n=5):
    """
    Annotate the experiment's library with utilities and show the best tested rows.

    The environment's utility function is evaluated on the whole library
    (with `use_precomputed=True, method='average'`) and each value is written
    to its molecule under the 'utility' annotation. If the library has any
    tested molecules, their annotations are sorted by utility, highest first,
    and the top rows are displayed.

    Parameters
    ----------
    experiment
        Object exposing `drug_env.library` and `drug_env.utility_function`.
    columns : iterable of str, optional
        Annotation columns shown before the 'utility' column.
    n : int, optional
        Number of rows to display. Defaults to 5.

    Returns
    -------
    None
    """
    observations = experiment.drug_env.library
    utilities = experiment.drug_env.utility_function(
        observations, use_precomputed=True, method='average')
    for obs, utility in zip(observations, utilities):
        obs.update_annotations({'utility': utility})

    tested = observations.tested
    if tested:
        best_tested = (
            tested
            .annotations
            .sort_values('utility', ascending=False)
            [[*columns, 'utility']]
            .head(n)
        )
        # `display` is the notebook's rich-output helper.
        display(best_tested)

## Test loading mechanism

In [21]:
import warnings
warnings.filterwarnings('ignore')

import os
import json
import zipfile
import pandas as pd
from tqdm.auto import tqdm
from io import TextIOWrapper
from collections import defaultdict

In [22]:
# Keep the archive location in its own variable: reusing `path` here would
# overwrite the data directory that `get_tcp_objectives` reads at call time.
results_path = '../../dgym-data/analysis/noise/selection_max_noise_2024-05-19_11-56-02.zip'

with zipfile.ZipFile(results_path, 'r') as z:
    # Load the first JSON entry, and fail loudly when there is none so that
    # `result` can never silently keep a stale value from an earlier cell.
    json_names = [name for name in z.namelist() if name.endswith('.json')]
    if not json_names:
        raise FileNotFoundError(f'No .json result found in {results_path}')
    with z.open(json_names[0]) as file:
        with TextIOWrapper(file, encoding='utf-8') as text_file:
            result = json.load(text_file)

  0%|          | 0/1 [00:00<?, ?it/s]

In [24]:
from dgym.experiment import Experiment

# Pair the environment with the agent, load the parsed `result` into the
# experiment, and run the experiment object that `load` returns.
experiment = Experiment(drug_env=drug_env, drug_agent=drug_agent)

exp_loaded = experiment.load(result)
exp_loaded.run()

  0%|          | 0/1 [00:00<?, ?it/s]

MoleculeCollection with 4871 Molecules


  0%|          | 0/100000 [00:00<?, ?it/s]

Created action
{'name': 'design', 'parameters': {'strategy': 'replace', 'size': 5, 'temperature': 0.2}, 'molecules': [2983, 831, 4798, 3942, 3868, 4335, 3846, 4298]}
['Tested', 'Tested', 'Tested', 'Tested', 'Tested', 'Tested', 'Tested', 'Scored']
0.7588616636136815
Created action
{'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'], 'parameters': {'batch_size': 40}, 'molecules': [4871, 4885, 4872, 4873, 4874, 4875, 4876, 4877, 4878, 4891, 4879, 4894, 4880, 4881, 4882, 4883, 4884, 4886, 4887, 4888, 4889, 4896, 4910, 4890, 4904, 4892, 4893, 4899, 4895, 4900, 4897, 4908, 4898, 4901, 4902, 4903, 4905, 4906, 4907, 4909]}
['Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designe

[18:15:33] Can't kekulize mol.  Unkekulized atoms: 13 16 17
[18:15:33] Can't kekulize mol.  Unkekulized atoms: 9 12 13
[18:15:33] Can't kekulize mol.  Unkekulized atoms: 9 12 13


[-176.42287038799287, 40.64303325660858, 59.40733351216957, 177.85456645179923, 68.76198215846317, -59.36637895101163, 155.55538816469223, -9.38538995026137, 77.33203510960702, -39.747542607811255, -113.81603470896033, 28.432989550964614, -82.57165191645701, -93.92077258335468, -98.78717783180508, 75.46607707383566, 88.6541862502111, 67.18613749054457, 23.503428061538912, -23.277588757620745, 132.37804757116814, -65.3640465097929, 158.22799767393315, 36.86726159207217, -24.495483453331147, 14.260276962212794, -62.66210825667298, 53.022994759039236, 117.80671310072779, 151.39676575846966, -166.15049439877794, -123.65424395258655, -56.35770791037097, 9.43026023356952, -46.273005745173116, -1.2449688142012585, 23.690054764722298, -93.89439884999467, 1.4452975781362376, 113.60499413674093]
0.7588616636136815
Created action
{'name': 'design', 'parameters': {'strategy': 'grow', 'size': 5}, 'molecules': [3159, 4798, 3942, 3868, 4335, 3846, 1413, 2758]}
['Scored', 'Tested', 'Tested', 'Tested',

[18:15:34] Can't kekulize mol.  Unkekulized atoms: 10 11 14
[18:15:34] Can't kekulize mol.  Unkekulized atoms: 9 10 13
[18:15:34] Can't kekulize mol.  Unkekulized atoms: 7 9 12
[18:15:34] Can't kekulize mol.  Unkekulized atoms: 9 11 14


0.7588616636136815
Created action
{'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'], 'parameters': {'batch_size': 40}, 'molecules': [4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, 4919, 4920, 4926, 4921, 4922, 4923, 4924, 4925, 4947, 4927, 4928, 4929, 4930, 4939, 4931, 4933, 4932, 4934, 4935, 4936, 4937, 4938, 4940, 4941, 4948, 4942, 4943, 4949, 4944, 4945, 4946, 4950]}
['Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed']
{'batch_size': 40}
[247.69579645250923, 22.786968939762332, 33.55147098270068, 131.3592489019067, -31.996388884824455, -42.207193048735

Exception ignored in: <function tqdm.__del__ at 0x7f3d92a8f560>
Traceback (most recent call last):
  File "/home/mrr/miniconda3/envs/chodera/lib/python3.11/site-packages/tqdm/std.py", line 1144, in __del__
KeyboardInterrupt: 


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/mrr/miniconda3/envs/chodera/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_635031/600563487.py", line 9, in <module>
    exp_loaded.run()
  File "/home/mrr/dev/chodera/dgym/dgym/experiment.py", line 47, in run
    observations, _, terminated, truncated, _ = self.drug_env.step(action)
                                                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mrr/dev/chodera/dgym/dgym/envs/drug_env.py", line 140, in step
    self.library = self.perform_action(action)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mrr/dev/chodera/dgym/dgym/envs/drug_env.py", line 173, in perform_action
    self.test(molecules, test, **parameters)
  File "/home/mrr/dev/chodera/dgym/dgym/envs/drug_env.py", line 224, in test
    results = assay(molecules, **params)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/hom

In [170]:
# # exp_loaded.drug_agent.reset()
# # observations, _ = exp_loaded.drug_env.reset()
# action = exp_loaded.drug_agent.act(observations)
# print(action)
# observations, _, _, _, _ = exp_loaded.drug_env.step(action)