# Testing New Experiment API

Load data.

In [1]:
%reload_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore') 

import rdkit
import dgym as dg

# Root directory that holds every dataset read by this notebook.
path = '../../dgym-data'

# Molecule collection loaded from the DSi-Poised library SDF; the three
# `reagsmi*` fields are passed as the reactant names.
deck = dg.MoleculeCollection.load(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

# Reaction library read from JSON.
reactions = dg.ReactionCollection.from_json(
    path = f'{path}/All_Rxns_rxn_library_sorted.json',
    smarts_col = 'reaction_string',
    classes_col = 'functional_groups'
)

# Enamine building blocks and their matching fingerprint file.
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq

# Resolve the parquet file against `path` like every other dataset, so that
# changing `path` relocates all inputs at once. `[0]` selects the first column.
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Load assays and utility functions.

In [2]:
import os

def get_tcp_objectives(target_index=0):
    """
    Build the docking oracle for one dockstring target plus three property oracles.

    The directory `{path}/dockstring_targets/` is listed; file names containing
    'conf' are treated as docking configs and names containing 'target' as
    receptors. Both lists are sorted and the entry at `target_index` is taken
    from each, so the pairing relies on configs and targets sorting in the
    same order.

    Parameters
    ----------
    target_index : int, default 0
        Position (in sorted file-name order) of the target to use. The default
        reproduces the previous hard-coded behavior.

    Returns
    -------
    tuple
        `(pIC50_oracle, qed_oracle, log_P_oracle, log_S_oracle)`.

    Raises
    ------
    FileNotFoundError
        If the directory contains no config or no target files.
    IndexError
        If `target_index` is out of range.
    ValueError
        If a non-blank config line does not parse as `key = <float>`.

    Notes
    -----
    Reads the module-level `path`. The Log S model path is relative to the
    current working directory.
    """
    from dgym.envs.oracle import DockingOracle, CatBoostOracle, RDKitOracle

    dockstring_dir = f'{path}/dockstring_targets/'
    files = os.listdir(dockstring_dir)
    configs = sorted(f for f in files if 'conf' in f)
    targets = sorted(f for f in files if 'target' in f)
    if not configs or not targets:
        raise FileNotFoundError(
            f'No dockstring config/target files found in {dockstring_dir!r}')

    # Parse `key = value` lines; blank lines are skipped and whitespace
    # around the key, the `=` and the value is tolerated.
    docking_box = {}
    with open(dockstring_dir + configs[target_index], 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, _, value = line.partition('=')
            docking_box[key.strip()] = float(value)

    # The target name is the file-name prefix before the first underscore.
    name = targets[target_index].split('_')[0]

    # Override whatever box size the config file specified.
    docking_box.update({
        'size_x': 22.5,
        'size_y': 22.5,
        'size_z': 22.5,
    })

    config = {
        'search_mode': 'detailed',
        'scoring': 'vina',
        'seed': 5,
        **docking_box
    }

    pIC50_oracle = DockingOracle(
        f'{name} pIC50',
        receptor_path=f'{path}/dockstring_targets/{name}_target.pdbqt',
        config=config
    )
    qed_oracle = RDKitOracle('QED', descriptor='QED')
    log_P_oracle = RDKitOracle('Log P', descriptor='MolLogP')
    log_S_oracle = CatBoostOracle(
        'Log S', path='../dgym/envs/models/aqsolcb.model')

    return pIC50_oracle, qed_oracle, log_P_oracle, log_S_oracle

In [3]:
from dgym.envs.utility import ClassicUtilityFunction, MultipleUtilityFunction

# Oracles for every objective (QED is returned but not used below).
pIC50_oracle, qed_oracle, log_P_oracle, log_S_oracle = get_tcp_objectives()

# One utility function per objective, each with an ideal and an acceptable range.
pIC50_utility = ClassicUtilityFunction(pIC50_oracle, ideal=(9.5, 13), acceptable=(8, 13))
log_P_utility = ClassicUtilityFunction(log_P_oracle, ideal=(0.5, 1.85), acceptable=(-0.5, 3.5))
log_S_utility = ClassicUtilityFunction(log_S_oracle, ideal=(-3, 1), acceptable=(-4, 1))
# QED utility is disabled for now:
# qed_utility = ClassicUtilityFunction(
#     qed_oracle, ideal=(0.65, 1.0), acceptable=(0.5, 1.0))

# Assays: the three oracles first, then one surrogate (sigma=1.0) of each,
# in the same order. QED and its surrogate (sigma=0.1) are disabled.
measured_assays = [pIC50_oracle, log_P_oracle, log_S_oracle]
assays = measured_assays + [
    oracle.surrogate(sigma=1.0) for oracle in measured_assays
]

# The agent optimizes against the ideal ranges as defined above.
from copy import deepcopy
utility_agent = MultipleUtilityFunction(
    utility_functions=[pIC50_utility, log_P_utility, log_S_utility],
    weights=[0.8, 0.1, 0.1],
)

# The environment tolerates acceptable ADMET: in an independent copy, the
# Log P (index 1) and Log S (index 2) ideal ranges are widened to their
# acceptable ranges. The pIC50 utility (index 0) is left untouched.
utility_env = deepcopy(utility_agent)
for admet_position in (1, 2):
    admet_utility = utility_env.utility_functions[admet_position]
    admet_utility.ideal = admet_utility.acceptable

Instantiate designer.

In [4]:
from dgym.envs.designer import Designer, Generator

# Generator built from the building blocks, their fingerprints and the sizes tensor.
generator = Generator(building_blocks, fingerprints, sizes)

# Designer combining the generator with the reaction library; caching enabled.
designer = Designer(generator, reactions, cache=True)

Pick 5 random starting hits.

In [5]:
# Randomly select starting molecules that have exactly two reactants and match a known reaction
import random
def select_molecule(deck, max_attempts=100_000):
    """
    Draw a random molecule from `deck` that is usable as a starting hit.

    A molecule qualifies when it has exactly two reactants and
    `designer.match_reactions` returns a truthy value for it. Candidates are
    drawn uniformly at random (with replacement) until one qualifies.

    The search is a loop rather than recursion, so a deck in which eligible
    molecules are rare cannot exhaust the interpreter's recursion limit.

    Parameters
    ----------
    deck : sequence
        Indexable collection of molecules supporting `len()`.
    max_attempts : int, default 100_000
        Upper bound on the number of random draws.

    Returns
    -------
    The first randomly drawn molecule that qualifies.

    Raises
    ------
    ValueError
        If `deck` is empty.
    RuntimeError
        If no qualifying molecule is drawn within `max_attempts` draws.
    """
    deck_size = len(deck)
    if deck_size == 0:
        raise ValueError('Cannot select a molecule from an empty deck.')

    for _ in range(max_attempts):
        candidate = deck[random.randint(0, deck_size - 1)]
        # Check the cheap reactant count before the reaction matching.
        if len(candidate.reactants) == 2 \
            and designer.match_reactions(candidate):
            return candidate

    raise RuntimeError(
        f'No eligible molecule found after {max_attempts} random draws.')

# Draw five starting hits and wrap them in a collection.
initial_molecules = [select_molecule(deck) for _ in range(5)]
library = dg.MoleculeCollection(initial_molecules).update_annotations()

# Run every assay whose name does not contain 'Noisy' on the starting
# library and store each result on its molecule under the assay's name.
for assay in assays:
    if 'Noisy' in assay.name:
        continue
    results = assay(library)
    for molecule, result in zip(library, results):
        molecule.update_annotations({assay.name: result})

# Mark the whole starting library as tested at step 0.
library.set_status('Tested', step=0)

Step Tested 0
Step Tested 0
Step Tested 0
Step Tested 0
Step Tested 0


Instantiate Environment and Agent.

In [6]:
from dgym.envs import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy

# Environment: scored against the ADMET-tolerant utility function.
drug_env = DrugEnv(
    designer=designer,
    library=library,
    assays=assays,
    utility_function=utility_env,
)

# Individual actions that make up one cycle of the agent's sequence.
design_grow = {
    'name': 'design',
    'batch_size': 8,
    'parameters': {'strategy': 'grow', 'size': 5},
}
design_replace = {
    'name': 'design',
    'batch_size': 8,
    'parameters': {'strategy': 'replace', 'size': 5, 'temperature': 0.2},
}
score = {
    'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'],
    'batch_size': 8 * 5,
    'parameters': {'batch_size': 40},
}
make = {
    'name': 'make',
    'batch_size': 8,
}
test = {
    'name': ['ABL1 pIC50', 'Log S', 'Log P'],
    'batch_size': 8,
}

# Order of actions: replace -> score -> grow -> score -> make -> test.
sequence = [design_replace, score, design_grow, score, make, test]

# Agent: follows the sequence and is scored against the stricter agent utility.
drug_agent = SequentialDrugAgent(
    sequence=sequence,
    exploration_strategy=EpsilonGreedy(epsilon=0.25),
    utility_function=utility_agent,
)

In [7]:
def display_best(experiment, columns=('ABL1 pIC50', 'Log S', 'Log P'), n=5):
    """
    Display the highest-utility tested molecules of an experiment.

    Every molecule in `experiment.drug_env.library` is first annotated with a
    'utility' value computed by the environment's utility function (called
    with `use_precomputed=True, method='average'`). If the library has any
    tested molecules, their annotations are sorted by utility in descending
    order and the top `n` rows are shown with `display`.

    Parameters
    ----------
    experiment
        Object exposing `drug_env.library` and `drug_env.utility_function`.
    columns : sequence of str, default ('ABL1 pIC50', 'Log S', 'Log P')
        Annotation columns to show; 'utility' is always appended last.
    n : int, default 5
        Number of rows to show.

    Returns
    -------
    None
        The table is rendered as a side effect; nothing is displayed when
        there are no tested molecules.
    """
    observations = experiment.drug_env.library
    utilities = experiment.drug_env.utility_function(
        observations, use_precomputed=True, method='average')

    # Side effect: the utility is stored on every molecule, tested or not.
    for obs, utility in zip(observations, utilities):
        obs.update_annotations({'utility': utility})

    tested = observations.tested
    if not tested:
        return

    best_tested = (
        tested
        .annotations
        .sort_values('utility', ascending=False)
        [[*columns, 'utility']]
        .head(n)
    )
    display(best_tested)

In [8]:
from dgym.experiment import Experiment

# Pair the environment with the agent, then run the experiment and keep its result.
experiment = Experiment(drug_env=drug_env, drug_agent=drug_agent)
result = experiment.run()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?it/s]

Memory before action: Memory Usage: 6.29 GB
Created action
{'name': 'design', 'parameters': {'strategy': 'replace', 'size': 5, 'temperature': 0.2}, 'molecules': [0, 4, 2, 3, 1]}
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Memory after action: Memory Usage: 6.44 GB
-1.1826376364908393
Memory before action: Memory Usage: 6.45 GB
Created action
{'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'], 'parameters': {'batch_size': 40}, 'molecules': [5, 6, 7, 8, 9, 10, 21, 11, 24, 12, 28, 13, 20, 14, 15, 16, 17, 18, 19, 22, 23, 25, 29, 26, 27]}
['Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed', 'Designed'


KeyboardInterrupt



## Restore old experiment

In [16]:
# NOTE(review): `new_experiment` is only assigned in a later cell of this
# notebook, so this line raises NameError on a fresh kernel (Restart & Run
# All). Presumably the result should come from an experiment created
# earlier; confirm which object is intended.
result = new_experiment.get_result(1)

In [17]:
import ast
import pandas as pd

def load(result):
    """
    Rebuild a molecule collection from a stored experiment result.

    Parameters
    ----------
    result : mapping
        Must contain an 'annotations' entry that `pandas.DataFrame` accepts.
        Each row must include a 'Synthetic Route' value, either already a
        dict or the string form of a Python literal dict.

    Returns
    -------
    dg.collection.MoleculeCollection
        One molecule per annotation row, in row order.
    """
    annotations = pd.DataFrame(result['annotations'])
    molecules = []
    for _, annotation in annotations.iterrows():

        # Split the row into the synthetic route and the remaining annotations.
        annotation = annotation.to_dict()
        route = annotation.pop('Synthetic Route')

        # The route may have been serialized as a string; parse it back into
        # a Python literal. If parsing fails the value is kept as-is.
        if isinstance(route, str):
            try:
                route = ast.literal_eval(route)
            except (ValueError, SyntaxError):
                pass
        route['annotations'] = annotation

        # Load molecule
        molecules.append(dg.molecule.Molecule.load(route))

    # Return the collection so callers can actually use it.
    return dg.collection.MoleculeCollection(molecules)

In [18]:
from dgym.envs import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy

# Environment rebuilt around the restored library.
# NOTE(review): `collection` is not assigned at module level in any visible
# cell; presumably it should be the value produced by `load(result)` - confirm.
new_drug_env = DrugEnv(
    designer=designer,
    library=collection,
    assays=assays,
    utility_function=utility_env,
)

# Individual actions that make up one cycle of the agent's sequence.
design_grow = {
    'name': 'design',
    'batch_size': 8,
    'parameters': {'strategy': 'grow', 'size': 5},
}
design_replace = {
    'name': 'design',
    'batch_size': 8,
    'parameters': {'strategy': 'replace', 'size': 5, 'temperature': 0.2},
}
score = {
    'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'],
    'batch_size': 8 * 5,
    'parameters': {'batch_size': 40},
}
make = {
    'name': 'make',
    'batch_size': 8,
}
test = {
    'name': ['ABL1 pIC50', 'Log S', 'Log P'],
    'batch_size': 8,
}

# Order of actions: replace -> score -> grow -> score -> make -> test.
sequence = [design_replace, score, design_grow, score, make, test]

# Agent: follows the sequence and is scored against the stricter agent utility.
drug_agent = SequentialDrugAgent(
    sequence=sequence,
    exploration_strategy=EpsilonGreedy(epsilon=0.25),
    utility_function=utility_agent,
)

In [19]:
from dgym.experiment import Experiment

# Pair the restored environment with the agent, then run and keep the result.
new_experiment = Experiment(drug_env=new_drug_env, drug_agent=drug_agent)
result = new_experiment.run()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?it/s]

Memory before action: Memory Usage: 6.90 GB
Created action
{'name': 'design', 'parameters': {'strategy': 'replace', 'size': 5, 'temperature': 0.2}, 'molecules': [367, 139, 220, 5, 155, 226, 325, 168]}
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Step Designed 0
Memory after action: Memory Usage: 6.92 GB
0.28970253030851006
Memory before action: Memory Usage: 6.92 GB
Created action
{'name': ['Noisy ABL1 pIC50', 'Noisy 

Exception ignored in: <function WeakSet.__init__.<locals>._remove at 0x7f11a71a0040>
Traceback (most recent call last):
  File "/home/mrr/miniconda3/envs/chodera/lib/python3.11/_weakrefset.py", line 39, in _remove
    def _remove(item, selfref=ref(self)):

KeyboardInterrupt: 

KeyboardInterrupt

