# Test Env

Import data.

In [1]:
%load_ext autoreload
%autoreload 2

import dgym as dg

# Root folder for every data file read by this notebook.
path = '../dgym-data'
print('load data')

# Screening deck, read from the annotated SDF; the three `reagsmi*` names are
# passed through as the reactant fields.
deck = dg.MoleculeCollection.from_sdf(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3'],
)

# Reaction library, read from JSON with the SMARTS and functional-group
# columns named explicitly.
reactions = dg.ReactionCollection.from_json(
    path=f'{path}/All_Rxns_rxn_library.json',
    smarts_col='reaction_string',
    classes_col='functional_groups',
)

load data


In [2]:
# Source files for the Enamine building blocks and their fingerprints.
enamine_sdf = f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf'
enamine_fpb = f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630.fpb'

building_blocks = dg.datasets.enamine(enamine_sdf)
fingerprints = dg.datasets.fingerprints(enamine_fpb)

# Reorder the fingerprints against the building blocks before anything else
# touches either collection.
print('align fingerprints')
fingerprints = dg.utils.sort_fingerprints(fingerprints, building_blocks)

# Split the building blocks by the unique reactant templates found in the
# reaction library; `path` is handed over as the output directory.
print('partition building blocks')
templates = dg.utils.get_unique_reactants(reactions)
building_blocks = dg.utils.partition_building_blocks(
    building_blocks,
    templates,
    out_dir=path,
)

align fingerprints
partition building blocks


In [3]:
from dgym.envs.library_designer import LibraryDesigner
from dgym.envs.oracle import DGLOracle

# The designer is built from the reactions, building blocks and fingerprints
# prepared in the cells above.
library_designer = LibraryDesigner(reactions, building_blocks, fingerprints)

# One oracle per model identifier, created in the same order as before
# (presumably pretrained property predictors - confirm against DGLOracle).
lipo_oracle, esol_oracle = (
    DGLOracle(model_name)
    for model_name in ('GCN_canonical_Lipophilicity', 'GCN_canonical_ESOL')
)

In [302]:
from dgym.envs import DrugEnv

# Environment over the first 200 deck molecules, with both oracles as assays.
assays = [lipo_oracle, esol_oracle]
drug_env = DrugEnv(library_designer, library=deck[:200], assays=assays, budget=200)
# Open question: does the starting library already carry assay data? Possibly.

# A single design action targeting library indices 0 and 199.
action = dict(
    design=dict(
        molecules=[0, 199],
        num_analogs=2,
        fraction_random=0.0,
    )
)

# Left as the final expression so the step result is displayed.
drug_env.step(action)
# [display(p) for p in drug_env.library[6:]]

(MoleculeCollection with 404 Molecules, None, None, {})

First, set up the environment.

In [479]:
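# NOTE: disabled draft helper, kept for reference only. As written, the body
# reads the global `library` rather than its `molecules` parameter.
# def score(molecules, assays):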

#     for assay in assays:
#         results = assay.predict(library)
#         for idx, molecule in enumerate(library):
#             molecule.update_annotations({assay.model_name: results[idx]})
    
#     return molecules

# library = score(library, assays)

In [480]:
from dgym.envs import DrugEnv

# Rebuild the environment for the experiments below: ten starting molecules
# and a much larger budget.
assays = [lipo_oracle, esol_oracle]
library = deck[:10]

# The environment receives its own list of the same two oracles.
drug_env = DrugEnv(
    library_designer,
    library=library,
    assays=list(assays),
    budget=10_000,
)

Next, define the action mask and start performing assays (randomly).

In [348]:
# Sampling mask: only the `molecules` entries carry a constraint, taken from
# the environment's `valid_actions`. The `None` entries presumably leave that
# sub-space unmasked - confirm against the action space's `sample` contract.
mask = dict(
    design=dict(
        fraction_random=None,
        num_analogs=None,
        molecules=(None, drug_env.valid_actions),
    ),
    order=dict(
        assay=None,
        molecules=(None, drug_env.valid_actions),
    ),
)

# Draw one masked action and apply it; the first element of the step result
# is used as the observations.
action = drug_env.action_space.sample(mask)
results = drug_env.step(action)
observations = results[0]

# Display the 'assay_0' annotation of every observation that has one.
[
    molecule.annotations['assay_0']
    for molecule in observations
    if 'assay_0' in molecule.annotations
]

[1.7408784627914429,
 1.7279001474380493,
 1.8764628171920776,
 2.3975718021392822,
 1.8674098253250122]

In [475]:
# `np` is used throughout this cell but numpy is not imported anywhere earlier
# in the notebook, so import it here to survive Restart & Run All.
import numpy as np

# Design -> score -> triage -> assay loop.
# `best_observed` tracks the highest utility seen in any round.
best_observed = -np.inf

num_rounds = 10
for _ in range(num_rounds):

    # ideate
    # NOTE(review): `molecules` is not defined in any cell of this notebook;
    # it only exists as leftover kernel state. Confirm what it should refer to
    # (e.g. the starting library) and define it explicitly before this loop.
    action = {
        'design': {
            'molecules': range(len(molecules)),
            'num_analogs': 1, # maps to 10 molecules
            'fraction_random': 0.5
        }
    }
    library = drug_env.step(action)[0]

    # score: one prediction result per assay, computed over the current library
    assay_results = [assay.predict(library) for assay in assays]

    # triage: utility is the mean across assays (axis 0 indexes the assays)
    utility = np.mean(assay_results, axis=0)
    best_observed = max(np.max(utility), best_observed)
    # argsort is ascending, so the last 10 indices have the highest utility
    chosen_molecules = utility.argsort()[-10:].tolist()

    # assay: order assay 0 for the selected molecules
    action = {'order': {'assay': 0, 'molecules': chosen_molecules}}
    library = drug_env.step(action)[0]
    print(library)

    # update model (TODO)

MoleculeCollection with 413 Molecules
MoleculeCollection with 820 Molecules
MoleculeCollection with 1226 Molecules
MoleculeCollection with 1630 Molecules
MoleculeCollection with 2027 Molecules
MoleculeCollection with 2426 Molecules
MoleculeCollection with 2823 Molecules
MoleculeCollection with 3226 Molecules
MoleculeCollection with 3629 Molecules
MoleculeCollection with 4040 Molecules


In [476]:
# Number of predictions returned by the first assay in the latest scoring pass.
len(assay_results[0])

4040

In [477]:
# Utilities of the molecules picked in the latest triage step (ascending,
# because the indices come from argsort).
utility[chosen_molecules]

array([0.39368074, 0.41250896, 0.42399023, 0.42849761, 0.42870724,
       0.46394673, 0.49186444, 0.49931121, 0.56182384, 0.62672341])

In [478]:
# Highest utility in the most recently scored library.
np.max(utility)

0.6267234086990356

In [453]:
# Running maximum utility across rounds. The saved output below may come from
# an earlier run than the neighbouring cells (execution counts are out of order).
best_observed

0.39085710048675537

In [443]:
# Re-score the current library: one prediction result per assay, in the order
# the assays are listed.
assay_results = [assay.predict(library) for assay in assays]


MoleculeCollection with 48 Molecules

In [32]:
from dgym.agents import DrugAgent

In [33]:
# Build an agent from the environment's action space.
drug_agent = DrugAgent(drug_env.action_space)