# dgym Spaces

`order_space` is the space of assay orders: each order is a variable-length sequence of (assay, molecule) pairs.

In [119]:
from gymnasium.spaces import (
    Sequence, Discrete, Tuple, Box, Dict
)

# Assay names and the initial hits (here simply the integers 0-5).
assays = ['lipophilicity', 'solubility', 'potency']
initial_hits = list(range(6))

# One discrete choice per assay and per initial hit; both start at 0 and use seed 0.
assay_space = Discrete(n=len(assays), start=0, seed=0)
molecule_space = Discrete(n=len(initial_hits), start=0, seed=0)

# An order is a variable-length sequence of (assay, molecule) pairs.
order_space = Sequence(Tuple((assay_space, molecule_space)))

# order_space.feature_space.spaces[1].n = 15

In [120]:
import torch

# Unbounded Box for a measured value (no explicit shape is given).
measurement_space = Box(low=-torch.inf, high=torch.inf)

# An observation is a variable-length sequence of
# (assay, molecule, measurement) triples.
observation_space = Sequence(
    Tuple((assay_space, molecule_space, measurement_space))
)

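## Test

Load the compound deck, the reaction repertoire, and the building blocks (with their fingerprints) used by the cells below.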

In [121]:
%load_ext autoreload
%autoreload 2

import dgym as dg
from dgym import collection, repertoire

# load all data
print('load data')
path = '../dgym-data'

deck = collection.from_sdf(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

repertoire = repertoire.from_json(
    path = f'{path}/All_Rxns_rxn_library.json',
    smarts_col = 'reaction_string',
    classes_col = 'functional_groups'
)

building_blocks = dg.datasets.enamine(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fps = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630.fpb')

# align fingerprints to building blocks
print('align fingerprints')
fps = dg.utils.sort_fingerprints(fps, building_blocks)

# partition building blocks according to functional groups
print('partition building blocks')
templates = dg.utils.get_unique_reactants(repertoire)
building_blocks = dg.utils.partition_building_blocks(building_blocks, templates, out_dir=path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
load data
align fingerprints
partition building blocks


`design_space` is the space for specifying hyperparameters of the synthetic library design.

In [139]:
# Batch sizes selectable through the 'num_analogs' index below.
batch_sizes = [5]
design_space = Dict(
    dict(
        # Index into `batch_sizes`. An earlier note listed the mapping
        # 0 -> 5, 1 -> 10, 2 -> 96, 3 -> 384, but only the first entry is
        # defined in `batch_sizes` at present -- TODO confirm which is intended.
        num_analogs=Discrete(n=len(batch_sizes), start=0, seed=0),
        # proportion random argsort
        percent_random=Box(0, 1),
    )
)

In [178]:
%%time
lib_params = design_space.sample()
print(lib_params)
num_analogs = batch_sizes[lib_params['num_analogs']]
percent_random = 0.5 # lib_params['percent_random']

compound = deck[400]
analogs = dg.synthesis.enumerate_analogs(
    compound,
    repertoire,
    building_blocks,
    num_analogs=num_analogs,
    sortby={'random': percent_random, 'fingerprint': 1 - percent_random},
    fps=fps
)

# len([display(a) for a in analogs])
len(analogs)

OrderedDict([('num_analogs', 0), ('percent_random', array([0.33064672], dtype=float32))])
CPU times: user 176 ms, sys: 0 ns, total: 176 ms
Wall time: 190 ms


20

In [219]:
import numpy as np
from gymnasium.spaces import (
    Dict, Discrete, Box, Sequence, Tuple
)

# Upper bound used for the molecule-indexed spaces and the mask below.
max_molecules = 10_000

# Define the action space: a 'design' part and an 'order' part.
action_space = Dict(dict(
    design=Dict(dict(
        num_analogs=Discrete(n=max_molecules),
        percent_random=Box(low=0.0, high=1.0, shape=(1,)),
    )),
    order=Dict(dict(
        assay=Discrete(n=3),  # For example, if you have 3 assays
        molecule=Discrete(n=max_molecules),
    )),
))

# Initialize the action mask (one boolean per molecule slot, all False).
action_mask = np.zeros(shape=(max_molecules,), dtype=np.bool_)

In [216]:
# Draw one random action from the full action space and display only its
# 'design' component.
# NOTE(review): this cell's stored execution count (216) is lower than that of
# the cell above (219), so the saved output may predate the current
# `action_space` definition -- re-run to refresh.
action_space.sample()['design']

OrderedDict([('num_analogs', 5789),
             ('percent_random', array([0.77109563], dtype=float32))])