# Testing DrugAgent and DrugEnv APIs

Load data.

In [1]:
%reload_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore') 

import rdkit
import dgym as dg

# Load all data. Every input file is resolved relative to `path`, so pointing
# the notebook at a different data checkout only requires changing this line.
path = '../../dgym-data'

# Screening deck of annotated molecules.
# `reactant_names` presumably lists the SDF fields that hold each molecule's
# reactants -- TODO confirm against dg.MoleculeCollection.load.
deck = dg.MoleculeCollection.load(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

# Reaction library, read from JSON using the given SMARTS / class columns.
reactions = dg.ReactionCollection.from_json(
    path = f'{path}/All_Rxns_rxn_library_sorted.json',
    smarts_col = 'reaction_string',
    classes_col = 'functional_groups'
)

# Building blocks and their fingerprint file (same Enamine stock release).
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
# First column of the parquet table, converted to a torch tensor.
# Fix: this used to hard-code '../../dgym-data/sizes.parquet', bypassing `path`.
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Load assays and utility functions.

In [2]:
import os

def get_tcp_objectives():
    """
    Build the three oracles used as objectives in this notebook.

    Reads the module-level ``path``. The docking configuration and target are
    the first entries (after sorting by file name) among the files in
    ``{path}/dockstring_targets/`` whose names contain ``'conf'`` and
    ``'target'`` respectively.

    Returns
    -------
    tuple
        ``(pIC50_oracle, log_P_oracle, log_S_oracle)``:
        a ``DockingOracle`` named ``'{name} pIC50'``, an ``RDKitOracle`` on the
        ``MolLogP`` descriptor, and a ``CatBoostOracle`` loaded from
        ``../dgym/envs/models/aqsolcb.model``.

    Raises
    ------
    IndexError
        If the directory contains no matching config or target file.
    ValueError
        If a config value cannot be parsed as a float.
    """
    # Imported locally, as in the original cell; only the oracles that are
    # actually instantiated below are imported.
    from dgym.envs.oracle import DockingOracle, CatBoostOracle, RDKitOracle

    dockstring_dir = f'{path}/dockstring_targets/'
    files = os.listdir(dockstring_dir)
    configs = sorted(f for f in files if 'conf' in f)
    targets = sorted(f for f in files if 'target' in f)

    idx = 0

    # Parse `key = value` lines into floats. Lines without an '=' (blank,
    # whitespace-only, comments) are skipped instead of raising IndexError.
    config_ = {}
    with open(os.path.join(dockstring_dir, configs[idx]), 'r') as f:
        for line in f:
            key, separator, value = line.partition('=')
            if not separator:
                continue
            config_[key.strip()] = float(value.strip())

    # Target name is the part of the target file name before the first '_'.
    name = targets[idx].split('_')[0]

    # Override any size_* values read from the file with a fixed 22.5
    # (presumably the docking box edge lengths -- TODO confirm units).
    config_.update({
        'size_x': 22.5,
        'size_y': 22.5,
        'size_z': 22.5,
    })

    # Fixed docking settings first; values parsed from the file are merged
    # last, so a same-named key in the file would take precedence.
    config = {
        'search_mode': 'detailed',
        'scoring': 'vina',
        'seed': 5,
        **config_
    }

    pIC50_oracle = DockingOracle(
        f'{name} pIC50',
        receptor_path=f'{path}/dockstring_targets/{name}_target.pdbqt',
        config=config
    )
    log_P_oracle = RDKitOracle('Log P', descriptor='MolLogP')
    log_S_oracle = CatBoostOracle(
        'Log S', path='../dgym/envs/models/aqsolcb.model')

    return pIC50_oracle, log_P_oracle, log_S_oracle

In [3]:
from dgym.envs.utility import ClassicUtilityFunction, MultipleUtilityFunction

# Oracles for the three objectives, in the order returned by the helper
pIC50_oracle, log_P_oracle, log_S_oracle = get_tcp_objectives()

# One utility function per oracle, each with an ideal and an acceptable window
pIC50_utility = ClassicUtilityFunction(
    pIC50_oracle,
    ideal=(9.5, 13),
    acceptable=(8, 13),
)
log_P_utility = ClassicUtilityFunction(
    log_P_oracle,
    ideal=(0.5, 1.85),
    acceptable=(-0.5, 3.5),
)
log_S_utility = ClassicUtilityFunction(
    log_S_oracle,
    ideal=(-3, 1),
    acceptable=(-4, 1),
)

# Assays: the three oracles, followed by a surrogate of each in the same
# order. The right-hand list is fully built before `assays` is extended.
assays = [pIC50_oracle, log_P_oracle, log_S_oracle]
assays += [oracle.surrogate(sigma=1.0) for oracle in assays]

# `utility_agent` keeps the windows defined above. `utility_env` is a deep
# copy in which the Log P and Log S ideal windows are replaced by their wider
# acceptable windows, so the environment tolerates acceptable ADMET.
from copy import deepcopy
utility_agent = MultipleUtilityFunction(
    weights = [0.8, 0.1, 0.1],
    utility_functions = [pIC50_utility, log_P_utility, log_S_utility],
)
utility_env = deepcopy(utility_agent)
utility_env.utility_functions[1].ideal = utility_env.utility_functions[1].acceptable
utility_env.utility_functions[2].ideal = utility_env.utility_functions[2].acceptable

Instantiate designer.

In [4]:
from dgym.envs.designer import Designer, Generator

# Designer built from a Generator over the building blocks, their
# fingerprints and the `sizes` tensor loaded above, plus the reaction library.
# `cache = True` presumably enables memoisation inside Designer -- TODO confirm.
designer = Designer(
    Generator(building_blocks, fingerprints, sizes),
    reactions,
    cache = True
)

Pick 5 random starting hits.

In [5]:
# Select a random starting molecule that has exactly two reactants and matches a reaction
import random
def select_molecule(deck):
    """
    Pick a random molecule from `deck` that can seed the design loop.

    A molecule qualifies when it has exactly two reactants and the
    module-level `designer.match_reactions` returns a truthy result for it.

    Parameters
    ----------
    deck
        Indexable collection of molecules supporting `len()` and integer
        indexing.

    Returns
    -------
    The first qualifying molecule encountered in a random visiting order.

    Raises
    ------
    ValueError
        If no molecule in `deck` qualifies (including an empty deck).
    """
    # Visit each index at most once, in random order. The previous recursive
    # retry could hit RecursionError when qualifying molecules are rare and
    # could re-check the same molecule many times; this always terminates and
    # is still uniform over the qualifying molecules.
    for candidate_index in random.sample(range(len(deck)), len(deck)):
        candidate = deck[candidate_index]
        if len(candidate.reactants) == 2 \
            and designer.match_reactions(candidate):
            return candidate

    raise ValueError(
        'No molecule in the deck has exactly two reactants and a matching reaction.'
    )

# Draw five starting molecules. Each call to select_molecule draws
# independently, so the same molecule can appear more than once.
initial_molecules = [select_molecule(deck) for _ in range(5)]
# Wrap the picks in a MoleculeCollection; this becomes the environment's initial library.
library = dg.MoleculeCollection(initial_molecules).update_annotations()

Instantiate Environment and Agent.

In [6]:
from dgym.envs import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy

# Environment: uses the relaxed `utility_env` defined earlier
drug_env = DrugEnv(
    designer=designer,
    library=library,
    assays=assays,
    utility_function=utility_env,
)

# Action templates for the agent's repeating sequence.
# The two design actions differ only in their strategy.
design_grow = dict(
    name='design',
    batch_size=5,
    parameters=dict(temperature=1.0, strategy='grow', size=8),
)
design_replace = dict(
    name='design',
    batch_size=5,
    parameters=dict(temperature=1.0, strategy='replace', size=8),
)

make = dict(name='make', batch_size=40)
test = dict(name=['ABL1 pIC50', 'Log S', 'Log P'], batch_size=8)
test_surrogate = dict(
    name=['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'],
    batch_size=40,
)
design_and_score = [design_replace, test_surrogate]

# One round of replace-and-score, then grow-and-score, then make and test
sequence = design_and_score + [design_grow, test_surrogate, make, test]

# Agent: greedy (epsilon = 0), scoring with the stricter `utility_agent`
drug_agent = SequentialDrugAgent(
    sequence=sequence,
    exploration_strategy=EpsilonGreedy(epsilon=0.0),
    utility_function=utility_agent,
)

In [None]:
import json
from tqdm.notebook import tqdm

# Start a fresh episode: reset the agent, then take the initial observations
# from the environment (the second value returned by reset() is discarded).
drug_agent.reset()
observations, _ = drug_env.reset()
for _ in tqdm(range(50)):
    # Ask the agent for its next action given the current observations.
    action = drug_agent.act(observations)
    # Debug trace: the action itself, then the library indices and the
    # current statuses of the molecules the action targets.
    print(action)
    print(drug_env.library[action['molecules']].index)
    print(drug_env.library[action['molecules']].annotations['status'].values)
    # NOTE(review): Gymnasium's Env.step returns
    # (obs, reward, terminated, truncated, info); here the third and fourth
    # values are bound as `truncated, terminated` -- confirm against
    # DrugEnv.step. Neither flag is checked, so the loop does not stop early
    # when an episode ends.
    observations, reward, truncated, terminated, _ = drug_env.step(action)
    # The reward is only printed after design actions.
    if action['name'] == 'design':
        print(reward)

  0%|          | 0/50 [00:00<?, ?it/s]

{'name': 'design', 'parameters': {'temperature': 1.0, 'strategy': 'replace', 'size': 8}, 'molecules': [0, 1, 2, 3, 4]}
[0, 1, 2, 3, 4]
[None None None None None]
-inf
{'name': ['Noisy ABL1 pIC50', 'Noisy Log S', 'Noisy Log P'], 'parameters': {}, 'molecules': [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]}
[5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]
['designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'designed' 'designed' 'designed' 'designed'
 'designed' 'designed' 'de

In [25]:
observations.scored.annotations['status']

0    scored
1    scored
2    scored
3    scored
4    scored
Name: status, dtype: object

In [50]:
import numpy as np
# Sanity check: each molecule's own `status` attribute should agree,
# element-wise, with the `status` column of the collection's annotations.
np.array([o.status for o in observations]) == observations.annotations['status'].values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [63]:
observations.scored.index

[30, 31, 32, 33, 34]

In [62]:
observations[observations.scored.index].annotations['status']

0    made
1    made
2    made
3    made
4    made
Name: status, dtype: object

In [29]:
observations[observations.scored.index].annotations['status']

0    made
1    made
2    made
3    made
4    made
Name: status, dtype: object

In [19]:
observations[(observations.scored + observations.tested).index].annotations['status'].values

array(['made', 'made', 'made', 'made', 'made', 'tested', 'made', 'made',
       'made', 'made', 'made', 'tested'], dtype=object)

In [15]:
observations.annotations['status'].values

array(['made', 'made', 'made', 'tested', 'made', 'made', 'tested', 'made',
       'made', 'made', 'made', 'made', 'made', 'tested', 'made', 'made',
       'made', 'made', 'made', 'made', 'made', 'made', 'made', 'made',
       'tested', 'tested', 'tested', 'made', 'made', 'tested', 'made',
       'made', 'made', 'made', 'made', 'scored', 'scored', 'scored',
       'scored', 'scored', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed', 'designed', 'designed', 'designed', 'designed',
       'designed'], dtype=obje

In [90]:
drug_env.library[action['molecules']].annotations.reindex(columns=[*drug_env.assays, 'status'])

Unnamed: 0,ABL1 pIC50,Log P,Log S,Noisy ABL1 pIC50,Noisy Log P,Noisy Log S,status
0,,,,6.491044,-0.320219,-3.039522,scored
1,7.198526,2.72194,-4.022926,8.973864,3.980826,-3.925592,tested
2,7.312524,1.3143,-4.409591,7.446356,2.382751,-4.913456,tested
3,,,,6.30838,2.728656,-3.700461,made
4,7.038151,3.8315,-5.019697,8.417149,4.602984,-4.552683,tested


In [74]:
action

{'name': 'design',
 'parameters': {'temperature': 1.0, 'strategy': 'replace', 'size': 8},
 'molecules': [22, 28, 12, 4, 29]}

In [57]:
observations.annotations[['ABL1 pIC50', 'Log S', 'Log P', 'status']].dropna(how='all', subset=['ABL1 pIC50', 'Log S', 'Log P'])

Unnamed: 0,ABL1 pIC50,Log S,Log P,status
7,6.942704,-3.113913,2.1918,tested
17,7.588063,-2.666564,2.2621,tested
18,7.981968,-3.87981,2.5905,tested
22,7.60366,,,made
23,7.177941,-4.867824,4.7446,made
24,6.913399,-5.377892,5.1176,tested
29,6.612932,-4.320989,4.8225,tested
30,8.447677,-4.450373,4.07884,tested
32,8.296485,-4.884895,4.4465,tested
34,7.932145,-3.943795,3.7841,tested


In [39]:
observations[[20, 41, 31, 103, 42, 9, 88, 83]].annotations

Unnamed: 0,smiles,design_cycle,reactants,status,timestep,inspiration,Noisy ABL1 pIC50,Noisy Log S,Noisy Log P,ABL1 pIC50,Log S,Log P
0,CCC(=O)N(C(=O)CCl)c1cc(C)c(Cl)cc1OC,0,"['COc1cc(Cl)c(C)cc1NC(=O)CCl', 'CCC(=O)O']",made,1,CCC(=O)NCc1nnc2n1CCCC2,8.839655,-3.116612,3.346494,5.855191,,
1,Cc1nc2c(s1)CCCC2C(=O)N1CCN(S(=O)(=O)c2ccc(C(F)...,0,"['Cl.O=S(=O)(c1ccc(C(F)(F)F)cc1)N1CCNCC1', 'Cc...",made,1,CNC(=O)C1CCCc2sc(C)nc21,10.205361,-5.145502,4.579769,7.496795,,
2,O=C(CN1CCc2ccccc2C1)c1cc(F)cc(F)c1,0,"['c1ccc2c(c1)CCNC2', 'O=CC(=O)c1cc(F)cc(F)c1']",tested,1,Oc1ccccc1CN1CCc2ccccc2C1,10.585507,-5.235613,3.455707,8.268876,-4.633486,3.2058
3,CNCc1cccc(NC(=O)CC(C(=O)O)C(CC(=O)O)C(=O)O)c1,0,"['CNCc1cccc(N)c1', 'O=C(O)CC(C(=O)O)C(CC(=O)O)...",made,2,O=C(O)CC(C(=O)O)C(CC(=O)NCc1nnc2n1CCCC2)C(=O)O,7.993053,-4.286957,2.890643,6.802473,,
4,CCN(c1ccccc1)S(=O)(=O)c1cc(C(=O)NC)c(Cl)cc1Cl,0,"['CN.Cl', 'CCN(c1ccccc1)S(=O)(=O)c1cc(C(=O)O)c...",made,1,CNC(=O)C1CCCc2sc(C)nc21,8.160268,-4.820932,3.203438,7.514403,,
5,COc1cc(F)c(Cl)cc1CNC(=O)c1cnccn1,0,"['COc1cc(F)c(Cl)cc1CN.Cl', 'O=C(O)c1cnccn1']",made,1,O=C(NCC1CCOC1)c1cnccn1,7.975087,-1.865702,5.261131,6.363187,,
6,CCNS(=O)(=O)c1ccc(C=CC(=O)N(C(N)=O)C(=O)Oc2ccc...,0,"['NC(=O)NC(=O)Oc1ccccc1', 'CCNS(=O)(=O)c1ccc(/...",tested,2,CCNS(=O)(=O)c1ccc(C=CC(=O)NCc2nnc3n2CCCC3)cc1,8.742915,-2.574142,1.698716,7.481269,-3.806405,2.1043
7,Cc1cc(C(F)(F)F)nnc1NC(=O)c1cc([N+](=O)[O-])cc(...,0,"['Cc1cc(C(F)(F)F)nnc1N', 'O=C(O)c1cc([N+](=O)[...",made,2,Cc1cc(C(F)(F)F)nnc1NC(=O)c1cnccn1,8.466873,-6.937523,2.455468,7.567571,,


In [34]:
observations.annotations.reindex(columns=[*drug_env.assays, 'status']).iloc[-250:-200]

Unnamed: 0,ABL1 pIC50,Log P,Log S,Noisy ABL1 pIC50,Noisy Log P,Noisy Log S,status
51,,,,5.926308,2.674148,-3.136682,made
52,,,,6.315637,3.485318,-4.069968,made
53,,,,6.8033,3.660136,-4.369562,scored
54,,,,4.301885,3.539542,-5.065851,made
55,,,,4.254468,5.369514,-4.207737,scored
56,,,,6.566955,3.273383,-2.742577,scored
57,,,,4.928968,4.030144,-1.874605,made
58,,,,4.526909,5.227335,-5.092585,made
59,,,,6.943569,4.845967,-6.238683,scored
60,,,,5.860077,3.624026,-4.572837,made


In [70]:
observations[action['molecules']].annotations['status']

0      made
1      made
2      made
3    tested
4      made
5      made
6    tested
7      made
Name: status, dtype: object

In [74]:
action

{'name': ['ABL1 pIC50', 'Log S', 'Log P'],
 'parameters': {},
 'molecules': [43, 116, 81, 41, 118, 113, 109, 94]}

In [80]:
observations[action['molecules']].made

MoleculeCollection with 6 Molecules

In [76]:
np.array(observations.made.index)

array([  0,   2,   3,   7,   8,   9,  10,  11,  12,  14,  15,  17,  18,
        19,  20,  22,  24,  25,  26,  27,  28,  30,  34,  35,  36,  38,
        39,  40,  42,  43,  44,  45,  48,  49,  50,  51,  52,  55,  58,
        59,  60,  62,  63,  66,  67,  69,  70,  71,  72,  73,  75,  76,
        77,  78,  81,  82,  85,  86,  90,  92,  93,  94, 100, 106, 108,
       110, 112, 113, 114, 116, 118, 121, 123, 130, 131, 138, 160, 165,
       166])

In [13]:
action

{'name': 'design',
 'parameters': {'temperature': 1.0, 'strategy': 'replace', 'size': 8},
 'molecules': [0, 1, 2, 3, 4]}

In [36]:
from dgym.molecule import Molecule

# Minimal reproduction of a failing load: a route spec with one fixed reactant
# and one randomly searched, seeded reactant under the named reaction.
# NOTE(review): as recorded in this cell's output, the call raised
# AttributeError ('NoneType' object has no attribute 'GetPropsAsDict').
Molecule.load({
    'reaction': '13_Carboxylate_and_Amine',
    # 'product': 'CNC1(C(N)=O)CCCC1',
    'reactants': [{'search': 'fixed', 'product': 'CNC1(C(N)=O)CCCC1'}, {'search': 'random', 'size_limit': 10, 'seed': 6871370448492419224}]
})

AttributeError: 'NoneType' object has no attribute 'GetPropsAsDict'

In [29]:
designer.design(observations[7], strategy='grow')

AttributeError: 'NoneType' object has no attribute 'GetPropsAsDict'

In [154]:
import numpy as np
# Indices that would sort the tested molecules by the environment's utility,
# in ascending order (np.argsort default). The utility is evaluated with
# `use_precomputed=True` and `method='average'`; presumably this reuses stored
# assay values rather than re-running the oracles -- TODO confirm.
sort = np.argsort(drug_env.utility_function(observations.tested, use_precomputed=True, method='average'))

In [171]:
print(json.dumps(sequence, indent=4))

[
    {
        "name": "design",
        "batch_size": 5,
        "parameters": {
            "temperature": 1.0,
            "size": 8
        }
    },
    {
        "name": "Noisy ABL1 pIC50",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "Noisy Log S",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "Noisy Log P",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "make",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "ABL1 pIC50",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "Log S",
        "batch_size": 40,
        "parameters": {}
    },
    {
        "name": "Log P",
        "batch_size": 40,
        "parameters": {}
    }
]
