## Sort the SMILESClickChem Reactions

We want to sort the reactions by compatibility with the Enamine building block space.

First, load the data.

In [1]:
%reload_ext autoreload
%autoreload 2

import rdkit
rdkit.Chem.Draw.IPythonConsole.ipython_maxProperties = -1

import dgym as dg

# load all data
path = '../../../dgym-data'

deck = dg.MoleculeCollection.load(
    f'{path}/DSi-Poised_Library_annotated.sdf',
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3']
)

reactions = dg.ReactionCollection.from_json(
    path = f'{path}/All_Rxns_rxn_library.json',
    smarts_col = 'reaction_string',
    classes_col = 'functional_groups'
)

building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Instantiate the designer.

In [2]:
from dgym.molecule import Molecule
from dgym.envs.designer import Designer, Generator
from dgym.envs.drug_env import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy
from dgym.experiment import Experiment

# Build the building-block generator first, then hand it to the designer
# together with the reaction library loaded above.
building_block_generator = Generator(building_blocks, fingerprints, sizes)
designer = Designer(building_block_generator, reactions, cache=True)

Run each reaction on 10,000 randomly chosen pairs of Enamine building blocks and count how many pairs it is compatible with.

In [25]:
# pandas is needed at the end of this cell; import it here so the cell does not
# depend on a later cell having been executed first.
import pandas as pd
from tqdm.notebook import tqdm
from collections import defaultdict
from rdkit import RDLogger

# Turn off RDKit log output for the 'rdApp.*' loggers while reactions are run.
RDLogger.DisableLog('rdApp.*')

# Not used anywhere in this cell; kept so any other cell that references it
# still finds the name.
worked = defaultdict(int)

# Draw the building-block pairs once so every reaction is scored against the
# same 10,000 combinations.
random_combos = [[
    next(designer.generator()),
    next(designer.generator())
] for _ in range(10_000)]

# One record per reaction: its name and how many of the sampled pairs produced
# at least one result.
records = []
for reaction in tqdm(reactions):
    num_compatible = 0
    for combo in random_combos:
        try:
            # Only the first item is requested; a pair counts as compatible
            # as soon as the reaction yields anything for it.
            next(reaction.run(combo))
        except Exception:
            # Any failure (including StopIteration when nothing is yielded)
            # means the pair is incompatible. `Exception` rather than a bare
            # `except` so that KeyboardInterrupt can still stop this long loop.
            continue
        num_compatible += 1
    records.append({'reaction': reaction.name, 'num_compatible': num_compatible})
reaction_compatibility = pd.DataFrame(records)

  0%|          | 0/94 [00:00<?, ?it/s]

Sort the results.

In [26]:
import pandas as pd

# Row labels of the compatibility table, ordered from the least to the most
# compatible reaction.
order = pd.DataFrame(reaction_compatibility).sort_values(
    by='num_compatible',
    ascending=True,
).index

In [34]:
# Save the compatibility counts, most compatible reaction first.
# `reset_index()` keeps the original row label as an `index` column.
# The file goes under the shared `path` data directory rather than a second
# hard-coded copy of it, so changing `path` moves this output too.
(
    pd.DataFrame(reaction_compatibility)
    .sort_values('num_compatible', ascending=False)
    .reset_index()
).to_csv(f'{path}/reaction_compatibility_10000.csv')

Reorder the reactions.

In [47]:
import json
from collections import OrderedDict

# Read the raw reaction library; the `with` block closes the file handle as
# soon as parsing finishes.
with open(f'{path}/All_Rxns_rxn_library.json', 'r') as library_file:
    reaction_json = json.load(library_file)
reaction_dict = OrderedDict(reaction_json)

# `order` runs from least to most compatible. Each reaction is moved to the
# front in turn, so the last one moved (the most compatible) ends up first.
for index in order:
    reaction_name = reactions[index].name
    reaction_dict.move_to_end(reaction_name, last=False)

Write to disk.

In [51]:
# Write the reordered library next to the original. The `with` block closes
# the file, which guarantees the JSON is fully flushed to disk.
with open(f'{path}/All_Rxns_rxn_library_sorted.json', 'w') as sorted_file:
    json.dump(reaction_dict, sorted_file)