# Chemical reactions figure

In [1]:
%load_ext autoreload
%autoreload 2
import json
import zipfile
import pandas as pd
from tqdm.auto import tqdm
from io import TextIOWrapper
from collections import defaultdict

# Archive holding one JSON result file per run.
path = '../../dgym-data/analysis/noise/selection_noise_2024-05-10_23-36-56_continued.zip'

# Key in each result whose value identifies the experimental condition of a run.
measure = 'sigma'

# Annotation columns to keep, in this order. `reindex` adds any column that a
# run's annotations lack (filled with NaN) and drops everything not listed.
annotation_columns = [
    'SMILES',
    'Inspiration',
    'Current Status',
    'ABL1 pIC50',
    'Log P',
    'Log S',
    'Noisy ABL1 pIC50',
    'Noisy Log P',
    'Noisy Log S',
    'Step Tested',
    'Step Scored',
    'Synthetic Route',
]

# Running count of files seen so far for each distinct `measure` value;
# used to hand out 1-based trial numbers.
trial_indices = defaultdict(int)
records = []

with zipfile.ZipFile(path, 'r') as archive:
    for filename in tqdm(archive.namelist()):
        # Decode the archive member as UTF-8 text and parse it as JSON.
        with archive.open(filename) as raw_file, \
                TextIOWrapper(raw_file, encoding='utf-8') as text_file:
            result = json.load(text_file)

        # Get annotations, restricted to the columns of interest
        annotations = pd.DataFrame(result['annotations'])
        record = annotations.reindex(columns=annotation_columns)

        # Get trial number: the n-th file with this `measure` value is trial n
        value = result[measure]
        trial_indices[value] += 1
        record[measure] = value
        record['trial'] = trial_indices[value]

        # Append
        records.append(record)

# Stack the per-file frames; each frame's original row index is kept as-is.
df = pd.concat(records)

# Drop the reference to the list of per-file frames now that `df` exists.
records = None

  0%|          | 0/500 [00:00<?, ?it/s]

In [3]:
from dgym.envs.utility import MultipleUtilityFunction, ClassicUtilityFunction

# Create one evaluator per property.
# NOTE(review): `ideal` / `acceptable` look like (low, high) ranges — confirm
# against ClassicUtilityFunction. For Log P and Log S both ranges are identical.
docking_utility_function = ClassicUtilityFunction(ideal=(9, 13), acceptable=(8, 13))
log_P_utility_function = ClassicUtilityFunction(ideal=(-0.5, 3.5), acceptable=(-0.5, 3.5))
log_S_utility_function = ClassicUtilityFunction(ideal=(-4, 1), acceptable=(-4, 1))

# Combine the three evaluators into a single score.
# NOTE(review): presumably `weights` pairs positionally with `utility_functions`
# (0.8 docking, 0.1 Log P, 0.1 Log S) — confirm against MultipleUtilityFunction.
composite_utility_function = MultipleUtilityFunction(
    utility_functions=[
        docking_utility_function,
        log_P_utility_function,
        log_S_utility_function,
    ],
    weights=[0.8, 0.1, 0.1],
)

# Score every row on the non-"Noisy" property columns, listed in the same
# order as the evaluators above.
property_columns = ['ABL1 pIC50', 'Log P', 'Log S']
df['utility'] = composite_utility_function(
    df[property_columns].values,
    precompute=True,
    method='average',
)

In [7]:
# Show the synthetic route of the first row whose utility is exactly 1.0.
# `.iloc[0]` raises IndexError when no row matches.
is_max_utility = df['utility'] == 1.0
df.loc[is_max_utility, 'Synthetic Route'].iloc[0]

{'product': 'CC(C(=O)N1CCC(O)(c2ccccc2O)C(C)C1)c1ccc(N2CCNCC2)cc1',
 'reaction': '81_Grignard_alcohol',
 'reactants': [{'product': 'CC1CN(C(=O)C(C)c2ccc(N3CCNCC3)cc2)CCC1=O',
   'reaction': '13_Carboxylate_and_Amine',
   'reactants': [{'product': 'CC(C(=O)O)c1ccc(N2CCNCC2)cc1',
     'reaction': '18_Halide_and_Amine',
     'reactants': [{'product': 'C1CNCCN1'},
      {'product': 'CC(C(=O)O)c1ccc(I)cc1'}]},
    {'product': 'CC1CNCCC1=O.Cl'}]},
  {'product': 'Oc1ccccc1Br'}]}