# Docking Evaluator Scoring Function with Thompson Sampling for the Adenosine A2B Receptor Library
This notebook tests two Thompson sampling modes: score maximization and Boltzmann-reweighted sampling.

In [1]:
import copy
import json
from ts_main import run_ts, parse_input_dict
from baseline import enumerate_library
import pandas as pd
from rdkit import Chem
from tqdm.auto import tqdm
import numpy as np
from evaluators import FredEvaluator
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

In [3]:
# Register tqdm's pandas integration so that `progress_apply` / `progress_map`
# become available on pandas objects.
tqdm.pandas()

In [4]:
# Silence all RDKit loggers (the "rdApp.*" wildcard) so RDKit messages do not
# clutter the notebook output.
from rdkit import RDLogger

RDLogger.DisableLog("rdApp.*")

In [5]:
# Root directory of the Thompson sampling project. It is substituted for the
# "TS_BASE_DIR" placeholder in the JSON configuration defined further down.
TS_BASE_DIR = "."

### Let's start with the standard Thompson sampling algorithm
In this mode, Thompson sampling searches for the reagent combinations whose products maximize the scoring function.

In [6]:
# Configuration for the standard Thompson sampling run on the A2B library.
#
# Every path (reagent files, receptor design unit, log and results files) is
# written against the literal "TS_BASE_DIR" placeholder, which is replaced with
# the value of the TS_BASE_DIR variable before the JSON is parsed. Relocating
# the project therefore only requires changing TS_BASE_DIR.
#
# NOTE(review): the top hits printed for this run all have a score of 1000.0.
# That looks like a docking-failure sentinel rather than a real score, and FRED
# docking scores are presumably lower-is-better -- confirm whether "ts_mode"
# should really be "maximize" for FredEvaluator.
a2b_json = """{
"reagent_file_list": [
        "TS_BASE_DIR/Data/adenosine_A2B/Input_files/aldehydes_input.smi",
        "TS_BASE_DIR/Data/adenosine_A2B/Input_files/di_oxo_input.smi",
        "TS_BASE_DIR/Data/adenosine_A2B/Input_files/di_nucleo_input.smi"
    ],
    "reaction_smarts": "[#6:1](=[O]).[#6][#6](=[#8])[#6][#6:2](=[#8])[*:3].[#7X2:6]~[#6X3:7](~[#7:8])~[#7X3H2:9]>>[#6:1]1[#6]([#6:2](=[#8])[*:3])=[#6]([#6])[#7:9]~[#6X3:7](~[#7X3H0:8])~[#7:6]1",
    "num_warmup_trials": 10,
    "num_ts_iterations": 50,
    "evaluator_class_name": "FredEvaluator",
    "evaluator_arg": {"design_unit_file": "TS_BASE_DIR/Data/adenosine_A2B/Crystal Structures/A2B_L_-_PREPARED_PROTEIN_A2B_L_-_PREPARED_LIGAND4_A__DU__apo_PHE_A-173.oedu"},
    "ts_mode": "maximize",
    "log_filename": "TS_BASE_DIR/Results/Logs/ts_logs_a2b_run1.txt",
    "results_filename": "TS_BASE_DIR/Results/ts_results_a2b_std_run_1.csv"
}""".replace("TS_BASE_DIR", TS_BASE_DIR)
# Parse the substituted JSON text into the dict consumed by the TS functions.
a2b_dict = json.loads(a2b_json)

In [7]:
# Let ts_main validate/prepare the configuration before running.
# NOTE(review): this appears to mutate a2b_dict in place -- the dict displayed
# later in the notebook carries an extra 'evaluator_class' entry holding a
# FredEvaluator instance; confirm against ts_main.parse_input_dict.
parse_input_dict(a2b_dict)

In [8]:
# Run Thompson sampling with the "maximize" configuration and keep the result
# (presumably a DataFrame of scored products -- verify against ts_main.run_ts).
ts_df = run_ts(a2b_dict)

Warmup 1 of 3:   0%|          | 0/54 [00:00<?, ?it/s]

Warmup 2 of 3:   0%|          | 0/23 [00:00<?, ?it/s]

Warmup 3 of 3:   0%|          | 0/26 [00:00<?, ?it/s]

Cycle:   0%|          | 0/50 [00:00<?, ?it/s]

     score                                             SMILES         Name
0   1000.0     CC[N]C1=NC(c2cccc(OC)c2)C(C(=O)OCC(C)C)=C(C)N1  132_204_323
24  1000.0   CC[N]C1=NC(c2ccc(Cl)cc2Cl)C(C(=O)OCC(C)C)=C(C)N1  141_204_323
26  1000.0    C[N]C1=NC(c2ccc(Cl)cc2Cl)C(C(=O)OCC(C)C)=C(C)N1  141_204_322
27  1000.0      C[N]C1=NC(c2cccc(Cl)c2)C(C(=O)OCC(C)C)=C(C)N1  131_204_322
28  1000.0  CC1=C(C(=O)OCC(C)C)C(c2cccc(Cl)c2)N=C([N]c2ccc...  131_204_324
29  1000.0  CC1=C(C(=O)OCC(C)C)C(c2ccc(Cl)cc2Cl)N=C([N]C(=...  141_204_310
30  1000.0      C[N]C1=NC(c2ccc(Br)cc2)C(C(=O)OCC(C)C)=C(C)N1  136_204_322
32  1000.0  CC1=C(C(=O)OCC(C)C)C(c2ccc3ccccc3c2)N=C([N]c2c...  152_204_324
33  1000.0     CC[N]C1=NC(c2ccc(Br)cc2)C(C(=O)OCC(C)C)=C(C)N1  136_204_323
34  1000.0      CC(=O)[N]C1=NC(c2ccco2)C(C(=O)OCC(C)C)=C(C)N1  100_204_309


## Run TS with Boltzmann-Reweighted Sampling

In [9]:
# Derive the Boltzmann-reweighted configuration from the first run's settings:
# only the sampling mode and the log/results file names differ.
# This is a shallow copy, so nested values (the reagent list, the evaluator
# arguments and any object stored in the dict) are shared with a2b_dict.
a2b_dict_boltzmann = {
    **a2b_dict,
    "ts_mode": "maximize_boltzmann",
    "log_filename": "./Results/Logs/ts_boltzmann_logs_a2b_run1.txt",
    "results_filename": "./Results/ts_boltzmann_results_a2b_std_run_1.csv",
}

In [10]:
# Display the derived configuration to check the overridden entries.
a2b_dict_boltzmann

{'reagent_file_list': ['./Data/adenosine_A2B/Input_files/aldehydes_input.smi',
  './Data/adenosine_A2B/Input_files/di_oxo_input.smi',
  './Data/adenosine_A2B/Input_files/di_nucleo_input.smi'],
 'reaction_smarts': '[#6:1](=[O]).[#6][#6](=[#8])[#6][#6:2](=[#8])[*:3].[#7X2:6]~[#6X3:7](~[#7:8])~[#7X3H2:9]>>[#6:1]1[#6]([#6:2](=[#8])[*:3])=[#6]([#6])[#7:9]~[#6X3:7](~[#7X3H0:8])~[#7:6]1',
 'num_warmup_trials': 10,
 'num_ts_iterations': 50,
 'evaluator_class_name': 'FredEvaluator',
 'evaluator_arg': {'design_unit_file': './Data/adenosine_A2B/Crystal Structures/A2B_L_-_PREPARED_PROTEIN_A2B_L_-_PREPARED_LIGAND4_A__DU__apo_PHE_A-173.oedu'},
 'ts_mode': 'maximize_boltzmann',
 'log_filename': './Results/Logs/ts_boltzmann_logs_a2b_run1.txt',
 'results_filename': './Results/ts_boltzmann_results_a2b_std_run_1.csv',
 'evaluator_class': <evaluators.FredEvaluator at 0x310ca15d0>}

In [11]:
# Run Thompson sampling again with the "maximize_boltzmann" configuration and
# keep the result separately from the first run for comparison.
ts_boltzmann_df = run_ts(a2b_dict_boltzmann)

Warmup 1 of 3:   0%|          | 0/54 [00:00<?, ?it/s]

Warmup 2 of 3:   0%|          | 0/23 [00:00<?, ?it/s]

Warmup 3 of 3:   0%|          | 0/26 [00:00<?, ?it/s]

Cycle:   0%|          | 0/50 [00:00<?, ?it/s]

        score                                             SMILES         Name
42 -11.364771  CC1=C(C(=O)OCc2ccccc2)C(c2ccoc2)n2c(nc3ccccc32)N1  101_206_302
41 -10.477683  CC1=C(C(=O)OC2CCC2)C(c2ccsc2)n2c(nnc2SCc2ccccc...  103_218_319
36 -10.164180       CC1=C(C(=O)OC2CCC2)C(c2cc[nH]n2)N=C(N(C)C)N1  112_218_325
39  -9.722029  CC(=O)C1=C(C)Nc2nc3cc(Br)c(Br)cc3n2C1c1cccc(Cl)c1  131_207_305
9   -9.411142           CC1=C(C(=O)OC2CCCCC2)C(c2ccoc2)n2nnnc2N1  101_220_320
49  -9.123878     CSc1nnc2n1C(c1cccc(O)c1)C(C(=O)OCC1CC1)=C(C)N2  133_221_318
28  -8.702151  CC1=C(C(=O)C2CC2)C(c2ccccc2)n2c(nc3cc(Cl)c(Cl)...  126_208_304
27  -8.391085    CC1=C(C(=O)OC(C)C)C(c2nccs2)n2c(nnc2C(F)(F)F)N1  119_201_313
32  -7.898776  CC1=C(C(=O)OC2CCCC2)C(c2ncco2)n2c(nnc2N2CCN(C)...  118_219_316
46  -7.212148  COc1cc(OC)cc(C2C(C(=O)OC(C)C)=C(C)Nc3nc4cc(Br)...  145_201_305
