# Docking Evaluator Scoring Function with Thompson Sampling for $M_{pro}$ Library
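This notebook runs Thompson sampling (TS) over the $M_{pro}$ reagent library with a docking-based evaluator (`FredEvaluator`) in two modes: score maximization and Boltzmann-reweighted sampling.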

In [1]:
import copy
import json
from ts_main import run_ts, parse_input_dict
from baseline import enumerate_library
import pandas as pd
from rdkit import Chem
from tqdm.auto import tqdm
import numpy as np
from evaluators import FredEvaluator
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

In [2]:
# Enable tqdm's pandas integration (adds the `progress_apply` family of methods).
tqdm.pandas()

In [3]:
# Suppress RDKit warnings: turn off every RDKit log channel matching 'rdApp.*'
# so the progress output of the sampling runs below stays readable.
from rdkit import RDLogger 
RDLogger.DisableLog('rdApp.*') 

In [4]:
# Base directory substituted for the "TS_BASE_DIR" placeholder in the
# configuration below; "." means paths are relative to the working directory.
TS_BASE_DIR = "."

### Let's start with the standard Thompson sampling algorithm
Here we search for the reagent combinations whose products maximize the scoring function (`"ts_mode": "maximize"`).

In [28]:
# Thompson sampling configuration for the M_pro library, standard "maximize" mode.
#
# The dict is built directly instead of text-substituting TS_BASE_DIR into a
# JSON literal: raw substitution yields invalid JSON when the base directory
# contains backslashes or quotes. Every path (reagents, design unit, log and
# results) is derived from TS_BASE_DIR so that changing it relocates the whole
# run consistently; with TS_BASE_DIR = "." the values are unchanged.
#
# NOTE(review): the saved output of the maximize run in this notebook lists only
# rows with score == 1000.0. If FredEvaluator uses 1000.0 as a sentinel for a
# failed docking (not visible from here), "maximize" would be rewarding failures
# -- confirm the intended ts_mode against the evaluator's score convention.
mpro_dict = {
    "reagent_file_list": [
        f"{TS_BASE_DIR}/Data/M_pro/Input_files/coupled_aa.smi",
        f"{TS_BASE_DIR}/Data/M_pro/Input_files/aldehydes_input.smi",
        f"{TS_BASE_DIR}/Data/M_pro/Input_files/acids_input.smi",
    ],
    "reaction_smarts": "[#7X3;H2:1].[#6:2](=O).[#6:3](=O)[O]>>[#6:2][#7:1][#6:3]",
    "num_warmup_trials": 10,
    "num_ts_iterations": 200,
    "evaluator_class_name": "FredEvaluator",
    "evaluator_arg": {
        "design_unit_file": f"{TS_BASE_DIR}/Data/M_pro/crystal_structures/3CL_COVID_MOONSHOT_AB__DU__S5L_A-405.oedu"
    },
    "ts_mode": "maximize",
    "log_filename": f"{TS_BASE_DIR}/Results/Logs/ts_logs_run1.txt",
    "results_filename": f"{TS_BASE_DIR}/Results/ts_results_mpro_std_run_1.csv",
}
# Keep the serialized form available under its original name; json.dumps takes
# care of escaping, so json.loads(mpro_json) always round-trips to mpro_dict.
mpro_json = json.dumps(mpro_dict, indent=4)

In [29]:
# Pre-process the configuration with ts_main.parse_input_dict before running.
# NOTE(review): this appears to modify mpro_dict in place -- the copy displayed
# further down carries an 'evaluator_class' entry holding a FredEvaluator
# instance that is not part of the configuration above; confirm in ts_main.
parse_input_dict(mpro_dict)

In [30]:
# Run Thompson sampling with the maximize-mode configuration. The cell output
# shows three warm-up passes and a 200-step cycle, followed by a table with
# score / SMILES / Name columns (presumably the head of the returned frame).
ts_df = run_ts(mpro_dict)

Warmup 1 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Warmup 2 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Warmup 3 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Cycle:   0%|          | 0/200 [00:00<?, ?it/s]

      score                                             SMILES         Name
175  1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)[C@H](CCC(...  123_205_304
14   1000.0  C#CCCCCCCCCCN(Cc1ccc(OCCCC)cc1)C(C(=O)N1CCC2(C...  196_204_304
152  1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)[C@H](CCC(...  125_205_304
153  1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)[C@H](CCC(...  135_205_304
148  1000.0  C#CCCCCCCCCCN(Cc1c(COP(=O)(O)O)cnc(C)c1O)[C@H]...  124_289_304
79   1000.0  CCCCOc1ccc(CN(Cc2ccc(F)cc2)[C@H](CCC(=O)NC2c3c...  124_204_374
158  1000.0  O=C(CC[C@H](C(=O)N1CCC(N2CCC[C@H]2C(=O)O)CC1)N...  124_205_386
160  1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)C(C(=O)N1C...  196_205_304
57   1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)[C@H](CCC(...  124_205_304
161  1000.0  C#CCCCCCCCCCN(Cc1ccc(OCc2ccccc2)cc1)[C@H](CCC(...  120_205_304


## Run TS with Boltzmann-Reweighted Sampling

In [31]:
# Boltzmann-mode configuration: the maximize-mode settings with the sampling
# mode and the output locations overridden. This is a shallow copy, so nested
# values (the reagent list, the evaluator arguments and any object already
# stored in mpro_dict) are shared with mpro_dict rather than duplicated.
mpro_dict_boltzmann = {
    **mpro_dict,
    "ts_mode": "maximize_boltzmann",
    "log_filename": "./Results/Logs/ts_boltzmann_logs_run1.txt",
    "results_filename": "./Results/ts_boltzmann_results_mpro_std_run_1.csv",
}

In [32]:
# Display the Boltzmann configuration to check the overridden entries
# (ts_mode, log_filename, results_filename) before launching the run.
mpro_dict_boltzmann

{'reagent_file_list': ['./Data/M_pro/Input_files/coupled_aa.smi',
  './Data/M_pro/Input_files/aldehydes_input.smi',
  './Data/M_pro/Input_files/acids_input.smi'],
 'reaction_smarts': '[#7X3;H2:1].[#6:2](=O).[#6:3](=O)[O]>>[#6:2][#7:1][#6:3]',
 'num_warmup_trials': 10,
 'num_ts_iterations': 200,
 'evaluator_class_name': 'FredEvaluator',
 'evaluator_arg': {'design_unit_file': './Data/M_pro/crystal_structures/3CL_COVID_MOONSHOT_AB__DU__S5L_A-405.oedu'},
 'ts_mode': 'maximize_boltzmann',
 'log_filename': './Results/Logs/ts_boltzmann_logs_run1.txt',
 'results_filename': 'ts_boltzmann_results_mpro_std_run_1.csv',
 'evaluator_class': <evaluators.FredEvaluator at 0x320a8d850>}

In [33]:
# Run Thompson sampling again, this time with the "maximize_boltzmann"
# configuration; the result is kept separately from ts_df for comparison.
ts_boltzmann_df = run_ts(mpro_dict_boltzmann)

Warmup 1 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Warmup 2 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Warmup 3 of 3:   0%|          | 0/100 [00:00<?, ?it/s]

Cycle:   0%|          | 0/200 [00:00<?, ?it/s]

        score                                             SMILES         Name
195 -8.940859  O=C(O)COC[C@H]1CCCN1C(=O)[C@H](Cc1ccc(O)c(O)c1...  106_226_315
117 -7.988661  C/C(=C\c1ccccc1)CN(Cc1ccc(I)cc1)[C@@H](Cc1ccc(...  108_238_377
171 -7.826283  C/C=C/CN(Cc1c[nH]c2ccccc12)[C@H](Cc1cc(Br)c(O)...  148_235_319
151 -7.808753  CON(CCC(=O)O)C(=O)[C@H](Cc1ccc(O)c(O)c1)N(Cc1c...  109_271_382
31  -7.738667  C#CCCCCCN(CC1CCCCCCC1)[C@H](Cc1cc(Br)c(O)c(Br)...  145_269_303
16  -7.732718  CC(CN(C/C=C/c1ccc(-c2ccc(Cl)cc2)o1)[C@@H](Cc1c...  108_276_386
186 -7.614391  C[C@@H](C(=O)O)N(C)C(=O)C1(N(Cc2cccc(Cl)c2)Cc2...  164_209_364
56  -7.582836  O=C(O)C1CN(C(=O)[C@H](Cc2ccc(O)c(O)c2)N(Cc2ccc...  114_209_309
190 -7.491386  N#CCCN(Cc1cccc(Cl)c1F)[C@@H](Cc1ccc(O)c(O)c1)C...  115_210_307
150 -7.410376  C/C(=C\c1ccccc1)CN(Cc1ccc(Br)cc1)C1(C(=O)N2CSC...  165_203_318
