# Testing QSAR of Targets

In [1]:
# Load (or reload) IPython's autoreload extension and select mode 2, which
# re-imports edited modules automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

import rdkit
# `import rdkit` on its own does not guarantee that the
# `rdkit.Chem.Draw.IPythonConsole` submodule has been loaded, so import it
# explicitly before touching its settings.
from rdkit.Chem.Draw import IPythonConsole

# A negative value presumably lifts the cap on how many molecule properties
# are rendered in the notebook — confirm against the RDKit documentation.
IPythonConsole.ipython_maxProperties = -1

import dgym as dg

# Root folder holding every dgym data file used below
path = '../../dgym-data'

# Starting deck of molecules, loaded together with the three named reactant fields
deck_sdf = f'{path}/DSi-Poised_Library_annotated.sdf'
deck = dg.MoleculeCollection.load(
    deck_sdf,
    reactant_names=['reagsmi1', 'reagsmi2', 'reagsmi3'],
)

# Reaction library; the column arguments name the JSON fields to read
reactions_json = f'{path}/All_Rxns_rxn_library.json'
reactions = dg.ReactionCollection.from_json(
    path=reactions_json,
    smarts_col='reaction_string',
    classes_col='functional_groups',
)

# Building blocks and the matching fingerprint file
building_blocks_sdf = f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf'
fingerprints_fpb = f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb'
building_blocks = dg.datasets.disk_loader(building_blocks_sdf)
fingerprints = dg.datasets.fingerprints(fingerprints_fpb)

import torch
import pyarrow.parquet as pq

# Take the first column of the sizes table and turn it into a tensor.
# The file is located through `path` so that every data file in this notebook
# shares a single, configurable root directory.
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Picking a target.

In [54]:
import os

# Folder containing the docking config files and receptor files
dockstring_dir = f'{path}/dockstring_targets/'
files = os.listdir(dockstring_dir)

# Both lists are sorted so that configs[i] and targets[i] are paired by position
# (assumes each target has exactly one config file — TODO confirm).
configs = sorted([f for f in files if 'conf' in f])
targets = sorted([f for f in files if 'target' in f])

# Index of the target to use
idx = 5

# Parse the `key = value` lines of the chosen config into {key: float}.
# Whitespace-only lines are skipped, and whitespace around `=` is optional,
# so `key=value` and `key = value` are both accepted. A non-blank line without
# a numeric value after `=` raises ValueError.
with open(dockstring_dir + configs[idx], 'r') as f:
    config_ = {
        key.strip(): float(value)
        for key, _, value in (line.partition('=') for line in f if line.strip())
    }

target = targets[idx]
# The text before the first underscore of the receptor file name is the target name
name = target.split('_')[0]

Set up the docking oracle for the selected target.

In [55]:
import os

from dgym.envs.oracle import DockingOracle, NeuralOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction

# Fixed docking settings. The entries parsed from the target's config file
# (`config_`) are unpacked last, so they override a default with the same key.
config = {
    'search_mode': 'balanced',
    'scoring': 'gnina',
    'seed': 5,
    **config_
}

# Create noiseless evaluators
docking_oracle = DockingOracle(
    f'{name} affinity',
    # `dockstring_dir` already ends with a separator; os.path.join builds the
    # receptor path without doubling it.
    receptor_path=os.path.join(dockstring_dir, target),
    config=config
)

Build the molecule designer.

In [56]:
from dgym.molecule import Molecule
from dgym.envs.designer import Designer, Generator
from dgym.envs.drug_env import DrugEnv
from dgym.agents import SequentialDrugAgent
from dgym.agents.exploration import EpsilonGreedy
from dgym.experiment import Experiment

# Build the generator first, then hand it to the designer together with the
# reaction library; caching is switched on.
generator = Generator(building_blocks, fingerprints, sizes)
designer = Designer(generator, reactions, cache=True)

Generate molecules.

In [57]:
from tqdm.notebook import tqdm

# select first molecule
import random
def select_molecule(deck):
    """
    Pick a random starting molecule from ``deck`` that the designer can extend.

    A molecule qualifies when it has exactly two reactants and
    ``designer.match_reactions`` returns a truthy value for it.

    Parameters
    ----------
    deck : sequence
        Collection that supports ``len()`` and integer indexing.

    Returns
    -------
    The first qualifying molecule found while visiting the deck in random order.

    Raises
    ------
    ValueError
        If no molecule in ``deck`` qualifies (this includes an empty deck).
    """
    # Visit every valid index exactly once, in random order. This keeps the
    # choice uniform over the qualifying molecules, never produces the
    # out-of-range index `len(deck)` (which `random.randint(0, len(deck))`
    # can return), and always terminates instead of recursing without bound.
    for index in random.sample(range(len(deck)), len(deck)):
        candidate = deck[index]
        if len(candidate.reactants) == 2 \
            and designer.match_reactions(candidate):
            return candidate

    raise ValueError(
        'No molecule in the deck has two reactants and a matching reaction.'
    )

# Sampling budget: starting points, random-walk steps per start, analogs per walk
n_starts, n_steps, n_analogs = 20, 3, 10

random_molecules = []
for _ in tqdm(range(n_starts)):

    # draw a starting point from the deck
    initial_molecule = select_molecule(deck)

    # random walk in REAL Space: every step replaces the current molecule
    # with a single freshly designed one
    designer.reset_cache()
    molecule = initial_molecule
    for _ in range(n_steps):
        molecule = designer.design(molecule, 1, temperature=1.0)[0]

    # design a batch of analogs of the walk's end point and collect them
    molecules = designer.design(molecule, n_analogs, temperature=1.0)
    random_molecules += molecules

  0%|          | 0/20 [00:00<?, ?it/s]

Get results of docking.

In [68]:
# Score every generated molecule with the docking oracle. The scores get their
# own name because `results` is reassigned by the similarity search further
# down, which would otherwise discard them.
docking_results = docking_oracle(random_molecules)

In [69]:
import chemfp

# Reuse the fingerprint type of the generator's building-block fingerprints so
# that the generated molecules are fingerprinted the same way.
self = designer.generator

fingerprint_type = self.fingerprints.get_fingerprint_type()

# (id, fingerprint) pairs for the generated molecules. Kept under its own name:
# assigning to `fingerprints` here would overwrite the building-block
# fingerprints that `Generator(...)` is constructed from, breaking a re-run of
# that cell.
query_fingerprints = [
    (m.name, fingerprint_type.from_smi(m.smiles))
    for m in random_molecules
]

# reorder=False is passed so the input order of the molecules is requested as-is
queries = chemfp.load_fingerprints(
    query_fingerprints,
    metadata = fingerprint_type.get_metadata(),
    reorder=False
)

In [88]:
# Search the generated molecules against themselves with a threshold of 0.0
search_options = {'queries': queries, 'targets': queries, 'threshold': 0.0}
results = chemfp.simsearch(**search_options)

# Tabular form of the hits
similarity = results.to_pandas()

queries:   0%|                                                                                                …

In [89]:
# Display the similarity table (columns: query_id, target_id, score)
similarity

Unnamed: 0,query_id,target_id,score
0,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,1.000000
1,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,NC(=O)CN(CC(=O)NC(=O)Cn1c(S)nc2sc3c(c2c1=O)CCC...,0.354167
2,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,COC(=O)N(C(=N)Sc1ccccn1)C(=O)CN(CC(N)=O)c1ccc(...,0.362637
3,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,Cc1cc(C)c(C(=O)NC2CCN(C(=O)CN(CC(N)=O)c3ccc(C=...,0.393258
4,CCCCOc1ccc(-c2nnc(NC(=O)CN(CC(N)=O)c3ccc(C=O)c...,Cc1cc(C)c(C(=O)N(C(=O)CN(CC(N)=O)c2ccc(C=O)cc2...,0.366667
...,...,...,...
39995,CC(C)(CO)CNCc1ccc(Oc2ncc(Br)cn2)cc1,CC1OC2(C)CCC1(NCc1ccc(Oc3ncc(Br)cn3)cc1)CC2,0.437500
39996,CC(C)(CO)CNCc1ccc(Oc2ncc(Br)cn2)cc1,O=C(c1ccco1)N1CCC(NCc2ccc(Oc3ncc(Br)cn3)cc2)CC1,0.385714
39997,CC(C)(CO)CNCc1ccc(Oc2ncc(Br)cn2)cc1,CN(Cc1ccc(Oc2ncc(Br)cn2)cc1)Cc1ccc(Br)cc1[N+](...,0.393939
39998,CC(C)(CO)CNCc1ccc(Oc2ncc(Br)cn2)cc1,CCOC(=O)C(Cc1ccncc1)NCc1ccc(Oc2ncc(Br)cn2)cc1,0.439394
