In [1]:
import math
import os
import pickle
import random

import numpy as np
import pandas as pd
import plotly.graph_objects as go
# import plotly.offline as pyo
# pyo.init_notebook_mode()
from rdkit import Chem
from rdkit.Chem import Draw
from tqdm import tqdm

In [2]:


# --- Configuration -----------------------------------------------------------
# Identifier of the run whose artefacts are read from Runs/<run_id>/.
run_id = '202305-2911-2320-5a95df0e-3008-4ebe-acd8-ecb3b50607c7'
# input_file = f'Runs/{run_id}/routes_df.csv'
# routes_df = pd.read_csv(input_file)

# Edge length in pixels of each (square) molecule image drawn further below.
img_size = 800

input_file_routes = f'Runs/{run_id}/targ_routes.pickle'
input_file_distances = f'Runs/{run_id}/targ_to_purch_distances.pickle'

# --- Load data ---------------------------------------------------------------
# Routes are read first, distances second.
# NOTE: pickle.load can execute arbitrary code; only point this at files you
# produced yourself.
loaded = []
for pickle_path in (input_file_routes, input_file_distances):
    with open(pickle_path, 'rb') as handle:
        loaded.append(pickle.load(handle))

targ_routes_dict, distances_dict = loaded



In [3]:
# Number of targets to draw at random, and the seed that makes the draw repeatable.
nr_targets = 10
seed = 42
random.seed(seed)

# Hand-picked targets that are always plotted, listed before the random ones.
showcase_targets = [
#     "FC(F)(F)c1csc(Nc2cc3ccccc3cn2)n1", # Sample 2 step route
    "CS(=O)(=O)Cc1cccc2c(C(c3ccc(Cl)cc3F)C3CC3C#N)c[nH]c12", # Example for building tree
    "O=C(NCCc1ccc(Cl)cc1C1CC1)c1ccc(Oc2cc3c(cc2Cl)C(C(=O)O)CCO3)cc1", # Sample 3 step route
    "C=CC(OCOC)c1ccc(N(c2cc3oc(-c4ccc(F)cc4)c(C(=O)NC)c3cc2C2CC2)S(C)(=O)=O)cc1Cl", # Sample 3 step route - complex end
]

# Random draw over all known targets. SMILES containing "/" are dropped AFTER
# sampling, so fewer than nr_targets random entries may remain.
random_targets = [
    smiles
    for smiles in random.sample(list(targ_routes_dict), nr_targets)
    if "/" not in smiles
]

sample_targets = showcase_targets + random_targets

In [4]:
# Show the final list of target SMILES: the hand-picked examples first,
# followed by the randomly sampled targets.
sample_targets

['CS(=O)(=O)Cc1cccc2c(C(c3ccc(Cl)cc3F)C3CC3C#N)c[nH]c12',
 'O=C(NCCc1ccc(Cl)cc1C1CC1)c1ccc(Oc2cc3c(cc2Cl)C(C(=O)O)CCO3)cc1',
 'C=CC(OCOC)c1ccc(N(c2cc3oc(-c4ccc(F)cc4)c(C(=O)NC)c3cc2C2CC2)S(C)(=O)=O)cc1Cl',
 'Brc1cc(I)sc1-c1nnc[nH]1',
 'COc1ccc2nc(-c3ccc(C)c(NC(=O)C(C)(C)C)c3)c(-c3ccc(F)cc3)n2n1',
 'CCOC(=O)Cc1ccc(Oc2ccc3ncccc3c2N)c(OC)c1',
 'CCCc1c(Cc2ccc(-c3ccccc3-c3noc(=O)[nH]3)cc2)c(=O)n(C2CCC(=CCO)CC2)c2ncnn12',
 'Cc1cn2cc(Br)cc(S(N)(=O)=O)c2n1',
 'CC(C)N1CCN(C(CN2CCN(CCCc3ccccc3-c3ccccc3)CC2)c2ccc(F)cc2)CC1',
 'COCC[C@H](Oc1ncnc2c1cnn2-c1ncccc1Cl)C(=O)Nc1ccc(C)cn1',
 'CCCCCNc1nc(N)nc(C)c1Cc1ccc(OCCN(C)C)cc1',
 'Cc1ccc(NS(=O)(=O)Nc2ncnc(OCCOc3ncc(Br)cn3)c2-c2ccc(C)cc2)cc1']

In [5]:

def _rank_label_legends(frame):
    """Return one legend per row of ``frame``: integer rank on the first line, label on the second.

    ``frame`` must provide the columns 'distance_to_target_rank' and 'label'.
    """
    return list("Rank: " + frame['distance_to_target_rank'].astype(int).astype(str) + "\n" +
                frame['label'].astype(str))


def _save_mol_grid(smiles, legends, path):
    """Draw the molecules given as SMILES in a 3-column grid and save the image to ``path``.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings, one per grid cell.
    legends : iterable of str
        Caption for each grid cell, in the same order as ``smiles``.
    path : str
        Destination file of the rendered image.
    """
    mols = [Chem.MolFromSmiles(x) for x in smiles]
    if not mols:
        # Nothing was selected (very small candidate table): skip the image
        # instead of asking RDKit to draw a grid with zero rows.
        return
    grid = Draw.MolsToGridImage(
        mols=mols,
        molsPerRow=3,
        subImgSize=(img_size, img_size),
        legends=list(legends),
        highlightAtomLists=None,
        highlightBondLists=None,
        useSVG=False,
        returnPNG=False
    )
    grid.save(path)


for sample_target in tqdm(sample_targets):
    # One output folder per target; exist_ok avoids the separate exists() check.
    output_folder = f"Plots/{run_id}/{sample_target}"
    os.makedirs(output_folder, exist_ok=True)

    print(sample_target)
    sample_routes = targ_routes_dict[sample_target]["route_1"].sort_values('depth')
    sample_distances = distances_dict[sample_target]

    # Combine route membership (label/depth) with the distance table. The outer
    # join keeps molecules that appear on only one side; those missing from the
    # route are labelled 'not_in_route'.
    sample_routes_with_dist = pd.merge(
        sample_routes[["label", "smiles", "depth"]],
        sample_distances,
        on='smiles',
        how='outer'
    )
    sample_routes_with_dist["label"] = sample_routes_with_dist["label"].fillna('not_in_route')
    # Rank 0 is assigned to the target so that it sorts to the first row.
    sample_routes_with_dist.loc[sample_routes_with_dist['label'] == 'Target', 'distance_to_target_rank'] = 0
    sample_routes_with_dist = sample_routes_with_dist.sort_values('distance_to_target_rank').reset_index()
    nr_rows = len(sample_routes_with_dist.index)

    # 1. Plot target
    target_mol = Chem.MolFromSmiles(sample_target)
    fig_target = Draw.MolToImage(
        target_mol,
        size=(img_size, img_size),
        kekulize=True,
        wedgeBonds=True,
        fitImage=False,
        options=None,
        canvas=None
    )
    fig_target.save(f"{output_folder}/target.jpg")

    # 2. Plot molecules in route
    route_legends = ("Depth: " + sample_routes['depth'].astype(str) + "\n" +
                     "Rank: " + sample_routes['distance_to_target_rank'].round(0).astype(str) + "\n" +
                     "Distance: " + sample_routes['Tanimoto_distance_from_target'].round(3).astype(str))
    _save_mol_grid(sample_routes['smiles'], route_legends, f"{output_folder}/mol_in_route.jpg")

    # 3. Plot closest: the first mols_to_plot rows of the rank-sorted table
    #    (the file name reports mols_to_plot - 1).
    mols_to_plot = 11
    df_to_plot = sample_routes_with_dist.head(mols_to_plot)
    closest_legends = ("Rank: " + df_to_plot['distance_to_target_rank'].astype(str) + "\n" +
                       df_to_plot['label'].astype(str))
    _save_mol_grid(df_to_plot['smiles'], closest_legends,
                   f"{output_folder}/mol_closest_{mols_to_plot-1}.jpg")

    # 4. Plot quantiles
    # Close: rows 1-3 of the sorted table (row 0 is skipped). Positional slicing
    # yields fewer rows instead of raising KeyError when the table is short.
    df_to_plot = sample_routes_with_dist.iloc[1:4]
    _save_mol_grid(df_to_plot['smiles'], _rank_label_legends(df_to_plot),
                   f"{output_folder}/mol_3_close.jpg")

    # Mid: three rows around the middle of the table; positions that fall
    # outside the table are dropped rather than raising KeyError.
    mid_index = int(np.round(nr_rows/2))
    mid_positions = [pos for pos in (mid_index-4, mid_index, mid_index+5) if 0 <= pos < nr_rows]
    df_to_plot = sample_routes_with_dist.iloc[mid_positions]
    _save_mol_grid(df_to_plot['smiles'], _rank_label_legends(df_to_plot),
                   f"{output_folder}/mol_3_mid.jpg")

    # Far: the last three rows of the sorted table.
    df_to_plot = sample_routes_with_dist.tail(3)
    _save_mol_grid(df_to_plot['smiles'], _rank_label_legends(df_to_plot),
                   f"{output_folder}/mol_3_far.jpg")



  0%|                                                                                               | 0/12 [00:00<?, ?it/s]

CS(=O)(=O)Cc1cccc2c(C(c3ccc(Cl)cc3F)C3CC3C#N)c[nH]c12


  8%|███████▎                                                                               | 1/12 [00:00<00:08,  1.23it/s]

O=C(NCCc1ccc(Cl)cc1C1CC1)c1ccc(Oc2cc3c(cc2Cl)C(C(=O)O)CCO3)cc1


 17%|██████████████▌                                                                        | 2/12 [00:01<00:07,  1.29it/s]

C=CC(OCOC)c1ccc(N(c2cc3oc(-c4ccc(F)cc4)c(C(=O)NC)c3cc2C2CC2)S(C)(=O)=O)cc1Cl


 25%|█████████████████████▊                                                                 | 3/12 [00:02<00:06,  1.29it/s]

Brc1cc(I)sc1-c1nnc[nH]1


 33%|█████████████████████████████                                                          | 4/12 [00:03<00:06,  1.31it/s]

COc1ccc2nc(-c3ccc(C)c(NC(=O)C(C)(C)C)c3)c(-c3ccc(F)cc3)n2n1


 42%|████████████████████████████████████▎                                                  | 5/12 [00:03<00:05,  1.28it/s]

CCOC(=O)Cc1ccc(Oc2ccc3ncccc3c2N)c(OC)c1


 50%|███████████████████████████████████████████▌                                           | 6/12 [00:04<00:04,  1.31it/s]

CCCc1c(Cc2ccc(-c3ccccc3-c3noc(=O)[nH]3)cc2)c(=O)n(C2CCC(=CCO)CC2)c2ncnn12


 58%|██████████████████████████████████████████████████▊                                    | 7/12 [00:05<00:03,  1.27it/s]

Cc1cn2cc(Br)cc(S(N)(=O)=O)c2n1


 67%|██████████████████████████████████████████████████████████                             | 8/12 [00:06<00:03,  1.30it/s]

CC(C)N1CCN(C(CN2CCN(CCCc3ccccc3-c3ccccc3)CC2)c2ccc(F)cc2)CC1


 75%|█████████████████████████████████████████████████████████████████▎                     | 9/12 [00:06<00:02,  1.30it/s]

COCC[C@H](Oc1ncnc2c1cnn2-c1ncccc1Cl)C(=O)Nc1ccc(C)cn1


 83%|███████████████████████████████████████████████████████████████████████▋              | 10/12 [00:07<00:01,  1.31it/s]

CCCCCNc1nc(N)nc(C)c1Cc1ccc(OCCN(C)C)cc1


 92%|██████████████████████████████████████████████████████████████████████████████▊       | 11/12 [00:08<00:00,  1.32it/s]

Cc1ccc(NS(=O)(=O)Nc2ncnc(OCCOc3ncc(Br)cn3)c2-c2ccc(C)cc2)cc1


100%|██████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:09<00:00,  1.30it/s]
