In [1]:
import requests
import os
import shutil
import json
import pandas as pd
import yaml

import utils, request_models

import warnings
warnings.filterwarnings('ignore')


In [2]:
######## File paths ########
with open("../configs/self-driving-demo.yaml", "r") as f:
    config = yaml.safe_load(f)

protein_file_path = config['paths']['protein_file_path']
diffdock_output_dir = config['paths']['diffdock_output_dir']
dsmbind_input_dir = config['paths']['diffdock_output_dir']

starting_molecule_csv = config['paths']['starting_molecule_csv']
molmim_generated_csv = config['paths']['molmim_generated_csv']
dsmbind_predictions_csv = config['paths']['dsmbind_predictions_csv']
results_csv = config['paths']['results_csv']

utils.prepare_output_directory(diffdock_output_dir)
utils.prepare_output_directory(dsmbind_input_dir)

utils.delete_datasets(molmim_generated_csv)
utils.delete_datasets(dsmbind_predictions_csv)
utils.delete_datasets(results_csv)

# Get folded protein
folded_protein = utils.file_to_json_compatible_string(protein_file_path)

# Get starting molecules
df_starting_molecules = pd.read_csv(starting_molecule_csv)

# Round 0
print("Round 0")
molecule_name = df_starting_molecules['Molecules'][0]
molecule = df_starting_molecules['Smiles'][0]

# Molecular Generation with MolMIM
molmim_response = request_models.call_molmim(molecule)
generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
utils.update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

# Protein-Ligand Docking with DiffDock
diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

# Binding Affinity with DSMBind
os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")


File '../data/molmim_generated_molecules.csv' deleted successfully.
File '../data/dsmbind_predictions.csv' deleted successfully.
File '../data/results.csv' deleted successfully.
Round 0


Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


[NeMo I 2024-12-18 19:21:22 megatron_hiddens:110] Registered hidden transform sampled_var_cond_gaussian at bionemo.model.core.hiddens_support.SampledVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:21:22 megatron_hiddens:110] Registered hidden transform interp_var_cond_gaussian at bionemo.model.core.hiddens_support.InterpVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:21:23 infer:70] 
    
    ************** Experiment Configuration ***********
[NeMo I 2024-12-18 19:21:23 infer:71] 
    data:
      processed: false
      raw_inference_data_dir: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_inputs
      predictions_csv: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_predictions.csv
      processed_inference_data_path: ${oc.env:BIONEMO_HOME}/examples/tests/test_data/molecule/dsmbind/inference/processed.pkl
    model:
      hidden_size: 256
      mpn_depth: 3
      num_heads: 4
      dropout: 0.1
      threshold: 10.0
      vocab_size: 38
      aa_siz

  0%|          | 0/10 [00:00<?, ?it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 40%|████      | 4/10 [00:00<00:00, 10.32it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 90%|█████████ | 9/10 [00:00<00:00, 20.36it/s]Failed to patch pandas - unable to change molecule rendering
100%|██████████| 10/10 [00:00<00:00, 19.14it/s]
100%|██████████| 8/8 [00:00<00:00, 775.36it/s]


'Molecule'
[NeMo I 2024-12-18 19:21:24 infer:98] ************** Loading Inference Dataset ***********
[NeMo I 2024-12-18 19:21:24 infer:106] ************** Starting Inference ***********
[NeMo I 2024-12-18 19:21:24 infer:108] Predictions: [-1742725.875, -1473968.25, -1567259.375, -1626483.25, -1125988.75, -1357558.125, -1624152.0, -1667095.5]


0

In [3]:

df_molmim = pd.read_csv(molmim_generated_csv) 
df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

df_joined.to_csv('../data/results.csv')

In [4]:
def update_dataframe_molmim_generated_molecules(molmim_generated, 
                                                starting_molecule_name, 
                                                molmim_generated_csv = molmim_generated_csv):
    import pandas as pd

    df = pd.DataFrame(molmim_generated)
    # Reset the index and make it a column
    df.reset_index(inplace=True)
    df.rename(columns={'smiles':'generated_smiles',
                    'score':'molmim_qed_score',
                    'index':'generated_compound_index'},
                    inplace=True)
    df['starting_molecule'] = starting_molecule_name

    if os.path.exists(molmim_generated_csv):
        print("The CSV file exists.")
        df_old = pd.read_csv(molmim_generated_csv)
        df_merged = pd.concat([df_old, df], ignore_index=True)

        df_merged.to_csv(molmim_generated_csv, index=False)
    else:
        df.to_csv(molmim_generated_csv, index=False)

In [5]:
threshold_binding_affinity = config['threshold_binding_affinity']
threshold_for_number_of_selected_molecules = config['threshold_for_number_of_selected_molecules']

rd = 0
while len(df_joined[df_joined['DSMBind_predictions'] < threshold_binding_affinity]) < 5:
    rd = rd + 1
    print(f"Round {rd}")

    molecule_name = df_starting_molecules['Molecules'][rd]
    molecule = df_starting_molecules['Smiles'][rd]

    # Molecular Generation with MolMIM
    print("MolMIM...")
    molmim_response = request_models.call_molmim(molecule)
    generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
    update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

    # Protein-Ligand Docking with DiffDock
    print("DiffDock...")
    diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
    utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

    # Binding Affinity with DSMBind
    print("DSMBind...")
    os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")

    df_molmim = pd.read_csv(molmim_generated_csv) 
    df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
    df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

    df_joined.to_csv('../data/results.csv')


Round 1
MolMIM...
The CSV file exists.
DiffDock...
DSMBind...


Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


[NeMo I 2024-12-18 19:22:11 megatron_hiddens:110] Registered hidden transform sampled_var_cond_gaussian at bionemo.model.core.hiddens_support.SampledVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:22:11 megatron_hiddens:110] Registered hidden transform interp_var_cond_gaussian at bionemo.model.core.hiddens_support.InterpVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:22:12 infer:70] 
    
    ************** Experiment Configuration ***********
[NeMo I 2024-12-18 19:22:12 infer:71] 
    data:
      processed: false
      raw_inference_data_dir: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_inputs
      predictions_csv: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_predictions.csv
      processed_inference_data_path: ${oc.env:BIONEMO_HOME}/examples/tests/test_data/molecule/dsmbind/inference/processed.pkl
    model:
      hidden_size: 256
      mpn_depth: 3
      num_heads: 4
      dropout: 0.1
      threshold: 10.0
      vocab_size: 38
      aa_siz

  0%|          | 0/10 [00:00<?, ?it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 40%|████      | 4/10 [00:00<00:00, 10.45it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 90%|█████████ | 9/10 [00:00<00:00, 20.57it/s]Failed to patch pandas - unable to change molecule rendering
100%|██████████| 10/10 [00:00<00:00, 19.34it/s]
100%|██████████| 10/10 [00:00<00:00, 792.04it/s]


[NeMo I 2024-12-18 19:22:12 infer:98] ************** Loading Inference Dataset ***********
[NeMo I 2024-12-18 19:22:12 infer:106] ************** Starting Inference ***********
[NeMo I 2024-12-18 19:22:13 infer:108] Predictions: [-1667492.0, -1742725.875, -1270149.0, -1404532.0, -1567259.375, -1626483.25, -1125988.75, -1631920.5, -1423121.875, -1667095.5]
Round 2
MolMIM...
The CSV file exists.
DiffDock...
DSMBind...


Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


[NeMo I 2024-12-18 19:23:00 megatron_hiddens:110] Registered hidden transform sampled_var_cond_gaussian at bionemo.model.core.hiddens_support.SampledVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:23:00 megatron_hiddens:110] Registered hidden transform interp_var_cond_gaussian at bionemo.model.core.hiddens_support.InterpVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:23:00 infer:70] 
    
    ************** Experiment Configuration ***********
[NeMo I 2024-12-18 19:23:00 infer:71] 
    data:
      processed: false
      raw_inference_data_dir: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_inputs
      predictions_csv: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_predictions.csv
      processed_inference_data_path: ${oc.env:BIONEMO_HOME}/examples/tests/test_data/molecule/dsmbind/inference/processed.pkl
    model:
      hidden_size: 256
      mpn_depth: 3
      num_heads: 4
      dropout: 0.1
      threshold: 10.0
      vocab_size: 38
      aa_siz

  0%|          | 0/15 [00:00<?, ?it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 27%|██▋       | 4/15 [00:00<00:01, 10.34it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 60%|██████    | 9/15 [00:00<00:00, 20.43it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 93%|█████████▎|

[NeMo I 2024-12-18 19:23:01 infer:98] ************** Loading Inference Dataset ***********
[NeMo I 2024-12-18 19:23:01 infer:106] ************** Starting Inference ***********
[NeMo I 2024-12-18 19:23:02 infer:108] Predictions: [-1667492.0, -1742725.875, -1270149.0, -1404532.0, -1563523.0, -1567259.375, -1403730.625, -1626483.25, -1125988.75, -1631920.5, -1477082.125, -1622407.875, -1954015.0, -1423121.875, -1667095.5]
Round 3
MolMIM...
The CSV file exists.
DiffDock...
DSMBind...


Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


[NeMo I 2024-12-18 19:23:48 megatron_hiddens:110] Registered hidden transform sampled_var_cond_gaussian at bionemo.model.core.hiddens_support.SampledVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:23:48 megatron_hiddens:110] Registered hidden transform interp_var_cond_gaussian at bionemo.model.core.hiddens_support.InterpVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:23:49 infer:70] 
    
    ************** Experiment Configuration ***********
[NeMo I 2024-12-18 19:23:49 infer:71] 
    data:
      processed: false
      raw_inference_data_dir: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_inputs
      predictions_csv: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_predictions.csv
      processed_inference_data_path: ${oc.env:BIONEMO_HOME}/examples/tests/test_data/molecule/dsmbind/inference/processed.pkl
    model:
      hidden_size: 256
      mpn_depth: 3
      num_heads: 4
      dropout: 0.1
      threshold: 10.0
      vocab_size: 38
      aa_siz

  0%|          | 0/20 [00:00<?, ?it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 20%|██        | 4/20 [00:00<00:01, 10.59it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 45%|████▌     | 9/20 [00:00<00:00, 20.62it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 70%|███████   |

[NeMo I 2024-12-18 19:23:50 infer:98] ************** Loading Inference Dataset ***********
[NeMo I 2024-12-18 19:23:50 infer:106] ************** Starting Inference ***********
[NeMo I 2024-12-18 19:23:50 infer:108] Predictions: [-1667492.0, -1742725.875, -1270149.0, -1404532.0, -1563523.0, -1246019.875, -1567259.375, -1403730.625, -1626483.25, -1125988.75, -1631920.5, -1267375.25, -1269627.0, -1477082.125, -1349006.5, -1252324.75, -1622407.875, -1954015.0, -1423121.875, -1667095.5]
Round 4


KeyError: 4

In [7]:
len(df_joined[df_joined['DSMBind_predictions'] < -1400000])

13

In [None]:
df_joined