In [1]:
import requests
import os
import shutil
import json
import pandas as pd
import yaml

import utils, request_models

import warnings
warnings.filterwarnings('ignore')


In [2]:
######## File paths ########
# Load the demo configuration; every input/output location used in this
# notebook is read from its 'paths' section.
with open("../configs/self-driving-demo.yaml", "r") as f:
    config = yaml.safe_load(f)

protein_file_path = config['paths']['protein_file_path']
diffdock_output_dir = config['paths']['diffdock_output_dir']
dsmbind_input_dir = config['paths']['dsmbind_input_dir']

starting_molecule_csv = config['paths']['starting_molecule_csv']
molmim_generated_csv = config['paths']['molmim_generated_csv']
dsmbind_predictions_csv = config['paths']['dsmbind_predictions_csv']
results_csv = config['paths']['results_csv']

# Reset the two working directories before the run.
# NOTE(review): presumably prepare_output_directory ensures the directory
# exists and delete_folders_in_folder removes its sub-folders; both helpers
# live in utils and are not visible here -- confirm there.
utils.prepare_output_directory(diffdock_output_dir)
utils.prepare_output_directory(dsmbind_input_dir)
utils.delete_folders_in_folder(diffdock_output_dir)
utils.delete_folders_in_folder(dsmbind_input_dir)

# Remove the CSV artifacts left by a previous run so this run starts clean
# (the cell output reports each file as deleted).
utils.delete_datasets(molmim_generated_csv)
utils.delete_datasets(dsmbind_predictions_csv)
utils.delete_datasets(results_csv)

# Get folded protein
folded_protein = utils.file_to_json_compatible_string(protein_file_path)

# Get starting molecules
# The CSV is expected to have 'Molecules' (name) and 'Smiles' columns; both are read below.
df_starting_molecules = pd.read_csv(starting_molecule_csv)

# Round 0
# Seed the pipeline with the first starting molecule (row 0); later rounds use rows 1, 2, ...
print("Round 0")
molecule_name = df_starting_molecules['Molecules'][0]
molecule = df_starting_molecules['Smiles'][0]

# Molecular Generation with MolMIM
molmim_response = request_models.call_molmim(molecule)
# Newline-separated SMILES of all generated molecules; this string is what gets passed to DiffDock.
generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
# NOTE(review): presumably records the generated molecules in molmim_generated_csv
# (that file is read back in the next cell); the utils implementation is not visible here -- confirm.
utils.update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

# Protein-Ligand Docking with DiffDock
diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
# NOTE(review): presumably writes the docked poses into the DiffDock output /
# DSMBind input directories prepared above -- confirm in utils.
utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

# Binding Affinity with DSMBind
# Runs the inference script as a shell command. os.system returns the command's
# exit status, which becomes this cell's displayed value; the status is not checked here.
os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")


File '../data/molmim_generated_molecules.csv' deleted successfully.
File '../data/dsmbind_predictions.csv' deleted successfully.
File '../data/results.csv' deleted successfully.
Round 0
ligand subfolders are  Nirmatrelvir_compound0
ligand subfolders are  Nirmatrelvir_compound1
ligand subfolders are  Nirmatrelvir_compound2
ligand subfolders are  Nirmatrelvir_compound3
ligand subfolders are  Nirmatrelvir_compound4


Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


[NeMo I 2024-12-18 19:46:02 megatron_hiddens:110] Registered hidden transform sampled_var_cond_gaussian at bionemo.model.core.hiddens_support.SampledVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:46:02 megatron_hiddens:110] Registered hidden transform interp_var_cond_gaussian at bionemo.model.core.hiddens_support.InterpVarGaussianHiddenTransform
[NeMo I 2024-12-18 19:46:03 infer:70] 
    
    ************** Experiment Configuration ***********
[NeMo I 2024-12-18 19:46:03 infer:71] 
    data:
      processed: false
      raw_inference_data_dir: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_inputs
      predictions_csv: ${oc.env:BIONEMO_HOME}/generative-virtual-screening/data/dsmbind_predictions.csv
      processed_inference_data_path: ${oc.env:BIONEMO_HOME}/examples/tests/test_data/molecule/dsmbind/inference/processed.pkl
    model:
      hidden_size: 256
      mpn_depth: 3
      num_heads: 4
      dropout: 0.1
      threshold: 10.0
      vocab_size: 38
      aa_siz

  0%|          | 0/20 [00:00<?, ?it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 20%|██        | 4/20 [00:00<00:01, 10.35it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 45%|████▌     | 9/20 [00:00<00:00, 20.38it/s]Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
Failed to patch pandas - unable to change molecule rendering
 70%|███████   |

[NeMo I 2024-12-18 19:46:04 infer:98] ************** Loading Inference Dataset ***********
[NeMo I 2024-12-18 19:46:04 infer:106] ************** Starting Inference ***********
[NeMo I 2024-12-18 19:46:04 infer:108] Predictions: [-1667492.0, -1578919.5, -1270149.0, -1404532.0, -1563523.0, -1246019.875, -1598389.625, -1403730.625, -1097129.0, -1428098.5, -1631920.5, -1267375.25, -1269627.0, -1477082.125, -1349006.5, -1252324.75, -1622407.875, -1954015.0, -1423121.875, -1697958.5]


0

In [3]:

# Join the MolMIM molecules with the DSMBind predictions column-wise.
# NOTE(review): pd.concat(axis=1) aligns the two frames on their row index only.
# When the CSVs have different row counts (e.g. several predictions per generated
# molecule), the shorter frame's columns are NaN on the extra rows and the
# predictions are not matched to the molecule they belong to -- confirm the
# intended mapping before relying on per-molecule scores.
df_molmim = pd.read_csv(molmim_generated_csv)
df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

# Write to the configured results path (the same file the setup cell clears)
# rather than a hard-coded literal, so a config change cannot leave them out of sync.
df_joined.to_csv(results_csv)

In [4]:
def update_dataframe_molmim_generated_molecules(molmim_generated,
                                                starting_molecule_name,
                                                molmim_generated_csv = molmim_generated_csv):
    """Store one round of MolMIM output in the generated-molecules CSV.

    The records are turned into a table whose positional index becomes the
    ``generated_compound_index`` column, ``smiles`` / ``score`` are renamed to
    ``generated_smiles`` / ``molmim_qed_score``, and every row is tagged with
    the starting molecule's name. If the CSV already exists the new rows are
    appended below the existing ones; otherwise the file is created.

    Args:
        molmim_generated: Records accepted by ``pd.DataFrame`` (the callers in
            this notebook pass ``molmim_response['generated']``; presumably a
            list of dicts with 'smiles' and 'score' keys -- confirm against
            the MolMIM response).
        starting_molecule_name: Value written to the ``starting_molecule``
            column for every row of this round.
        molmim_generated_csv: Path of the CSV to create or extend. The default
            is the module-level ``molmim_generated_csv`` as it was when this
            function was defined.

    Returns:
        None. The result is written to ``molmim_generated_csv``.
    """
    import pandas as pd

    renamed_columns = {
        'smiles': 'generated_smiles',
        'score': 'molmim_qed_score',
        'index': 'generated_compound_index',
    }
    # reset_index() turns the 0..n-1 row position into an 'index' column,
    # which the rename then labels as the per-round compound index.
    round_frame = (
        pd.DataFrame(molmim_generated)
        .reset_index()
        .rename(columns=renamed_columns)
    )
    round_frame['starting_molecule'] = starting_molecule_name

    # First round: nothing to append to, just create the file.
    if not os.path.exists(molmim_generated_csv):
        round_frame.to_csv(molmim_generated_csv, index=False)
        return

    print("The CSV file exists.")
    previous_rounds = pd.read_csv(molmim_generated_csv)
    all_rounds = pd.concat([previous_rounds, round_frame], ignore_index=True)
    all_rounds.to_csv(molmim_generated_csv, index=False)

In [5]:
# Stopping criteria: keep running rounds until at least
# `threshold_for_number_of_selected_molecules` rows of df_joined have a
# DSMBind prediction below `threshold_binding_affinity`.
threshold_binding_affinity = config['threshold_binding_affinity']
threshold_for_number_of_selected_molecules = config['threshold_for_number_of_selected_molecules']

rd = 0
while len(df_joined[df_joined['DSMBind_predictions'] < threshold_binding_affinity]) < threshold_for_number_of_selected_molecules:
    # Round 0 used row 0 of the starting molecules; each further round uses the
    # next row. Stop cleanly once the list is exhausted instead of failing with
    # a KeyError on a row that does not exist.
    if rd + 1 >= len(df_starting_molecules):
        print("No starting molecules left; stopping before the selection target was reached.")
        break
    rd = rd + 1
    print(f"Round {rd}")

    molecule_name = df_starting_molecules['Molecules'][rd]
    molecule = df_starting_molecules['Smiles'][rd]

    # Molecular Generation with MolMIM
    print("MolMIM...")
    molmim_response = request_models.call_molmim(molecule)
    generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
    update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

    # Protein-Ligand Docking with DiffDock
    print("DiffDock...")
    diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
    utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

    # Binding Affinity with DSMBind
    print("DSMBind...")
    # os.system returns the command's exit status; a failed inference run would
    # otherwise leave stale predictions that the join below silently reuses.
    exit_status = os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")
    if exit_status != 0:
        raise RuntimeError(f"DSMBind inference failed with exit status {exit_status}")

    # Rebuild the joined table from the files on disk.
    # NOTE(review): axis=1 aligns rows by position only; see the row-count
    # mismatch visible in the displayed df_joined.
    df_molmim = pd.read_csv(molmim_generated_csv)
    df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
    df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

    # Use the configured results path (the file the setup cell clears).
    df_joined.to_csv(results_csv)


In [6]:
# Number of rows whose DSMBind prediction is below the binding-affinity
# threshold -- the same filter the generation loop uses as its stopping test.
# (Comparing against the molecule-count threshold here was a copy-paste slip.)
len(df_joined[df_joined['DSMBind_predictions'] < threshold_binding_affinity])

20

In [7]:
# Display the joined MolMIM / DSMBind table as the cell output.
# NOTE(review): df_joined is built with pd.concat(..., axis=1), so when the two
# source CSVs have different row counts the shorter frame's columns are NaN on
# the extra rows.
df_joined

Unnamed: 0,generated_compound_index,generated_smiles,molmim_qed_score,starting_molecule,DSMBind_predictions
0,0.0,CC(C)(C)C(NC(=O)C(F)(F)C1CCOCC1)C(C)(C)C,0.86451,Nirmatrelvir,-1667492.0
1,1.0,CC(C)(C)C(NC(=O)C(F)(F)F)C(=O)N1CC2CCC1CC2,0.849637,Nirmatrelvir,-1578919.5
2,2.0,CC(C)(C)C(NC(=O)C(F)(F)F)C(=O)N1CC2(C1)CC1(CCC...,0.835813,Nirmatrelvir,-1270149.0
3,3.0,CC(C)(C)C(F)(F)CC(=O)[C@H](O)[C@@H]1C[C@H]1C1CC1,0.824634,Nirmatrelvir,-1404532.0
4,4.0,CC(C)(C)[C@@H](NC(=O)C(F)(F)F)C(=O)N1CC2(CCOCC...,0.82343,Nirmatrelvir,-1563523.0
5,,,,,-1246019.875
6,,,,,-1598389.625
7,,,,,-1403730.625
8,,,,,-1097129.0
9,,,,,-1428098.5
