In [1]:
import requests
import os
import shutil
import json
import pandas as pd
import yaml

import utils, request_models

import warnings
warnings.filterwarnings('ignore')


In [None]:
######## File paths ########
# Load the demo configuration; every path used below comes from its 'paths' section.
with open("../configs/self-driving-demo.yaml", "r") as f:
    config = yaml.safe_load(f)

protein_file_path = config['paths']['protein_file_path']
diffdock_output_dir = config['paths']['diffdock_output_dir']
# NOTE(review): this reads the 'diffdock_output_dir' key, so both directory
# variables hold the same path. Confirm that is intended and not a copy-paste slip.
dsmbind_input_dir = config['paths']['diffdock_output_dir']

starting_molecule_csv = config['paths']['starting_molecule_csv']
molmim_generated_csv = config['paths']['molmim_generated_csv']
dsmbind_predictions_csv = config['paths']['dsmbind_predictions_csv']
results_csv = config['paths']['results_csv']

# Reset the working directories and the CSVs left over from a previous run
# (going by the helper names; their implementations live in utils).
utils.prepare_output_directory(diffdock_output_dir)
utils.prepare_output_directory(dsmbind_input_dir)
utils.delete_folders_in_folder(diffdock_output_dir)
utils.delete_folders_in_folder(dsmbind_input_dir)

utils.delete_datasets(molmim_generated_csv)
utils.delete_datasets(dsmbind_predictions_csv)
utils.delete_datasets(results_csv)

# Get folded protein
folded_protein = utils.file_to_json_compatible_string(protein_file_path)

# Get starting molecules
df_starting_molecules = pd.read_csv(starting_molecule_csv)

# Round 0
print("Round 0")
molecule_name = df_starting_molecules['Molecules'][0]
molecule = df_starting_molecules['Smiles'][0]

# Molecular Generation with MolMIM
molmim_response = request_models.call_molmim(molecule)
# DiffDock receives the generated SMILES as one newline-separated string.
generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
utils.update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

# Protein-Ligand Docking with DiffDock
diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

# Binding Affinity with DSMBind
# os.system only reports failure through its return value; check it so a failed
# inference stops here instead of surfacing later as a missing predictions file.
dsmbind_exit_status = os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")
if dsmbind_exit_status != 0:
    raise RuntimeError(f"DSMBind inference failed (exit status {dsmbind_exit_status}).")


File '../data/molmim_generated_molecules.csv' deleted successfully.
File '../data/dsmbind_predictions.csv' deleted successfully.
File '../data/results.csv' deleted successfully.
Round 0


In [3]:

# Put the MolMIM molecules and the DSMBind scores side by side.
df_molmim = pd.read_csv(molmim_generated_csv)
df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
# NOTE(review): axis=1 pairs rows purely by position. If the two files differ in
# length, the unmatched rows are padded with NaN -- confirm the DSMBind output
# has exactly one row per generated molecule, in the same order.
df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

# Write to the configured results path (the one handed to utils.delete_datasets
# during setup) rather than a hard-coded copy of it, so the two cannot drift apart.
df_joined.to_csv(results_csv)

In [4]:
def update_dataframe_molmim_generated_molecules(molmim_generated,
                                                starting_molecule_name,
                                                molmim_generated_csv = molmim_generated_csv):
    """Append one batch of MolMIM-generated molecules to the tracking CSV.

    The batch is loaded into a DataFrame, its positional index is exposed as
    the 'generated_compound_index' column, 'smiles' and 'score' are renamed to
    'generated_smiles' and 'molmim_qed_score', and every row is tagged with the
    starting molecule's name. The rows are then appended to the CSV, which is
    created if it does not exist yet.

    Args:
        molmim_generated: Records accepted by ``pd.DataFrame`` -- presumably the
            list of dicts with 'smiles' and 'score' keys from the MolMIM
            response; verify against the caller.
        starting_molecule_name: Value stored in the 'starting_molecule' column.
        molmim_generated_csv: Path of the CSV to create or extend. The default
            is captured from the notebook variable when this cell is run.

    Returns:
        None. The result is written to ``molmim_generated_csv``.
    """
    import pandas as pd

    column_names = {
        'smiles': 'generated_smiles',
        'score': 'molmim_qed_score',
        'index': 'generated_compound_index',
    }
    # reset_index() turns the row position within this batch into an 'index'
    # column, which the rename then relabels.
    batch = pd.DataFrame(molmim_generated).reset_index().rename(columns=column_names)
    batch['starting_molecule'] = starting_molecule_name

    # First batch: nothing to merge with, just create the file.
    if not os.path.exists(molmim_generated_csv):
        batch.to_csv(molmim_generated_csv, index=False)
        return

    print("The CSV file exists.")
    previous = pd.read_csv(molmim_generated_csv)
    combined = pd.concat([previous, batch], ignore_index=True)
    combined.to_csv(molmim_generated_csv, index=False)

In [5]:
# Selection criteria: keep generating until enough molecules have a DSMBind
# prediction below the affinity cutoff.
threshold_binding_affinity = config['threshold_binding_affinity']
threshold_for_number_of_selected_molecules = config['threshold_for_number_of_selected_molecules']

rd = 0
while (df_joined['DSMBind_predictions'] < threshold_binding_affinity).sum() < threshold_for_number_of_selected_molecules:
    # Each round consumes the next starting molecule. Stop cleanly when they
    # run out instead of failing on an out-of-range row lookup.
    if rd + 1 >= len(df_starting_molecules):
        print("No starting molecules left; stopping before the selection target was reached.")
        break

    rd = rd + 1
    print(f"Round {rd}")

    # Positional lookup, matching the length-based bound check above.
    molecule_name = df_starting_molecules['Molecules'].iloc[rd]
    molecule = df_starting_molecules['Smiles'].iloc[rd]

    # Molecular Generation with MolMIM
    print("MolMIM...")
    molmim_response = request_models.call_molmim(molecule)
    # DiffDock receives the generated SMILES as one newline-separated string.
    generated_ligands = '\n'.join([v['smiles'] for v in molmim_response['generated']])
    update_dataframe_molmim_generated_molecules(molmim_response['generated'], molecule_name)

    # Protein-Ligand Docking with DiffDock
    print("DiffDock...")
    diffdock_response = request_models.call_diffdock(folded_protein, generated_ligands)
    utils.create_diffdock_outputs_dsmbind_inputs(molecule_name, diffdock_response)

    # Binding Affinity with DSMBind
    print("DSMBind...")
    # A failed inference would otherwise go unnoticed and the predictions file
    # from the previous round would be read again below.
    dsmbind_exit_status = os.system("python /workspace/bionemo/examples/molecule/dsmbind/infer.py")
    if dsmbind_exit_status != 0:
        raise RuntimeError(f"DSMBind inference failed in round {rd} (exit status {dsmbind_exit_status}).")

    df_molmim = pd.read_csv(molmim_generated_csv)
    df_dsmbind = pd.read_csv(dsmbind_predictions_csv)
    # NOTE(review): rows are paired by position only; confirm both files stay
    # aligned (same length and order) as rounds accumulate.
    df_joined = pd.concat([df_molmim, df_dsmbind], axis=1)

    # Use the configured results path instead of a hard-coded copy of it.
    df_joined.to_csv(results_csv)


In [6]:
# Number of molecules whose DSMBind prediction is below the affinity cutoff --
# the same condition the selection loop counts. The cutoff is
# threshold_binding_affinity; the other threshold is the target *count*.
len(df_joined[df_joined['DSMBind_predictions'] < threshold_binding_affinity])

20

In [None]:
# Display the joined table (MolMIM molecules alongside DSMBind predictions).
df_joined

Unnamed: 0,generated_compound_index,generated_smiles,molmim_qed_score,starting_molecule,DSMBind_predictions
0,0.0,CC(C)(C)c1cc(=O)c(C(=O)N2CC[C@@H]2C2CCC2)c[nH]1,0.909366,Nirmatrelvir,-1667492.0
1,1.0,CC(C)(C)[C@H](NC(=O)C(F)(F)F)C(=O)N1CCc2ccccc2...,0.855078,Nirmatrelvir,-1425028.5
2,2.0,CC(C)(C)C(NC(=O)C(F)(F)F)C(=O)N1CCOC[C@H]1C1CC1,0.853662,Nirmatrelvir,-1270149.0
3,3.0,CC(C)(C)[C@H](NC(=O)C(F)(F)c1ccccc1)C(=O)OC(C)...,0.85066,Nirmatrelvir,-1404532.0
4,4.0,CC(C)(C)[C@H](NC(=O)C(F)(F)F)C(=O)N1Cc2ccccc2O...,0.848737,Nirmatrelvir,-1563523.0
5,,,,,-1246019.875
6,,,,,-1825153.25
7,,,,,-1403730.625
8,,,,,-1494919.125
9,,,,,-1540087.375
