In [9]:
# Adding required libraries for an interactive protein-ligand docking visualization
# Please run the folllowing pip install command to install necessary libraries before proceeding 
# !pip install py3Dmol rdkit ipywidgets

import py3Dmol
from rdkit import Chem
import ipywidgets as widgets
from IPython.display import display
import glob
import yaml
import json
import os

import warnings
warnings.filterwarnings('ignore')


######## File paths ########
with open("../configs/self-driving-demo.yaml", "r") as f:
    config = yaml.safe_load(f)

protein_file_path = config['paths']['protein_file_path']
diffdock_output_dir = config['paths']['diffdock_output_dir']
dsmbind_input_dir = config['paths']['dsmbind_input_dir']

starting_molecule_csv = config['paths']['starting_molecule_csv']
molmim_generated_csv = config['paths']['molmim_generated_csv']
dsmbind_predictions_csv = config['paths']['dsmbind_predictions_csv']
results_csv = config['paths']['results_csv']

In [23]:

# defining a function for color definitions for visualization
def ansi_color(text, color):
    """Color text for console output"""
    colors = {
        "red": "\033[31m",
        "green": "\033[32m",
        "yellow": "\033[33m",
        "blue": "\033[34m",
        "magenta": "\033[35m",
        "cyan": "\033[36m",
        "white": "\033[37m",
        "reset": "\033[0m"
    }
    return f"{colors[color]}{text}{colors['reset']}"

# loading dock poses from the output SDF files extracted from the output.json 'positions' field
def load_poses_from_sdf(directory):
    sdf_files = glob.glob(f"{directory}/*.sdf")
    poses = []
    
    for sdf_file in sdf_files:
        print("sdf_file is ", sdf_file)
        supplier = Chem.SDMolSupplier(sdf_file)
        for mol in supplier:
            if mol is not None:
                poses.append(mol)  
    return poses

# visualising the docking poses in an interactive manner, browsing docked poses using an embedded slider
def update_viewer(pose_index):
    
    view = py3Dmol.view(width=1200, height=900)
    
    # Add the protein model
    view.addModel(protein_file_path, 'pdb')
    view.setStyle({'model': 0}, {'cartoon': {'color': 'white', 'opacity': 0.7}})
    view.setViewStyle({'style':'outline','color':'black','width':0.03})
    Prot=view.getModel()
    Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
    view.addSurface(py3Dmol.VDW,{'opacity':0.4,'color':'white'})
    
    # Add the selected docking pose
    pose = poses[pose_index]
    print("pose is ", pose)
    pose_block = Chem.MolToMolBlock(pose)
    # color = "#"+''.join([random.choice('0123456789ABCDEF') for _ in range(6)])
    view.addModel(pose_block, 'mol')
    view.setStyle({'model': 1}, {'stick': {'radius': 0.3, 'colorscheme': 'magentaCarbon'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.7, 'colorscheme': 'magentaCarbon'}, {'model': 1})
    score = round(confidence_scores[0][pose_index], 3)
    score_color = "green" if score > -0.5 else "blue" if score >= -1.5 else "red"
    print(f"Loaded {ansi_color(ligand_name, 'magenta')} with confidence score: {ansi_color(confidence_scores[0][pose_index], score_color)}")
    view.zoomTo()
    return view.update()


In [24]:

# Load the protein model
with open(protein_file_path, 'r') as f:
    protein_pdb = f.read()

# Specify the directory containing the dock poses in SDF format for a specific ligand
ligand_name = "Ensitrelvir_compound0"
directory = diffdock_output_dir + ligand_name
poses = load_poses_from_sdf(directory)

# Verify the number of poses loaded
print(f"Number of poses loaded: {len(poses)}")


sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_6.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_7.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_0.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_8.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_5.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_3.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_2.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_4.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_1.sdf
sdf_file is  ../data/diffdock_outputs/Ensitrelvir_compound0/pose_9.sdf
Number of poses loaded: 10


In [12]:
# Load confidence scores from output.json
output_json_path = os.path.join(diffdock_output_dir, 'output.json')
with open(output_json_path, 'r') as file:
    data = json.load(file)
    confidence_scores = data['position_confidence']  # list of floats

# Create a slider widget
pose_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(poses) - 1,
    step=1,
    description='Pose:',
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# Link the slider to the viewer update function
widgets.interact(update_viewer, pose_index=pose_slider)

interactive(children=(IntSlider(value=0, continuous_update=False, description='Pose:', max=9), Output()), _dom…

<function __main__.update_viewer(pose_index)>

In [21]:
poses

[<rdkit.Chem.rdchem.Mol at 0x7108bb990a50>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb990eb0>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb990f20>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb990f90>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb991000>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb991070>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb9910e0>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb991150>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb9911c0>,
 <rdkit.Chem.rdchem.Mol at 0x7108bb991230>]

In [14]:
confidence_scores[0]

[-0.8199455142021179,
 -0.8673657178878784,
 -1.1622602939605713,
 -1.2888742685317993,
 -1.9154716730117798,
 -2.044520854949951,
 -2.985332727432251,
 -3.2050280570983887,
 -3.6186628341674805,
 -4.01269006729126]