In [None]:
! conda install nglview
! conda install openbabel
! conda install mdanalysis
! pip install https://github.com/volkamerlab/opencadd/archive/master.tar.gz
! conda install -c conda-forge smina
! mamba install teachopencadd -c conda-forge -c defaults

In [None]:
from teachopencadd.utils import show_pdf

In [None]:
# URL of a supplementary PDF hosted on PubMed Central (article PMC4489249).
# The two adjacent string literals are concatenated into one URL.
pdf = (
    "https://www.ncbi.nlm.nih.gov/"
    "pmc/articles/PMC4489249/bin/supp_gkv315_nar-00254-web-b-2015-File003.pdf"
)
# `show_pdf` comes from teachopencadd.utils; presumably it embeds the PDF in
# the cell output -- TODO confirm.
show_pdf(pdf)

In [None]:
"""Visualization: complex and interactions¶
We will use nglview for visualization. It’s a web-based molecular viewer that can be run in Jupyter notebooks. We will first use it in a basic way to visualize a complex of interest. Then, we will make use of ipywidgets layouts to visualize protein-ligand interactions."""

In [None]:
# Import libraries
from pathlib import Path
import time
import warnings

warnings.filterwarnings("ignore")

import pandas as pd
import nglview as nv
import openbabel
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from plip.structure.preparation import PDBComplex
from plip.exchange.report import BindingSiteReport

from opencadd.structure.core import Structure

In [None]:
# Absolute path of the notebook directory and of its data folder.
# `_dh` is the directory-history list that IPython puts in the notebook
# namespace; its last entry is used as the current directory, so this cell
# only runs under IPython/Jupyter.
HERE = Path(_dh[-1])
DATA = HERE / "data"

In [None]:
"""PDB complex: example with EGFR¶
As a test case for this notebook, we choose the EGFR kinase. The considered PDB structure is given by the ID 3POZ. Let’s use nglview to visualize the structure in a notebook cell.

Note: the complex can easily be changed by adapting the PDB ID in the cell below."""

In [None]:
# We now fetch the PDB structure file from the PDB using opencadd's Structure class (opencadd.structure.core).

In [None]:
# PDB ID of the complex to analyse (EGFR kinase). Change it here to study
# another structure; every later cell reads `pdb_id`.
pdb_id = "3poz"
# Fetch the structure from the PDB with the opencadd Structure class
pdb_file = Structure.from_pdbid(pdb_id)
# Download it to file for later use (make sure the data folder exists first)
DATA.mkdir(parents=True, exist_ok=True)
pdb_file.write(DATA / f"{pdb_id}.pdb")

In [None]:
# Show the complex based on its PDB ID

ngl_viewer = nv.show_pdbid(pdb_id)
# Add the ligands: hetero atoms except water, drawn as ball+stick
ngl_viewer.add_representation(repr_type="ball+stick", selection="hetero and not water")
# Center the view on the "ligand" selection, i.e. the binding site
ngl_viewer.center("ligand")
# Last expression of the cell: displays the interactive widget
ngl_viewer

In [None]:
# NOTE(review): `_display_image` is a private nglview method. For `viewer_3D`
# further down it is preceded by a `render_image(...)` call, but no such call
# for `ngl_viewer` is visible in this notebook -- confirm an image is available.
ngl_viewer._display_image()

In [None]:
"""Profiling protein-ligand interactions using PLIP¶
PLIP offers a webserver for automated analysis, but unfortunately there is no API. We could try to use the HTML forms as if we were using the standard web UI, but since the library itself is Python-3 ready and very easy to install with pip, we can just use it locally for simplicity.

PLIP takes PDB files as input, so we can pass the PDB file to PLIP and let it do its magic. The BindingSiteReport class processes each detected binding site in PDBComplex and creates an object with the (eight) fields we are interested in, namely

hydrophobic interaction : hydrophobic

hydrogen bond : hbond

water bridge : waterbridge

salt bridge : saltbridge

π-stacking (parallel and perpendicular) : pistacking

π-cation : pication

halogen bond : halogen

metal complexation : metal

These fields are divided in <field>_features (containing column names) and <field>_info (containing the actual records). If we iterate over the object retrieving the correct attribute name with getattr(), we can compose a dictionary that can be passed to a pandas.DataFrame for nice overviews.

This dictionary is composed of two levels: - First level is the detected binding sites. - For each binding site, we have one more sub-dictionary containing eight lists, one for each specific interaction. * Each list will contain the column names in the first row, and the data (if available) in the following."""

In [None]:
def retrieve_plip_interactions(pdb_file):
    """
    Profile a protein-ligand complex with PLIP.

    Parameters
    ----------
    pdb_file :
        The PDB file of the complex.

    Returns
    -------
    dict :
        One entry per detected binding site. Each value is itself a dictionary
        mapping an interaction type (hydrophobic, hbond, ...) to a list whose
        first element holds the feature names of that interaction, e.g.
        ('RESNR', 'RESTYPE', ..., 'LIGCOO', 'PROTCOO'), and whose remaining
        elements are the records found for it.
    """
    # Interaction types for which the binding site report exposes a
    # `<type>_features` attribute (column names) and a `<type>_info`
    # attribute (records).
    interaction_types = (
        "hydrophobic",
        "hbond",
        "waterbridge",
        "saltbridge",
        "pistacking",
        "pication",
        "halogen",
        "metal",
    )

    complex_ = PDBComplex()
    complex_.load_pdb(pdb_file)
    # Analyze the interactions of every ligand found in the structure
    for lig in complex_.ligands:
        complex_.characterize_complex(lig)

    interaction_sets = complex_.interaction_sets
    sites = {}
    # Visit the binding sites in sorted key order
    for site_id in sorted(interaction_sets):
        report = BindingSiteReport(interaction_sets[site_id])
        # Header row first, followed by the records of that interaction type
        sites[site_id] = {
            kind: [getattr(report, f"{kind}_features"), *getattr(report, f"{kind}_info")]
            for kind in interaction_types
        }
    return sites

In [None]:
# Profile the saved PDB file of the complex of interest; the resulting
# dictionary holds the interactions of each detected binding site.
interactions_by_site = retrieve_plip_interactions(f"{DATA}/{pdb_id}.pdb")

In [None]:
# Report how many binding sites were detected and list their keys:

print(
    f"Number of binding sites detected in {pdb_id} : "
    f"{len(interactions_by_site)}\n"
    f"with {interactions_by_site.keys()}"
)
# NBVAL_CHECK_OUTPUT

In [None]:
# In this case, the first binding site containing ligand 03P will be further investigated.
# The index refers to the position of the site among the dictionary keys.

index_of_selected_site = 0
selected_site = list(interactions_by_site.keys())[index_of_selected_site]
print(selected_site)

In [None]:
# Table of interaction types
# We can construct a pandas.DataFrame for a binding site and a particular interaction type.

def create_df_from_binding_site(selected_site_interactions, interaction_type="hbond"):
    """
    Build a table of one interaction type for a binding site.

    Parameters
    ----------
    selected_site_interactions : dict
        Precalculated interactions from PLIP for the selected site. For each
        interaction type it holds a list with the column names first and the
        records after them.
    interaction_type : str
        The interaction type of interest (default set to hydrogen bond).
        An unknown type triggers a printed warning and falls back to "hbond".

    Returns
    -------
    DataFrame :
        Data frame with information retrieved from PLIP.
    """
    known_types = (
        "hydrophobic",
        "hbond",
        "waterbridge",
        "saltbridge",
        "pistacking",
        "pication",
        "halogen",
        "metal",
    )

    # Fall back to hydrogen bonds when the requested type is unknown
    if interaction_type not in known_types:
        print("!!! Wrong interaction type specified. Hbond is chosen by default!!!\n")
        interaction_type = "hbond"

    rows = selected_site_interactions[interaction_type]
    # The first row carries the column names, the data follows it
    return pd.DataFrame.from_records(rows[1:], columns=rows[0])

In [None]:
# Hydrophobic interactions

# In the next cell, we show the hydrophobic interactions from the selected binding site.

create_df_from_binding_site(interactions_by_site[selected_site], interaction_type="hydrophobic")

In [None]:
# Hydrogen bond interactions

# If hydrogen bonds are of interest, the table can be generated as follows:

create_df_from_binding_site(interactions_by_site[selected_site], interaction_type="hbond")


In [None]:
# Halogen interactions: same table, built for the "halogen" interaction type.

create_df_from_binding_site(interactions_by_site[selected_site], interaction_type="halogen")


In [None]:
# Visualization with NGLView
# Now, let's try to represent those interactions in the NGL viewer. We can draw
# cylinders between the interaction points (LIGCOO and PROTCOO in the
# pandas.DataFrame) and color-code them as shown in color_map, which maps each
# interaction type to an [R, G, B] list with components between 0 and 1.

color_map = {
    "hydrophobic": [0.90, 0.10, 0.29],
    "hbond": [0.26, 0.83, 0.96],
    "waterbridge": [1.00, 0.88, 0.10],
    "saltbridge": [0.67, 1.00, 0.76],
    "pistacking": [0.75, 0.94, 0.27],
    "pication": [0.27, 0.60, 0.56],
    "halogen": [0.94, 0.20, 0.90],
    "metal": [0.90, 0.75, 1.00],
}

In [None]:
#Let’s see what these RGB colors correspond to:

# 2 x 4 grid: one small panel per interaction type
fig, axs = plt.subplots(nrows=2, ncols=4, figsize=(15, 2))
plt.subplots_adjust(hspace=1)
fig.suptitle("Colors for interaction types", size=16, y=1.2)

# Fill each panel with the color of its interaction type and label it
for ax, (interaction, color) in zip(fig.axes, color_map.items()):
    ax.imshow(np.zeros((1, 5)), cmap=colors.ListedColormap(color))
    ax.set_title(interaction, loc="center")
    ax.set_axis_off()
plt.show()

In [None]:
#Define a helper function to show the interactions.

def show_interactions_3D(
    pdb_id, selected_site_interactions, highlight_interaction_colors=color_map
):
    """
    3D visualization of protein-ligand interactions.

    Parameters
    ----------
    pdb_id : str
        The pdb ID of interest.
    selected_site_interactions : dict
        Precalculated interactions from PLIP for the selected site. Maps each
        interaction type to a list whose first element holds the column names
        and whose remaining elements are the records; the columns "LIGCOO",
        "PROTCOO" and "RESNR" are read from every record.
    highlight_interaction_colors : dict
        The colors used to highlight the different interaction types. Only
        the types that actually have records are looked up.

    Returns
    -------
    NGL viewer with explicit interactions given by PLIP.
    """

    # Create NGLviewer
    viewer = nv.NGLWidget(height="600px", default=True, gui=True)
    # Add protein
    prot_component = viewer.add_pdbid(pdb_id)
    # Add the ligands
    viewer.add_representation(repr_type="ball+stick", selection="hetero and not water")

    interacting_residues = []
    for interaction_type, interaction_list in selected_site_interactions.items():
        # Only the header row (or nothing at all): no interaction to draw
        if len(interaction_list) <= 1:
            continue
        color = highlight_interaction_colors[interaction_type]
        column_names, *records = interaction_list
        for record in records:
            interaction = dict(zip(column_names, record))
            # add cylinder between ligand and protein coordinate
            viewer.shape.add_cylinder(
                interaction["LIGCOO"],
                interaction["PROTCOO"],
                color,
                [0.1],
                interaction_type,
            )
            interacting_residues.append(interaction["RESNR"])

    # Display interacting residues. A residue taking part in several
    # interactions is listed once (first-seen order is kept).
    unique_residues = list(dict.fromkeys(interacting_residues))
    # Skip the residue representations when nothing interacts: the selection
    # string would be empty, which NGL is expected to treat as "all atoms"
    # -- NOTE(review): confirm against the NGL selection language.
    if unique_residues:
        res_sele = " or ".join([f"({r} and not _H)" for r in unique_residues])
        res_sele_nc = " or ".join(
            [f"({r} and ((_O) or (_N) or (_S)))" for r in unique_residues]
        )
        prot_component.add_ball_and_stick(
            sele=res_sele, colorScheme="chainindex", aspectRatio=1.5
        )
        prot_component.add_ball_and_stick(
            sele=res_sele_nc, colorScheme="element", aspectRatio=1.5
        )
    # Center on binding site
    viewer.center("ligand")
    return viewer

In [None]:
# Build the viewer for the selected binding site, using the default colors,
# and display it as the cell output.
viewer_3D = show_interactions_3D(pdb_id, interactions_by_site[selected_site])
viewer_3D

In [None]:
# Request a static image of the viewer; the trailing `;` suppresses the cell output.
viewer_3D.render_image(trim=True, factor=2, transparent=True);

In [None]:
# Show the static image (private nglview method); presumably it relies on the
# `render_image` call of the previous cell -- TODO confirm.
viewer_3D._display_image()

In [None]:
"""Analysis of interactions¶
As we can see in the NGL viewer, PLIP manages to identify different interactions between the protein and the ligand in the binding site, for our kinase example (3poz):

The typical hinge hydrogen bond with methionine residue MET793.

Hydrophobic interactions with the following residues:

LYS745

LEU788

THR790

THR854

LEU858

Halogen interactions with residues:

MET766

LEU788

THR790

Note that for example the hinge interaction is equally identified in KLIFS, see 3poz KLIFS fingerprint, while the hydrophobic interactions identified with PLIP are only a subset of those in KLIFS. Halogen interactions are not explicitly annotated in KLIFS.

All the identified interactions in NGLview do indeed correspond to the table of interactions generated above."""