In this notebook, we will learn how to display chemical structures in a hover box using plotly/dash. As a dummy example for this, we load some structures, compute their morgan fingerprints and calculate the dissimilarity matrix based on the Tanimoto coefficients of these fingerprints. We use the dissimilarity matrix to perform multdimensionality scaling as a dimensionality reduction technique. We display the resulting embeddings and show the chemical structures in a hover box:

In [1]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit import DataStructs
from jupyter_dash import JupyterDash
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, no_update
from sklearn.manifold import MDS

In [2]:
with open("smiles.txt") as file:
    smiles = file.read().split("\n")

df = pd.DataFrame(smiles, columns=["smiles"])  
df["mol"] = df.smiles.apply(Chem.MolFromSmiles)
df["morgan_fingerprints"] = df.mol.apply(lambda x: AllChem.GetMorganFingerprintAsBitVect(x, radius=2))
df.head()

Unnamed: 0,smiles,mol,morgan_fingerprints
0,c1ccc2c(c1)CCCN2,<rdkit.Chem.rdchem.Mol object at 0x000001C0FAD...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,CNc1ccccc1,<rdkit.Chem.rdchem.Mol object at 0x000001C0FAD...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,c1ccc2c(c1)CCN2,<rdkit.Chem.rdchem.Mol object at 0x000001C0FAD...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,c1ccc2[nH]ccc2c1,<rdkit.Chem.rdchem.Mol object at 0x000001C0FAD...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,c1cnc2c(c1)CCCC2,<rdkit.Chem.rdchem.Mol object at 0x000001C0FAD...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [3]:
def get_dissimilarity_matrix(fingerprints):
    n_molecules = len(fingerprints)
    matrix = np.zeros((n_molecules, n_molecules))
    for idx1 in range(n_molecules):
        for idx2 in range((idx1+1), n_molecules):
            matrix[idx2, idx1] = DataStructs.DiceSimilarity(fingerprints[idx1], fingerprints[idx2])
    matrix = np.triu(matrix.T, 1) + matrix
    return matrix

dissimilarity_matrix = get_dissimilarity_matrix(df.morgan_fingerprints)

Note: the high stress value is a indication that multidimensional scaling is not able to accurately represent the dissimilarity of the molecules in just two dimensions.

In [4]:
mds = MDS(n_components=2, dissimilarity="precomputed", metric=False)

embeddings = mds.fit_transform(dissimilarity_matrix)
df["mds_dimension1"] = embeddings[:,0]
df["mds_dimension2"] = embeddings[:,1]
mds.stress_

0.7720266185685115

In [5]:
# Save molecule images
for count, mol in enumerate(df.mol):
    Draw.MolToFile(mol, f"./assets/{count}.png")

# Display    
app = JupyterDash()

fig = go.Figure(data=[
    go.Scatter(x=df["mds_dimension1"], y=df["mds_dimension2"],
               mode="markers")])
fig.update_xaxes(title_text="dimension 1")
fig.update_yaxes(title_text="dimension 2")
fig.update_traces(hoverinfo="none", hovertemplate=None)

app.layout = html.Div([
    dcc.Graph(id="my-graph", figure= fig, clear_on_unhover=True),
    dcc.Tooltip(id="graph-tooltip")
])

@app.callback(
    Output("graph-tooltip", "show"),
    Output("graph-tooltip", "bbox"),
    Output("graph-tooltip", "children"),
    Input("my-graph", "hoverData"))

def display_hover(hoverData):
    if hoverData is None:
        return False, no_update, no_update

    pt = hoverData["points"][0]
    bbox = pt["bbox"]
    num = pt["pointNumber"]
    img_src = f"{num}.png"

    children = [
        html.Div([
            html.Img(src=app.get_asset_url(img_src), style={"width": "200px"}),
        ], style={"width": "200px", "white-space": "normal"})
    ]
    return True, bbox, children

app.run_server(mode="Inline")