In [2]:
from bokeh.io import output_notebook
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource, output_file
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdChemReactions as Reactions
import rdkit.Chem.Draw
from bokeh.palettes import d3
import bokeh.models as bmo
import numpy as np

In [3]:
df = pd.read_csv('screening_results.csv')

In [4]:
#too many data points slows down the generation of the interactive plot
df = df.sample(4000)

In [6]:
color_dict = {'allyl - cyclooctyne': 'blue', 'cyclic - cyclooctyne': 'orange', 'propargyl - cyclooctyne': 'green',
               'allyl - oxo-norbornadiene': 'red', 'cyclic - oxo-norbornadiene': 'purple', 'propargyl - oxo-norbornadiene': 'brown',
               'allyl - non-strained': 'pink', 'cyclic - non-strained': 'grey', 'propargyl - non-strained': 'yellow',
               'allyl - norbornene': 'red', 'cyclic - norbornene': 'purple', 'propargyl - norbornene': 'brown', 
             }

df['reaction_class'] = df.apply(lambda x: f'{x["dipole_scaffold"]} - {x["dipolarophile_scaffold"]}', axis=1)
df['color'] = df['reaction_class'].apply(lambda x: color_dict[x])

KeyError: 'propargyl - norbornene'

In [None]:
# Get data to plot
all_smiles = df["rxn_smiles"]
x = df["predicted_activation_energy"].values
y = df["lowest_dipole_barrier"].values
color = df["color"].values

rxn_id = df['rxn_id']

# Create SVGs for each smiles with the "new" RDKit drawing code
imgs = []
for rxn_smiles in all_smiles:
    smiles = rxn_smiles.split(">")[0]
    mol = Chem.MolFromSmiles(smiles)
    d2d = Chem.Draw.MolDraw2DSVG(150, 150)
    d2d.DrawMolecule(mol)
    d2d.FinishDrawing()
    svg = d2d.GetDrawingText()
    imgs.append(svg)

# Configure for output in the notebook
output_notebook()

# Load the data into a source and plot
source = ColumnDataSource(
    data={
        "x": x,
        "y": y,
        "imgs": imgs,
        "color": color, 
        "rxn_id": rxn_id,
    }
)
p = figure()
p.scatter("x", "y", fill_color="color", radius= 0.03, 
          fill_alpha=0.9, source=source, line_color=None)
p.x_range.flipped = True
p.xaxis.axis_label = r"$$\text{G}^{\ddagger} \text{ (kcal/mol)}$$"
p.yaxis.axis_label = r"$$\text{lowest native G}^{\ddagger} \text{ (kcal/mol)}$$"

# Create tooltips referencing stored images
TOOLTIPS = """\
    <div>
        <div>
            @imgs{safe}
        </div>
        <div>
            <span>[$index]</span>
        </div>
        <div>
            <span>($x, $y)</span>
        </div>
    </div>
"""

# Connect tooltips to plot
p.add_tools(HoverTool(tooltips=TOOLTIPS))

# Show figure
show(p)