In [1]:
from bokeh.io import output_notebook
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource, output_file
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdChemReactions as Reactions
import rdkit.Chem.Draw
from bokeh.palettes import d3
import bokeh.models as bmo
import numpy as np

In [24]:
df = pd.read_csv('data_files/xxx.csv')
print(len(df))
print(df.columns)
print(df['bio_orthogonal_potential'].max())
print(len(df[df['dipole'] == 'C[N+]([O-])=C(c1ccccc1)c1ccccc1']))

110135
Index(['Unnamed: 0', 'rxn_id', 'rxn_smiles', 'predicted_activation_energy',
       'predicted_reaction_energy', 'dipole', 'dipolarophile',
       'lowest_dipole_barrier', 'bio_orthogonal_potential',
       'dipolarophile_scaffold', 'dipole_scaffold'],
      dtype='object')
17.5847925
507


In [38]:
df1 = df[df['predicted_activation_energy'] < 24][df['dipole'] != '[O-][NH+]=C(c1ccccc1)c1ccccc1']
print(df1[df1['dipolarophile_scaffold'] == 'norbornene'].bio_orthogonal_potential.max())
#print(df[df['dipole'] == 'C[N+]([O-])=C(c1ccccc1)c1ccccc1'][df['bio_orthogonal_potential'] > 15].rxn_smiles.tolist())

2.981876


  df1 = df[df['predicted_activation_energy'] < 22][df['dipole'] != '[O-][NH+]=C(c1ccccc1)c1ccccc1']


In [45]:
df2 = df[df['predicted_activation_energy'] < 21][df['dipolarophile_scaffold'] != 'non-strained'][df['lowest_dipole_barrier'] > 28][df['predicted_reaction_energy'] < -50]
print(df2[df2['dipole_scaffold'] == 'propargyl'].bio_orthogonal_potential.max())

7.504465


  df2 = df[df['predicted_activation_energy'] < 21][df['dipolarophile_scaffold'] != 'non-strained'][df['lowest_dipole_barrier'] > 28][df['predicted_reaction_energy'] < -50]


In [74]:
df['donating_groups_dipolarophile'] = df['dipolarophile'].apply(lambda x: x.count('(OC)') + x.count('(C)') - x.count('(=O)'))
print(len(df[df['donating_groups_dipolarophile'] >= 1]))
df3 = df[df['predicted_activation_energy'] < 24.9][df['lowest_dipole_barrier'] > 25][df['donating_groups_dipolarophile'] >= 3]
print(df3[df3['dipole_scaffold'] == 'propargyl'].bio_orthogonal_potential.max())

12943
2.354561999999998


  df3 = df[df['predicted_activation_energy'] < 24.9][df['lowest_dipole_barrier'] > 25][df['donating_groups_dipolarophile'] >= 3]


In [68]:
df = pd.read_csv('test.csv').sample(4000)
print(df.columns)
print(len(df[df['dipole_scaffold'] == 'cyclic']))
df['reaction_class'] = df.apply(lambda x: f'{x["dipole_scaffold"]} - {x["dipolarophile_scaffold"]}', axis=1)
print(df.iloc[257])

Index(['Unnamed: 0.1', 'Unnamed: 0', 'rxn_id', 'rxn_smiles',
       'predicted_activation_energy', 'predicted_reaction_energy', 'dipole',
       'smiles_dict', 'dipolarophile', 'lowest_dipole_barrier',
       'bio_orthogonal_potential', 'dipolarophile_scaffold',
       'dipole_scaffold'],
      dtype='object')
1351
Unnamed: 0.1                                                                2085
Unnamed: 0                                                                 51162
rxn_id                                                                    435987
rxn_smiles                     [CH3:10][NH:11][C:12](=[O:13])[C:14]([C:15]([C...
predicted_activation_energy                                             22.49261
predicted_reaction_energy                                               -45.4233
dipole                              CNC(=O)C(C(C)=O)=[N+](C)[C-](C(=O)NC)C(=O)NC
smiles_dict                    {'rxn_smiles': '[CH3:10][NH:11][C:12](=[O:13])...
dipolarophile                      

In [64]:
color_dict = {'allyl - cyclooctyne': 'blue', 'cyclic - cyclooctyne': 'orange', 'propargyl - cyclooctyne': 'green',
               'allyl - oxo-norbornadiene': 'red', 'cyclic - oxo-norbornadiene': 'purple', 'propargyl - oxo-norbornadiene': 'brown',
               'allyl - non-strained': 'pink', 'cyclic - non-strained': 'grey', 'propargyl - non-strained': 'yellow',
             }

df['color'] = df['reaction_class'].apply(lambda x: color_dict[x])

In [None]:
# Get data to plot
all_smiles = df["rxn_smiles"]
x = df["predicted_activation_energy"].values
y = df["lowest_dipole_barrier"].values
color = df["color"].values

rxn_id = df['rxn_id']

# Create SVGs for each smiles with the "new" RDKit drawing code
imgs = []
for rxn_smiles in all_smiles:
    smiles = rxn_smiles.split(">")[0]
    mol = Chem.MolFromSmiles(smiles)
    d2d = Chem.Draw.MolDraw2DSVG(150, 150)
    d2d.DrawMolecule(mol)
    d2d.FinishDrawing()
    svg = d2d.GetDrawingText()
    imgs.append(svg)

# Configure for output in the notebook
output_notebook()

# Load the data into a source and plot
source = ColumnDataSource(
    data={
        "x": x,
        "y": y,
        "imgs": imgs,
        "color": color, 
        "rxn_id": rxn_id,
    }
)
p = figure()
p.scatter("x", "y", fill_color="color", radius= 0.03, 
          fill_alpha=0.9, source=source, line_color=None)
p.x_range.flipped = True
p.xaxis.axis_label = r"$$\text{G}^{\ddagger} \text{ (kcal/mol)}$$"
p.yaxis.axis_label = r"$$\text{lowest native G}^{\ddagger} \text{ (kcal/mol)}$$"

# Create tooltips referencing stored images
TOOLTIPS = """\
    <div>
        <div>
            @imgs{safe}
        </div>
        <div>
            <span>[$index]</span>
        </div>
        <div>
            <span>($x, $y)</span>
        </div>
    </div>
"""

# Connect tooltips to plot
p.add_tools(HoverTool(tooltips=TOOLTIPS))

# Show figure
show(p)