In [None]:
%%capture
# setup dependencies for Google Colab
!pip install rdkit-pypi mols2grid
!wget https://raw.githubusercontent.com/rdkit/rdkit/master/Docs/Book/data/solubility.test.sdf

In [1]:
import mols2grid
from pathlib import Path
from rdkit import RDConfig
# Prefer the copy of the dataset that lives under RDKit's documentation
# directory; otherwise fall back to a "solubility.test.sdf" in the working
# directory. Test the file itself rather than just the docs directory, so an
# installation with a docs directory but no bundled data still falls back.
_bundled_sdf = f"{RDConfig.RDDocsDir}/Book/data/solubility.test.sdf"
SDF_PATH = _bundled_sdf if Path(_bundled_sdf).is_file() else "solubility.test.sdf"

In [2]:
# rename fields for the output document
field_names = {
    "SOL": "Solubility",
    "SOL_classification": "Class",
    "NAME": "Name",
}


def solubility_style(x):
    """Return the CSS used for a solubility value: red below -3, black otherwise."""
    if x < -3:
        return "color: red"
    return "color: black"


def solubility_label(x):
    """Return the text shown for a solubility value, prefixed with "Sol: "."""
    return f"Sol: {x}"


mols2grid.display(
    SDF_PATH,
    # RDKit's MolDrawOptions parameters
    fixedBondLength=25,
    mapping=field_names,
    # set what's displayed on the grid
    subset=["ID", "img", "Solubility"],
    # set what's displayed on the tooltips
    tooltip=["Name", "SMILES", "Class", "Solubility"],
    # style for the grid labels and tooltips
    style={"Solubility": solubility_style},
    fmt={"Solubility": solubility_label},
)

If we tick the checkboxes of the last, first and middle molecules on the first page (ids 75, 5 and 40, in that order), we can retrieve the corresponding indexes and SMILES, listed in the order they were selected:

In [3]:
# retrieve the index and SMILES of your selection:
# a dict mapping each ticked molecule's index to its SMILES string
mols2grid.selection

{14: 'c1c(C)cc2ccccc2c1C', 0: 'CCC(C)CC', 7: 'c1ccccc1CC'}

You can also retrieve your selection (and the data that came with it) as a pandas DataFrame if you use the `MolGrid` class (which the `mols2grid.display` function uses implicitly). The `MolGrid` class takes a dataframe as input, but you can also use the `MolGrid.from_mols` and `MolGrid.from_sdf` classmethods if needed.

In [4]:
# example dataframe
import pandas as pd

# SMILES strings of the example molecules
smiles = [
    "CCO",
    "c1ccccc1",
    "N",
    "CO",
    "O=S(=O)(-O)(-O)",
    "CCC",
    "CCC=O",
]
# one row per molecule, with a 1-based "id" column next to the SMILES
df = pd.DataFrame({"smi": smiles}).assign(id=range(1, len(smiles) + 1))
# build the grid object from the dataframe; the SMILES are read from "smi"
mg = mols2grid.MolGrid(
    df,
    smiles_col="smi",
    size=(110, 90),
)
# render the grid with the id and the drawing of each molecule, 7 columns wide
mg.display(
    subset=["id", "img"],
    n_cols=7,
)

In [5]:
# retrieve your selection as a dataframe:
# the rows of the grid's data that match the currently selected molecules
mg.get(mols2grid.selection)

Unnamed: 0,smi,id
0,CCO,1
4,O=S(=O)(-O)(-O),5


In [6]:
# retrieve specific indexes: `get` also accepts an explicit list of row
# indexes (here rows 0 and 4) instead of the interactive selection
mg.get([0, 4])

Unnamed: 0,smi,id
0,CCO,1
4,O=S(=O)(-O)(-O),5


Here's an example with annotations and highlights of molecules, and more complex drawing options:

In [7]:
from rdkit import Chem
from rdkit.Chem import Draw

# drawing options: one MolDrawOptions object, handed to the grid through the
# `MolDrawOptions=opts` argument of the display call at the end of this cell
opts = Draw.MolDrawOptions()
opts.useBWAtomPalette() # black and white atom palette
opts.setHighlightColour((0, 1, 1)) # cyan highlights, given as an (R, G, B) tuple
# optional alternative, left disabled: purple notes
# opts.setAnnotationColour((0.8, 0, 0.8)) # purple notes
opts.annotationFontScale = 0.8 # notes font scale
opts.clearBackground = False # transparent bg
# optional alternative, left disabled: xkcd-style drawings
# Draw.SetComicMode(opts) # xkcd style

# build one RDKit molecule per SMILES string
mols = [Chem.MolFromSmiles(smi) for smi in smiles]
# annotate atom in first molecule
mols[0].GetAtomWithIdx(0).SetProp("atomNote", "1")
# annotate bond
mols[1].GetBondWithIdx(0).SetProp("bondNote", "2")
# highlight atoms manually
mols[3].__sssAtoms = [0]
# highlight atoms through SMARTS query: store the matched atom indices in
# `__sssAtoms` explicitly (the same attribute as the manual highlight above),
# instead of discarding the match and relying on GetSubstructMatch having been
# patched to record it as a side effect
pattern = Chem.MolFromSmarts("S=O")
mols[4].__sssAtoms = list(mols[4].GetSubstructMatch(pattern))
# display the annotated molecules, using the drawing options defined above
mols2grid.display(
    mols,
    template="table",
    n_cols=7,
    subset=["img"],
    size=(120, 100),
    MolDrawOptions=opts,
    selection=False,
)