In [None]:
import pandas as pd
from IDLPPBopt import PPBPredictor, PPBPredictorViz
from IDLPPBopt.interface.Visualizer import AtomAttentionVisualizer

# Simple prediction

In [None]:
# ::: READ DATA
#
# INPUT_FILE is the CSV file holding the compounds; SMILES_COLUMN is the name
# of the column in that file that contains the SMILES strings.
INPUT_FILE = 'input_compounds.csv'
SMILES_COLUMN = 'cano_smiles'

df = pd.read_csv(INPUT_FILE)
# Select the column through SMILES_COLUMN (not a hard-coded name) so that
# editing the constant above is enough to point at a different column.
smiles_series = df[SMILES_COLUMN]

# ::: RUN PREDICTOR
#
ppbpred = PPBPredictor()
ppbpred.load_smiles(smiles_series)
ppbpred.prepare_model()
results = ppbpred.evaluate()

# ::: SAVE RESULTS
#
# Give the predictions the same index as the input SMILES so the new column
# lines up with the rows of df when it is assigned.
results = pd.Series(results, index=smiles_series.index)
df['IDL-PPBopt'] = results
df
# df.to_csv('results.csv')

# Visualize prediction

In [None]:
from IPython.display import	SVG, display

# ::: READ DATA
#
# INPUT_FILE is the CSV file holding the compounds; SMILES_COLUMN is the name
# of the column in that file that contains the SMILES strings.
INPUT_FILE = 'input_compounds.csv'
SMILES_COLUMN = 'cano_smiles'

df = pd.read_csv(INPUT_FILE)
# Select the column through SMILES_COLUMN (not a hard-coded name) so that
# editing the constant above is enough to point at a different column.
smiles_series = df[SMILES_COLUMN]

# ::: RUN PREDICTOR
#
# evaluate() on the visualizing predictor returns two parallel sequences:
# the per-molecule predictions and the per-molecule attention weights.
ppbviz = PPBPredictorViz()
ppbviz.load_smiles(smiles_series)
ppbviz.prepare_model()
mol_predictions, mol_attnweights = ppbviz.evaluate()

# ::: VISUALIZE PREDICTION
#
# Initializing visualizer
# NOTE(review): vmin/vmax presumably bound the color scale applied to the
# attention weights -- confirm against AtomAttentionVisualizer.
viz = AtomAttentionVisualizer(vmin=0, vmax=.05)

for smi, weights, pred in zip(ppbviz.smiles_series, mol_attnweights, mol_predictions):
    # One tab-separated line per molecule: SMILES, prediction
    print(smi, pred, sep='\t')
    # Generate and display SVG
    svg = viz.draw_svg(smi, weights)
    display(SVG(svg))


# Color privileged substructures by heuristic

In [None]:
from IPython.display import	SVG, display
from matplotlib import cm
from rdkit import Chem
from IDLPPBopt import AtomAttentionVisualizer
from IDLPPBopt.utils import find_privileged_substructures, extract_substructure_by_indices
from IDLPPBopt.config import GPSUBS_PATH


# Table of privileged substructures. The code below relies on three columns:
# 'GPSub' (general privileged substructure, parsed as SMARTS), '(R)Esub'
# (a more specific pattern belonging to that GPSub) and 'PPB_Score'.
GPSubs = pd.read_csv(GPSUBS_PATH)
gb = GPSubs.groupby('GPSub')
viz = AtomAttentionVisualizer(vmin=0, vmax=.05)

# Relies on ppbviz, mol_attnweights and mol_predictions produced by the
# "Visualize prediction" cell; run that cell first.
for smi, weights, pred in zip(ppbviz.smiles_series, mol_attnweights, mol_predictions):
    # One tab-separated line per molecule: SMILES, prediction, max attention weight
    print(smi, pred, weights.max(), sep='\t')

    substructures = find_privileged_substructures(smi, weights)

    mol = Chem.MolFromSmiles(smi)
    substructure_colors = []
    for submol in (extract_substructure_by_indices(mol, idcs) for idcs in substructures):
        submol_score = 0.
        # Iterate over general privileged substructures
        for gpsub in gb.groups.keys():
            gpsub_pattern = Chem.MolFromSmarts(gpsub)
            if not submol.HasSubstructMatch(gpsub_pattern):
                continue
            # If GPSub matches, check (R)Esubs
            resubs = gb.get_group(gpsub)[['(R)Esub', 'PPB_Score']]
            for resub, score in resubs.itertuples(index=False):
                # Match the specific (R)Esub pattern. The previous code re-tested
                # the GPSub here, which is always a match at this point, so every
                # score of the group was added regardless of the (R)Esub.
                # NOTE(review): assumes '(R)Esub' entries are SMARTS like 'GPSub' -- confirm.
                resub_pattern = Chem.MolFromSmarts(resub) if isinstance(resub, str) else None
                if resub_pattern is None:
                    # Missing or unparsable entry: nothing to match against.
                    continue
                if submol.HasSubstructMatch(resub_pattern):
                    submol_score += score
        # Color by the sign of the accumulated score (tuples presumably RGBA):
        # negative -> green, positive -> red, zero / no match -> yellow.
        if submol_score < 0:
            substructure_colors.append((0., 1., 0., 1.))
        elif submol_score > 0:
            substructure_colors.append((1., 0., 0., 1.))
        else:
            substructure_colors.append((1., 1., 0., 1.))

    # Generate and display SVG
    svg = viz.draw_svg(
        smi, weights, substructures, substructure_colors=substructure_colors)
    display(SVG(svg))