# Graph Convolutional Neural Network model

This model is based on Convolutional Neural Networks where input molecules are modelled as undirected 2D graphs, using the pytorch library

The model takes SMILES of any query molecules as input and returns the predicted probability of binding to each non-orphan TAS2R receptor and explanations generated by UGrad-CAM

***

<div class="alert alert-block alert-warning">
Before running the following code, please make sure that <b>all the required libraries</b> are installed. Instructions on how to set up the full environment are provided in the <b>README file</b> of this repository.
</div>

***

Import the libraries and the functions from the main script

In [1]:
import os, shutil
import pandas as pd
import numpy as np
from rdkit import Chem
from chembl_structure_pipeline import standardizer
import torch
from torch_geometric.loader import DataLoader
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.rdmolops import RemoveAllHs
from rdkit_heatmaps import mapvalues2mol
from rdkit_heatmaps.utils import transform2png
from ipywidgets import interact, interactive, fixed, interact_manual

[11:25:55] Initializing Normalizer


In [2]:
# MacOS users:
# if you are encountering an error with enchant, uncomment the following line
# customise the path to your enchant library (/opt/homebrew/opt/enchant/lib/libenchant-2.dylib)
# to find the location of the enchant library on MacOS, run `brew --prefix enchant`
#%env PYENCHANT_LIBRARY_PATH=/opt/homebrew/opt/enchant/lib/libenchant-2.dylib

env: PYENCHANT_LIBRARY_PATH=/opt/homebrew/opt/enchant/lib/libenchant-2.dylib


In [3]:
# import the evaluation function for the GCN model (folder in ../GCN)
import os
import sys
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'GCN/'))

import GCN_Eval
from GCN_Eval import eval_gcn

Insert the input molecule

In [4]:
# The query can be provided in several input formats: SMILES, FASTA, InChI, PDB, Sequence, SMARTS or a PubChem name.
# If a PubChem name is provided, the code automatically queries the PubChem database to retrieve the SMILES
# before running the prediction.
smiles = 'CC(CC1=CC2=C(C=C1)OCO2)NC'

Set `PLOT_UGRADCAM` to `True` to plot and save a **UGrad-CAM** explanation for every molecule–receptor pair.

Plot the gradient activation with UGrad-CAM, where Red is associated with activation of the node toward class 1, and Blue toward class 0

<div class="alert alert-block alert-danger">
<b> ATTENTION: </b> Activating UGrad-CAM for larger queries (>10 molecules) is not recommended due to its high computational cost
</div>

In [6]:
# Activates plotting of explanations with UGrad-CAM.
# This flag is forwarded to eval_gcn as `plot_ugradcam` and is checked again by
# plot_UGradCAM before it tries to load the saved images.
PLOT_UGRADCAM = True

Run the evaluation task on the input molecule for every non-orphan receptor with the trained model

In [7]:
# Evaluate the query with the trained model and keep the returned results table.
# `outdir` is the current working directory, which is also where plot_UGradCAM
# looks for the `UGradCAM` folder (presumably written by eval_gcn when
# plot_ugradcam is enabled — confirm against GCN_Eval).
final_results_df = eval_gcn(smiles, verbose=True, plot_ugradcam=PLOT_UGRADCAM, outdir=os.getcwd())

[INFO  ] Input has been interpeted as SMILES


Processing...
Done!


[INFO   ] Plotting UGrad-CAMs for molecule #1
[INFO   ]   - TAS2R1
[INFO   ]   - TAS2R3
[INFO   ]   - TAS2R4
[INFO   ]   - TAS2R5
[INFO   ]   - TAS2R7
[INFO   ]   - TAS2R8
[INFO   ]   - TAS2R9
[INFO   ]   - TAS2R10
[INFO   ]   - TAS2R13
[INFO   ]   - TAS2R14
[INFO   ]   - TAS2R16
[INFO   ]   - TAS2R38
[INFO   ]   - TAS2R39
[INFO   ]   - TAS2R40
[INFO   ]   - TAS2R41
[INFO   ]   - TAS2R42
[INFO   ]   - TAS2R43
[INFO   ]   - TAS2R44
[INFO   ]   - TAS2R46
[INFO   ]   - TAS2R47
[INFO   ]   - TAS2R49
[INFO   ]   - TAS2R50


## Results

Each displayed prediction is the predicted probability that the molecule binds to the given receptor, from 0 (no binding) to 1 (binding)

The Applicability Domain column (`Check AD`) shows whether the input molecule is similar enough to the ones in the training dataset. If the check returns `False`, it is strongly advised not to consider the prediction for that molecule as reliable

In [8]:
# Temporarily lift the pandas row/column display limits so the full results
# table is rendered; the previous options are restored when the block exits.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(final_results_df)

Unnamed: 0,Standardized SMILES,Check AD,1,3,4,5,7,8,9,10,13,14,16,38,39,40,41,42,43,44,46,47,49,50
0,CNC(C)Cc1ccc2c(c1)OCO2,True,0.47,0.01,0.62,0.02,0.09,0.16,0.18,0.7,0.05,0.6,0.06,0.55,0.29,0.14,0.0,0.0,0.37,0.16,0.7,0.1,0.22,0.06


Display the explanations using the UGrad-CAM method

In [9]:
# plot the UGradCAM explanation for the selected receptor
def plot_UGradCAM(receptor):
    """Display the saved UGrad-CAM image of the query molecule for one receptor.

    The image is looked up under ``<cwd>/UGradCAM/<standardized SMILES>/<class>/``,
    where ``<class>`` is ``0`` or ``1`` and the standardized SMILES is taken from
    the first row of ``final_results_df``. The ``0`` folder is checked first.

    Parameters
    ----------
    receptor : str
        Receptor name used as the image file stem, e.g. ``'TAS2R14'``.

    Raises
    ------
    ValueError
        If ``PLOT_UGRADCAM`` is disabled, or if no image exists for ``receptor``
        in either class folder.
    """
    # PLOT_UGRADCAM is a boolean flag, so test its truthiness: the previous
    # `is None` comparison could never trigger when the flag was set to False.
    if not PLOT_UGRADCAM:
        raise ValueError('UGradCAM results are not available. Please set PLOT_UGRADCAM to True.')

    results_path = os.path.join(os.getcwd(), 'UGradCAM', final_results_df["Standardized SMILES"].iloc[0])

    # (class sub-folder, message) pairs, probed in the same order as before.
    outcomes = (
        ('0', 'Selected SMILES is predicted to NOT interact with the receptor'),
        ('1', 'Selected SMILES is predicted to interact with the receptor'),
    )

    for class_dir, message in outcomes:
        image_path = os.path.join(results_path, class_dir, receptor + '.png')
        if os.path.exists(image_path):
            # Imported lazily: only needed once there is an image to render,
            # so validation errors above surface even without IPython.
            from IPython.display import Image, display

            print(message, receptor)
            display(Image(filename=image_path))
            return

    # No image found in either class folder for this receptor.
    raise ValueError('RECEPTOR NOT PREDICTED: select another available receptor!')
    
def run_plot(receptor):
    """Widget callback: show the UGrad-CAM plot for ``receptor``.

    Any exception raised while plotting is reported on stdout, followed by the
    list of receptor names the model supports, instead of being propagated.
    """
    supported_ids = (1, 3, 4, 5, 7, 8, 9, 10, 13, 14, 16,
                     38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 50)
    try:
        plot_UGradCAM(receptor)
        return
    except Exception as err:
        print('Error:', err)

    # Reached only after a failure: remind the user of the valid choices.
    print('\nAvailable receptors:')
    for receptor_id in supported_ids:
        print('- ' + 'TAS2R' + str(receptor_id))

In [10]:
# Numeric identifiers of the receptors the model is trained to evaluate,
# and the matching receptor names offered in the selection widget
hTAS2R = [
    1, 3, 4, 5, 7, 8, 9, 10, 13, 14, 16,
    38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 50,
]
receptors = ['TAS2R{}'.format(receptor_id) for receptor_id in hTAS2R]

In [11]:
# Build a dropdown over the available receptors; run_plot is re-run on every selection.
# The trailing semicolon keeps the notebook from echoing the repr of the function
# that interact() returns.
interact(run_plot, receptor=receptors);

interactive(children=(Dropdown(description='receptor', options=('TAS2R1', 'TAS2R3', 'TAS2R4', 'TAS2R5', 'TAS2R…

<function __main__.run_plot(receptor)>