 # eTransafe Omeprazole Heatmap

This is the use scenario described in the eTox project for exploring the differences in adverse events between the preclinical and clinical data for omeprazole. To compute these results, the following steps are executed:
1. Authenticate with the eTransafe Keycloak
2. Translate the provided compound (e.g., omeprazole) to a SMILES
3. Retrieve similar compounds based on structural similarity
4. Retrieve data from the preclinical and clinical databases
5. Aggregate the data per system organ class
6. Visualize the data using a heatmap

(C) 2021 Erasmus University Medical Center, Rotterdam, The Netherlands
Author: Erik M. van Mulligen, e.vanmulligen@erasmusmc.nl

In [1]:
import sys
sys.path.append('/Users/mulligen/git/etransafe-use-scenarios')
from knowledgehub.api import KnowledgeHubAPI
import ipywidgets as w
from IPython.display import display, Javascript
from ipypublish import nb_setup
import numpy as np
import numpy.ma as ma
import seaborn as sns
import pandas
from matplotlib.colors import LogNorm, Normalize
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Alternative configuration against the TEST server, kept for reference:
#api = KnowledgeHubAPI(server='TEST', client_secret='39c644b3-1f23-4d94-a71f-e0fb43ebd760')
# Client used by every later cell to reach the Knowledge Hub services (DEV server).
# NOTE(review): the client secret is hard-coded in the notebook; consider
# loading it from the environment or from a file kept out of version control.
api = KnowledgeHubAPI(server='DEV', client_secret='3db5a6d7-4694-48a4-8a2e-e9c30d78f9ab')

## 1. Authentication
Use eTransafe's authentication service to get access to available services

In [3]:
# Widgets of the login form; the username field is pre-filled.
username = w.Text(
    value='erik.mulligen',
    placeholder='Knowledge Hub account',
    description='username:',
    disabled=False,
)
password = w.Password(
    value='',
    placeholder='Knowledge Hub password',
    description='password:',
    disabled=False,
)
loginBtn = w.Button(description='Login')
# Output area; it is not referenced again in the code visible in this notebook.
status = w.Output()

def on_button_clicked(_):
    """Handle a click on the login button.

    Logs in with the values currently held by the username and password
    widgets, prints the outcome and, on success, asks the notebook front end
    to execute the cell range that starts right after the selected cell.

    The single argument (the clicked widget) is ignored.
    """
    login_result = api.login(username.value, password.value)

    # The explicit comparison with False is kept on purpose: only a result
    # equal to False counts as a failed login.
    if login_result == False:  # noqa: E712
        print("Failed to login")
        return

    print("successfully logged in")
    run_following_cell = 'IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'
    display(Javascript(run_following_cell))

# Run the login handler whenever the button is clicked.
loginBtn.on_click(on_button_clicked)
# Last expression of the cell: the notebook renders the form stacked vertically.
w.VBox([username, password, loginBtn])

VBox(children=(Text(value='erik.mulligen', description='username:', placeholder='Knowledge Hub account'), Pass…

successfully logged in


<IPython.core.display.Javascript object>

## 2. Translate compound to SMILES using semantic services
For the entered compound name, retrieve the associated SMILES using the semantic services

In [4]:
# Input widgets for the compound lookup; the name field defaults to omeprazole.
compoundName = w.Text(
    value='omeprazole',
    placeholder='compound name',
    description='compound:',
    disabled=False,
)
compoundBtn = w.Button(description='Retrieve')

# Filled in by on_compound_entered with the result of the SMILES lookup.
compoundSmile = None

def on_compound_entered(_):
    """Handle a click on the retrieve button.

    Looks up the SMILES for the name currently held by the compound widget,
    stores the result in the module-level ``compoundSmile``, reports it, and
    asks the notebook front end to execute the cell range that starts right
    after the selected cell.

    The single argument (the clicked widget) is ignored.
    """
    global compoundSmile

    print(f'retrieving smiles for {compoundName.value}')
    chemistry = api.ChemistryService()
    compoundSmile = chemistry.getSMILESByName(compoundName.value)
    print(f'Found SMILES {compoundSmile} for {compoundName.value}')

    run_following_cell = 'IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'
    display(Javascript(run_following_cell))

# Run the SMILES lookup whenever the button is clicked.
compoundBtn.on_click(on_compound_entered) 
# Last expression of the cell: the notebook renders the form stacked vertically.
w.VBox([compoundName, compoundBtn])

VBox(children=(Text(value='omeprazole', description='compound:', placeholder='compound name'), Button(descript…

retrieving smiles for omeprazole
Found SMILES ['COc1ccc2[nH]c([S+]([O-])Cc3ncc(C)c(OC)c3C)nc2c1', 'COc1ccc2nc3n(c2c1)SCc1c(C)c(OC)c(C)c[n+]1-3', 'COc1ccc2[nH]c(S(=O)(=O)Cc3ncc(C)c(OC)c3C)nc2c1'] for omeprazole


<IPython.core.display.Javascript object>

## 3. Retrieve similar compounds

In [None]:
# Fail with a clear message when this cell runs before a SMILES was retrieved
# (compoundSmile is still None) or when the lookup returned nothing.
if not compoundSmile:
    raise ValueError('No SMILES available: retrieve a compound in step 2 first.')

# Only the first SMILES found for the compound is used for the similarity search.
similar_compounds = api.SimilarityService().get(compoundSmile[0])
compoundNames = []  # not used by the code visible in this notebook
names = []
smiles = []
similarities = []

# The three lists stay index-aligned: one entry per similar compound.
if similar_compounds is not None:
    for similar_compound in similar_compounds:
        names.append(similar_compound['name'])
        smiles.append(similar_compound['smiles'])
        # NOTE(review): the service field is called 'distance' but is shown as
        # SIMILARITY below - confirm which of the two the value represents.
        similarities.append(similar_compound['distance'])

pd = nb_setup.setup_pandas()
# Build the table directly from the collected columns instead of creating a
# random-valued frame and overwriting every column afterwards.
df = pd.DataFrame({'NAME': names, 'SMILES': smiles, 'SIMILARITY': similarities})
# Last expression of the cell: display the table with numbers rounded to 3 decimals.
df.round(3)

## 4. Retrieve data from the preclinical and clinical databases

In [None]:
def filterStudies(studies):
    """Return the studies that carry a usable, non-control finding.

    A study is kept when its ``FINDING`` entry has a ``findingVocabulary`` and
    a ``findingCode`` that are not None, the code is not ``'MC:2000001'``, and
    the finding either has no ``dose`` key or a dose different from 0.0
    (a dose of 0.0 marks the control group).

    The input order is preserved and the input is not modified.
    """
    kept = []
    for candidate in studies:
        finding = candidate['FINDING']

        if finding['findingVocabulary'] is None:
            continue

        code = finding['findingCode']
        if code is None or code == 'MC:2000001':
            continue

        # Findings recorded for dose 0.0 belong to the control group.
        if 'dose' in finding and finding['dose'] == 0.0:
            continue

        kept.append(candidate)
    return kept

def _with_socs(raw_studies, algorithm):
    """Filter the raw study records and hand the kept ones to the SOC lookup.

    Returns the filtered list. The return value of ``getSocs`` is not used;
    presumably it annotates the studies in place (the aggregation step reads
    a ``__soc`` field from each finding) - confirm against the service.
    """
    kept = filterStudies(raw_studies)
    api.SemanticService().getSocs(kept, algorithm=algorithm)
    return kept


# Clinical sources are queried by the SMILES of the similar compounds.
faers_studies = _with_socs(api.Faers().getStudiesBySMILES(smiles), 'MEDDRAPT2MEDDRASOC')
medline_studies = _with_socs(api.Medline().getStudiesBySMILES(smiles), 'MEDDRAPT2MEDDRASOC')
ct_studies = _with_socs(api.ClinicalTrials().getStudiesBySMILES(smiles), 'MEDDRAPT2MEDDRASOC')
dailymed_studies = _with_socs(api.DailyMed().getStudiesBySMILES(smiles), 'MEDDRAPT2MEDDRASOC')
# The preclinical source is queried by compound name and uses another mapping.
etox_studies = _with_socs(api.eToxSys().getStudiesByCompoundNames(names), 'MA2MEDDRASOC')

# One combined list, in the same source order as the queries above.
studies = [
    *faers_studies,
    *medline_studies,
    *ct_studies,
    *dailymed_studies,
    *etox_studies,
]
print(f'Found {len(studies)} studies.')

## 5. Aggregate the data per system organ class

Since the eTox data reports events per organ, we use our own method to map them to MedDRA's system organ classes to make them comparable.

In [None]:
# system: per source, a count matrix (rows = organ classes, cols = compounds).
system = {}
# socs: total finding count per system organ class over all sources.
socs = {}
# Column labels: the similar compounds' names, lower-cased.
all_compounds = [name.lower() for name in names]

# Pass 1: sum the finding counts per system organ class.
for study in studies:
    finding = study['FINDING']
    if finding['findingVocabulary'] is None or finding['findingCode'] is None:
        continue
    soc = finding['__soc']
    socs[soc] = socs.get(soc, 0) + finding['count']

# Order the organ classes by total count, largest first; this fixes the row
# order of every matrix built below.
all_socs = dict(sorted(socs.items(), key=lambda item: item[1], reverse=True))

# Pass 2: fill one matrix per source with the counts per (organ class, compound).
for study in studies:
    finding = study['FINDING']
    if finding['findingVocabulary'] is None or finding['findingCode'] is None:
        continue

    source = study['source']
    if source not in system:
        # First study seen for this source: start from an all-zero matrix.
        system[source] = {
            'data': np.zeros((len(all_socs), len(all_compounds)), dtype=int).tolist(),
            'rows': list(all_socs),
            'cols': all_compounds,
        }

    matrix = system[source]
    row = matrix['rows'].index(finding['__soc'])
    # list.index raises ValueError when the study's compound name is not one
    # of the similar compounds.
    col = matrix['cols'].index(study['COMPOUND']['name'].lower())
    matrix['data'][row][col] += finding['count']
                

## 6. Visualize the data using a heatmap

We use seaborn to visualize the content of the various databases. Note that we still have to think about ways to compare the various results more easily.

In [None]:
# Display titles for the source identifiers found in the study records; a
# source without an entry here is titled with its raw identifier.
service_names = [{'name': 'faerspa', 'title': 'FAERS'},
                 {'name': 'medlinepa', 'title': 'MEDLINE'},
                 {'name': 'eTOXsys', 'title': 'eTOXsys'},
                 {'name': 'dailymedpa', 'title': 'DailyMed'},
                 {'name': 'clinicaltrialspa', 'title': 'ClinicalTrials'}]
titles = {entry['name']: entry['title'] for entry in service_names}

# Row labels and colormap are the same for every figure, so build them once.
soc_labels = list(all_socs)
colormap = sns.cubehelix_palette(as_cmap=True, light=.9)

# Draw one heatmap per source: organ classes as rows, similar compounds as columns.
for source, matrix in system.items():
    plt.figure(figsize=(12, 9))
    data = matrix['data']

    # Mask the cells without findings so only non-zero counts are drawn.
    data_mask = np.array(data) == 0

    ax = sns.heatmap(data, mask=data_mask, xticklabels=all_compounds,
                     yticklabels=soc_labels, annot=True, fmt=".0f", cmap=colormap)
    ax.set_xticklabels(ax.get_xmajorticklabels(), rotation=45)

    plt.title(titles.get(source, source), fontsize=14)
    plt.ylabel("Findings per organ class", fontsize=12)
    plt.xlabel("Similar compounds", fontsize=12)
    plt.show()

    # Two blank lines of output to separate consecutive figures.
    print('')
    print('')