# eTransafe Omeprazole Heatmap

This is the use scenario that has been described in the eTox project for exploring the differences with respect to adverse events between omeprazole preclinical and clinical. In order to compute these results the following scenario is executed:
1. authenticate with the eTransafe keycloak
2. Translate the provided compound (e.g., omeprazole) to a SMILES
3. Retrieve similar compounds based on structural similarity
4. Retrieve data from the preclinical and clinical databases
5. Aggregate the data per system organ class
6. Visualize the data using a heatmap

(C) 2020 Erasmus University Medical Center, Rotterdam, The Netherlands
Author: Erik M. van Mulligen, e.vanmulligen@erasmusmc.nl

In [9]:
from knowledgehub.api import KnowledgeHubAPI
import ipywidgets as w
from IPython.display import display, Javascript
from ipypublish import nb_setup
import numpy as np
import seaborn as sns
import pandas
import matplotlib.pyplot as plt

In [10]:
api = KnowledgeHubAPI()

# 1. Authentication
Use eTransafe's authentication service to get access to available services

In [11]:
username = w.Text(value='e.vanmulligen@erasmusmc.nl',placeholder='Knowledge Hub account', description='username:', disabled=False)
password = w.Password(value='', placeholder='Knowledge Hub password', description='password:', disabled=False)
loginBtn = w.Button(description='Login')
status = w.Output()

def on_button_clicked(_):
    if api.login(username.value, password.value) == False:
        print("Failed to login")
    else:
        print("successfully logged in")
        display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))

loginBtn.on_click(on_button_clicked)
w.VBox([username, password, loginBtn])

VBox(children=(Text(value='e.vanmulligen@erasmusmc.nl', description='username:', placeholder='Knowledge Hub ac…

401
Failed to login
token:eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJFUWFIX3huSGNGOVJxb3ZDR1I2aUVvSERxZzRvdjhRRFZ2Q0ozVlprUjJBIn0.eyJqdGkiOiI4NjJiMWY3OS1lYTIxLTQ2ODAtOGQ3NS05ZTQ5Mzk3NTE5YmUiLCJleHAiOjE2MTE4NTA1MjUsIm5iZiI6MCwiaWF0IjoxNjExODQ2OTI1LCJpc3MiOiJodHRwczovL2xvZ2luLmV0cmFuc2FmZS5ldS9hdXRoL3JlYWxtcy9LSCIsImF1ZCI6WyJhY2NvdW50Iiwia25vd2xlZGdlLWh1YiJdLCJzdWIiOiI2MWZlOWY0Ni04Zjk4LTQ3OTEtOGUwOC03ZjViZjQxZGMwNDYiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJrbm93bGVkZ2UtaHViIiwiYXV0aF90aW1lIjowLCJzZXNzaW9uX3N0YXRlIjoiMmUwMzRjNWMtNTJhMC00MmEzLWIyNDgtYzlkZjk2M2ZiOTA4IiwiYWNyIjoiMSIsImFsbG93ZWQtb3JpZ2lucyI6WyIqIl0sInJlYWxtX2FjY2VzcyI6eyJyb2xlcyI6WyJraC1hY2Nlc3MiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJwcm9maWxlIGVtYWlsIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiRXJpayBNLiB2YW4gTXVsbGlnZW4iLCJpc3MiOiJodHRwczovL2xvZ2luLmV0cmFuc2FmZS5ldS9hdXRoL3

<IPython.core.display.Javascript object>

## 2. Translate compound to SMILES using semantic services
For the entered compound name, retrieve the associated SMILES using the semantic services

In [7]:
compoundName = w.Text(value='omeprazole',placeholder='compound name', description='compound:', disabled=False)
compoundBtn = w.Button(description='Retrieve')
compoundSmile = None

def on_compound_entered(_):
        compound = api.SemanticService().normalize(compoundName.value, ['RxNorm','smiles'])
        if 'concepts' in compound:
            for concept in compound['concepts']:
                if 'vocabularyId' in concept:
                    if concept['vocabularyId'] == 'smiles':
                        global compoundSmile
                        compoundSmile = concept['conceptCode']
                        print(f'Found SMILES {compoundSmile} for {compoundName.value}')
                        display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.get_selected_index()+2)'))

compoundBtn.on_click(on_compound_entered) 
w.VBox([compoundName, compoundBtn])

VBox(children=(Text(value='omeprazole', description='compound:', placeholder='compound name'), Button(descript…

## 3. Retrieve similar compounds

In [8]:
similar_compounds = api.SimilarityService().get(compoundSmile, nr_results = 20)
compoundIds = []
compoundNames = []
names = []
smiles = []
similarities = []

if similar_compounds != None:
    if ('search_results' in similar_compounds) and (len(similar_compounds['search_results']) == 1):
        search_result = similar_compounds['search_results'][0]
        if 'obj_nam' in search_result:
            for i in range(len(search_result['obj_nam'])):
                names.append(search_result['obj_nam'][i])
                smiles.append(search_result['SMILES'][i])
                similarities.append("{:.4f}".format(search_result['distances'][i]))

            for cmp in search_result['obj_nam']:
                concept = api.SemanticService().normalize(cmp, ['RxNorm'])
                if 'concepts' in concept and len(concept['concepts']) == 1:
                    compoundIds.append(concept['concepts'][0]['conceptCode'])
                    compoundNames.append(concept['concepts'][0]['conceptName'])
        else:
            print('something wrong in the result object from the similarity service')    

pd = nb_setup.setup_pandas(escape_latex=False)
df = pd.DataFrame(np.random.rand(len(names),3),columns=['NAME','SMILES','SIMILARITY'])
df.NAME = names
df.SMILES = smiles
df.SIMILARITY = similarities
df.round(3)

TypeError: No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t> > from this Python object of type NoneType

# 4. Retrieve data from the preclinical and clinical databases

In [None]:
studies = api.Medline().getStudiesByCompoundIds(compoundIds) + \
          api.Faers().getStudiesByCompoundIds(compoundIds) + \
          api.ClinicalTrials().getStudiesByCompoundIds(compoundIds) + \
          api.eToxSys().getStudiesByCompoundNames(compoundNames)
print(f'Found {len(studies)} studies.')

# 5. Aggregate the data per system organ class

Since the eTox data reports events with an organ. We use our own method to map it to MedDRA's system organ class to make it comparable

In [None]:
system = {}
all_compounds = [c.lower() for c in compoundNames]
socs = {}

# traverse all studies and collect the system organ classes; keep track of the # studies per class
for study in studies:
    source = study['source']
    if source != 'eTOXsys' or study['FINDING']['finding'] != 'No abnormalities detected':
        specimenOrgans = api.SemanticService().getSocs(study['FINDING']['specimenOrgan'])
        for specimenOrgan in specimenOrgans:
            if len(specimenOrgan) > 0:
                if not specimenOrgan in socs:
                    socs[specimenOrgan] = 1
                else:
                    socs[specimenOrgan] += 1
                    
# sort the socs per count
all_socs = {k: v for k, v in sorted(socs.items(), key=lambda item: item[1], reverse=True)}

# traverse all studies and create a matrix per source
for study in studies:
    source = study['source']
    if not source in system.keys():
        system[source] = {'data':np.zeros((len(all_socs),len(all_compounds)), dtype=int).tolist(), 
                          'rows':list(all_socs.keys()), 
                          'cols':all_compounds}
    
    if source != 'eTOXsys' or study['FINDING']['finding'] != 'No abnormalities detected':
        specimenOrgans = api.SemanticService().getSocs(study['FINDING']['specimenOrgan'])
        for specimenOrgan in specimenOrgans:
            if len(specimenOrgan) > 0:
                row = system[source]['rows'].index(specimenOrgan)
                col = system[source]['cols'].index(study['COMPOUND']['name'].lower())
                system[source]['data'][row][col] += 1
                

# 6. Visualize the data using a heatmap

Using seaborn to visualize the content of the various databases. Note that we have to think about ways to easier compare the various results.

In [None]:
figure, axes = plt.subplots(figsize=(20,20))
i = 1
for source,value in system.items():
    plt.subplot(4, 1, i)
    df = pandas.DataFrame(value['data'], index=value['rows'], columns=value['cols'])
    ax = sns.heatmap(df, linewidths=1.0, cmap="YlGnBu")
    ax.set_xticklabels(ax.get_xmajorticklabels(), fontsize=14, rotation=45)
    ax.set_yticklabels(ax.get_ymajorticklabels(), fontsize=14)
    plt.ylabel(source, fontsize=20)
    i += 1

figure.tight_layout()
plt.show()