# Retrieve data from the eNanoMapper database
https://search.data.enanomapper.net/

- This notebook uses the Apache Solr API and the AMBIT REST API.
- See the OpenAPI3 interactive documentation at https://api.ideaconsult.net

In [1]:
from importlib import reload 
import yaml
from solrscope import aa
from solrscope import units
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import requests

import measurement
from solrscope import client_solr
from solrscope import client_ambit
from solrscope import annotation

import pandas as pd
import os.path
import numpy as np
import datetime, time
import json
import sys
import ipywidgets as widgets

import logging
from logging.config import fileConfig
fileConfig('logging_endpoints_config.ini')

global logger
logger = logging.getLogger()

%run units.py

logger.debug('Started at %s \t%s',os.name, datetime.datetime.now())



### Retrieve endpoints 

In [2]:
print('Select enanoMapper aggregated search service:')
style = {'description_width': 'initial'}

# Parse the OpenAPI3 description: server list, security scheme name (if any),
# an auth object and a message to show the user.
config, config_servers, config_security, auth_object, msg = aa.parseOpenAPI3()

# Dropdown listing the service URLs found in the OpenAPI3 description.
service_widget = widgets.Dropdown(
    options=config_servers['url'], description='Service:', disabled=False, style=style
)

if config_security is not None:
    # A security scheme is declared: show the message and ask for the API key.
    # NOTE: apikey_widget only exists on this branch.
    print(msg)
    apikey_widget = widgets.Text(
        placeholder='', description=config_security, disabled=False, style=style
    )
    service = interactive(
        aa.search_service_protected, url=service_widget, apikey=apikey_widget
    )
else:
    # No security scheme declared: only the service URL has to be chosen.
    service = interactive(aa.search_service_open, url=service_widget)

display(service)

Select enanoMapper aggregated search service:
Enter `X-Gravitee-Api-Key` you have received upon subscription to http://api.ideaconsult.net


  config = yaml.load(requests.get(url+config).text)


interactive(children=(Dropdown(description='Service:', options=('https://api.ideaconsult.net/enanomapper', 'ht…

In [3]:
# Freeze the service URL currently selected in the dropdown.
service_uri = service_widget.value

# Use an identity test for None (PEP 8); `!= None` can be overridden by __ne__.
# NOTE(review): apikey_widget is only created when config_security is set;
# presumably auth_object is non-None in exactly that case -- confirm in
# aa.parseOpenAPI3.
if auth_object is not None:
    auth_object.setKey(apikey_widget.value)

print("Sending queries to {}".format(service_uri))


Sending queries to https://api.ideaconsult.net/nanoreg1


In [4]:
# Summarise all documents ("*:*") of the selected service, faceted by
# top-level category and endpoint category.
facets = client_solr.Facets()

df = facets.summary(
    service_uri,
    auth_object,
    query="*:*",
    fields=["topcategory_s", "endpointcategory_s"],
)
df.head()


Unnamed: 0,topcategory_s,endpointcategory_s,Number of data points,endpointcategory_term,endpointcategory_name
0,TOX,ENM_0000068_SECTION,10990,http://www.bioassayontology.org/bao#ENM_0000068,CellViability
1,TOX,TO_GENETIC_IN_VITRO_SECTION,7811,http://www.bioassayontology.org/bao#BAO_0002167,Genetic toxicity invitro
2,TOX,NPO_1339_SECTION,3429,http://purl.obolibrary.org/obo/NPO_1339,Immunotoxicity
3,TOX,TO_REPEATED_ORAL_SECTION,1487,http://purl.enanomapper.org/onto/ENM_0000021,Repeated dose toxicity-oral
4,TOX,ENM_0000044_SECTION,1322,http://purl.enanomapper.org/onto/ENM_0000044,Barrier integrity


In [5]:
# Top-level category preselected when the selected service offers it.
DEFAULT_TOP_CATEGORY = "P-CHEM"


def _category_options(top_category):
    """Return the category dropdown options for one top-level category.

    Each option is an ``(endpointcategory_name, endpointcategory_s)`` tuple
    taken from the rows of ``df`` whose ``topcategory_s`` equals
    ``top_category``. ipywidgets shows the first element as the label and
    uses the second as the widget value.
    """
    subset = df[df['topcategory_s'] == top_category]
    return list(
        subset[["endpointcategory_name", "endpointcategory_s"]].itertuples(index=False, name=None)
    )


top_categories = list(df['topcategory_s'].unique())
# Dropdown raises if `value` is not among `options`, so fall back to the first
# available category (or None for an empty summary) when the default is absent.
if DEFAULT_TOP_CATEGORY in top_categories:
    initial_top = DEFAULT_TOP_CATEGORY
else:
    initial_top = top_categories[0] if top_categories else None

top_widget = widgets.Dropdown(
    options=top_categories,
    value=initial_top,
    description='Select:',
    disabled=False,
)
category_widget = widgets.Dropdown(
    options=_category_options(initial_top),
    description='Category:',
    disabled=False,
)
freetext_widget=widgets.Text(
    value='NM220,NM101',
    description='Free text query',
    disabled=False
)
endpoint_widget=widgets.Text(
    value='*',
    description='Endpoint',
    disabled=False
)


def define_query(_top,_section,_freetext,_endpoint):
    """Keep the category dropdown in sync with the selected top category.

    Called by ``interact`` whenever one of the four widgets changes. Only
    ``_top`` is used; the other parameters exist so that ``interact`` renders
    their widgets. The selections themselves are read from the widgets'
    ``.value`` attributes by the following cell, so nothing is stored here.
    """
    category_widget.options = _category_options(_top)


interact(define_query,_top= top_widget,_section=category_widget,_freetext=freetext_widget,_endpoint=endpoint_widget)

interactive(children=(Dropdown(description='Select:', index=1, options=('TOX', 'P-CHEM', 'ECOTOX'), value='P-C…

<function __main__.define_query(_top, _section, _freetext, _endpoint)>

#### Set up the query

In [6]:
# Snapshot the current widget selections into plain variables.
top = top_widget.value
section = category_widget.value
endpoint = endpoint_widget.value
materialfilter = freetext_widget.value
# An empty free-text box is passed on as None rather than as "".
if materialfilter == "":
    materialfilter = None
logger.info('{}\t{}\t{}\t{}'.format(top,section,materialfilter,endpoint))

docs_query = client_solr.StudyDocuments()
docs_query.settings['endpointfilter'] = ' effectendpoint_s: {}'.format(endpoint)
# These settings are explicitly cleared for this query.
for _setting in ('query_guidance', 'query_organism', 'fields'):
    docs_query.settings[_setting] = None
# NOTE(review): the first key carries a leading space (' topcategory_s'); it is
# kept byte-for-byte here -- confirm whether it is intentional.
docs_query.setStudyFilter({' topcategory_s': top, 'endpointcategory_s':section})

query = docs_query.getQuery(
    textfilter=materialfilter,
    facets=None,
    fq=None,
    rows=10,
    _params=True,
    _conditions=True,
    _composition=False,
)
logger.info(query)

2019-12-15 10:26:25,874  INFO     P-CHEM	ZETA_POTENTIAL_SECTION	NM220,NM101	*
2019-12-15 10:26:25,874  INFO     {'q': '{!parent which=type_s:substance}(NM220,NM101)', 'fq': None, 'wt': 'json', 'fl': 'dbtag_hss,name_hs,publicname_hs,substanceType_hs,owner_name_hs,s_uuid_hs,[child parentFilter=filter(type_s:substance) childFilter="filter(type_s:study AND     topcategory_s:P-CHEM AND endpointcategory_s:ZETA_POTENTIAL_SECTION AND  effectendpoint_s: *)  OR filter(type_s:params AND     topcategory_s:P-CHEM AND endpointcategory_s:ZETA_POTENTIAL_SECTION)  OR filter(type_s:conditions AND     topcategory_s:P-CHEM AND endpointcategory_s:ZETA_POTENTIAL_SECTION) " limit=10000]', 'json.facet': '', 'rows': 10}


### Run the query

In [7]:
# Send the query to the selected service.
r = client_solr.get(service_uri,query=query,auth=auth_object)
logger.info(r.status_code)

# Fail with a readable message on an HTTP error status instead of an opaque
# KeyError / JSON decoding error from the lines below.
if r.status_code >= 400:
    raise RuntimeError(
        "Query to {} failed with HTTP status {}".format(service_uri, r.status_code)
    )

# The matching documents are under response -> docs in the JSON payload.
docs = r.json()['response']['docs']
rows = docs_query.parse(docs)



2019-12-15 10:26:31,982  INFO     200
2019-12-15 10:26:31,984  INFO     {
  "dbtag_hss": [
    "NNRG"
  ],
  "name_hs": "NM-100 (TiO2 50-150 nm)",
  "publicname_hs": "JRCNM01000a",
  "owner_name_hs": "NANoREG",
  "substanceType_hs": "NPO_1486",
  "s_uuid_hs": "NNRG-18280a4a-45e9-adc0-df3b-125397b1255f"
}


In [None]:
# Turn the parsed rows into a table and save it as a tab-separated file named
# after the selected section.
results = pd.DataFrame(rows)
nosmiles_path = section + ".nosmiles.txt"
results.to_csv(nosmiles_path, sep='\t', index=False)
results.head()

In [None]:
# Show the column names of the results table; the pivot and group-by cells
# below refer to columns by these names.
results.columns

In [None]:
import numpy as np
def highlight_max(s):
    """Return per-cell CSS marking the maximum of a numeric Series in red.

    Meant to be passed to ``Styler.apply``, which calls it once per column.

    Parameters
    ----------
    s : pandas.Series
        One column (or row) of the styled frame.

    Returns
    -------
    list of str
        One CSS string per element: ``'background-color: red'`` where the
        element equals the maximum, ``''`` elsewhere. Non-numeric Series
        (e.g. a substance-name column) are left unstyled; without this guard
        the lexicographically largest label would be highlighted, and a column
        mixing strings with NaN would raise ``TypeError`` in ``max()``.
    """
    if not pd.api.types.is_numeric_dtype(s):
        return ['' for _ in s]
    is_max = s == s.max()
    return ['background-color: red' if v else '' for v in is_max]

# One pivot table per value column: rows are substances, columns are the
# combinations of module / section / guidance / endpoint / qualifier / unit,
# and each cell holds the mean of the value column.
for criteria in ["value.range.lo"]:
    tmp = pd.pivot_table(
        results,
        values=criteria,
        index=['m.public.name'],
        columns=['p.oht.module','p.oht.section','p.guidance','value.endpoint','value.endpoint_type','value.range.lo.qualifier','value.unit'],
        # The string "mean" selects pandas' own mean implementation; passing
        # np.mean resolves to the same thing but emits a FutureWarning on
        # recent pandas versions.
        aggfunc="mean",
    ).reset_index()

    # Highlight each column's maximum.
    display(tmp.style.apply(highlight_max))

In [None]:
# Summary statistics of value.range.lo for every combination of substance,
# guidance, endpoint, endpoint type, qualifier and unit.
group_columns = ["m.public.name","p.guidance","value.endpoint","value.endpoint_type","value.range.lo.qualifier","value.unit"]
tmp = (
    results
    .groupby(by=group_columns)
    .agg({"value.range.lo" : ["min","max","mean","std","count"]})
    .reset_index()
)

# Flatten the two-level column index into single names such as
# "value.range.lo_mean". Iterating the MultiIndex directly yields the tuples
# without relying on Index.ravel(), whose return type differs between pandas
# versions. Empty levels are skipped so the group keys keep their plain names
# ("m.public.name" instead of "m.public.name_").
tmp.columns = ["_".join(part for part in col if part) for col in tmp.columns]

# Note: this counts rows of the summary table, i.e. one per group.
print("Substances {}".format(tmp.shape[0]))
display(tmp)


In [None]:
# Intentionally empty: a lone "." is a SyntaxError under Restart & Run All.