# Retrieve data from the eNanoMapper database
https://search.data.enanomapper.net/

- This notebook uses the Apache Solr API and the AMBIT REST API.  
- See the interactive OpenAPI 3 documentation at https://api.ideaconsult.net

In [None]:
from importlib import reload 
import yaml
from solrscope import aa
from solrscope import units
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import requests

import measurement
from solrscope import client_solr
from solrscope import client_ambit
from solrscope import annotation

import pandas as pd
import os.path
import numpy as np
import datetime, time
import json
import sys
import ipywidgets as widgets

import logging
from logging.config import fileConfig
fileConfig('logging_endpoints_config.ini')

global logger
logger = logging.getLogger()

%run units.py

logger.debug('Started at %s \t%s',os.name, datetime.datetime.now())



### Retrieve endpoints using AMBIT REST API

In [None]:
# Build the service-selection UI: a dropdown listing the server URLs returned
# by aa.parseOpenAPI3(), plus a text box for a key when `config_security` is set.
print('Select enanoMapper aggregated search service:')
style = {'description_width': 'initial'}
config, config_servers, config_security, auth_object, msg = aa.parseOpenAPI3()

service_widget = widgets.Dropdown(options=config_servers['url'], description='Service:', disabled=False, style=style)

if config_security is not None:
    # Protected service: show the message from parseOpenAPI3 and ask for a key.
    print(msg)
    apikey_widget = widgets.Text(placeholder='', description=config_security, disabled=False, style=style)
    service = interactive(aa.search_service_protected, url=service_widget, apikey=apikey_widget)
else:
    # Open service: only the URL has to be chosen.
    service = interactive(aa.search_service_open, url=service_widget)

display(service)

In [None]:
# Capture the service URL chosen in the dropdown and, when an auth object
# exists, pass it the key typed into the key widget.
service_uri = service_widget.value
# Identity test against None: `!= None` would defer to a custom __ne__.
if auth_object is not None:
    auth_object.setKey(apikey_widget.value)
print("Sending queries to {}".format(service_uri))
facets = client_solr.Facets()

In [None]:
# Query the "/study" resource under <service>/enm/nanoreg1 through the AMBIT
# client and keep the decoded JSON body in `data`.
ambit_uri = service_uri + "/enm/nanoreg1"
ambit = client_ambit.AMBITQuery(
    root_uri=ambit_uri,
    key="/study",
)
print(ambit.uricompose())
results = ambit.get(
    auth=auth_object,
    page=0,
    pagesize=1000,
)
data = results.json()

In [None]:
#Select endpoint
_sections={}

top_sections = []
for facet in data['facet']:
    #print("{}\t{}\t{}\t{}".format(facet['subcategory'],facet['endpoint'],facet['count'],facet['value']))
    top = facet['subcategory']
    if not top in top_sections:
        top_sections.append(top)
        
    if not top in _sections:
        _sections[top] = []
    
    _sections[top].append(facet['endpoint'])

top_widget = widgets.Dropdown(
    options=top_sections,
    value=top_sections[0],
    description='Select:',
    disabled=False,
)
display(top_widget)


In [None]:
# Offer the endpoints of the chosen subcategory in a dropdown.
top = top_widget.value
_endpoints = _sections[top]
# The second endpoint stays the pre-selected one, as before; fall back to the
# first (or to no selection) so a subcategory with fewer than two endpoints
# no longer raises IndexError.
if len(_endpoints) > 1:
    _default_endpoint = _endpoints[1]
elif _endpoints:
    _default_endpoint = _endpoints[0]
else:
    _default_endpoint = None
category_widget = widgets.Dropdown(
    options=_endpoints,
    value=_default_endpoint,
    description='Endpoint:',
    disabled=False,
)
display(category_widget)

#### Set up the query

In [None]:
# Free-text search box, pre-filled with 'NM220,NM101'.
freetext_widget = widgets.Text(value='NM220,NM101', description='Search', disabled=False)
# A bare expression as the last statement makes the notebook render the widget.
freetext_widget

In [None]:
# Turn the widget selections into a Solr study-documents query.

# An empty search box means "no text filter".
materialfilter = freetext_widget.value
if materialfilter == "":
    materialfilter = None

section = category_widget.value
fields = None

endpoint = "*"

docs_query = client_solr.StudyDocuments()
docs_query.settings['endpointfilter'] = ' effectendpoint_s: {}'.format(endpoint)
docs_query.settings['query_guidance'] = None
docs_query.settings['query_organism'] = None
docs_query.settings['fields'] = None
# NOTE(review): the ' topcategory_s' key carries a leading space - confirm it is intended.
docs_query.setStudyFilter({' topcategory_s': top, 'endpointcategory_s': section})

query = docs_query.getQuery(
    textfilter=materialfilter,
    facets=None,
    fq=None,
    rows=10,
    _params=True,
    _conditions=True,
    _composition=False,
)
logger.info(query)

### Run the query

In [None]:
# Send the query to the selected service and parse the returned documents.
# The original printed `url`, a name no visible cell defines (NameError);
# the URL actually being queried is `service_uri`.
print(service_uri)
r = client_solr.get(service_uri,query=query,auth=auth_object)
print(r.status_code)
docs = r.json()['response']['docs']
rows = docs_query.parse(docs)



In [None]:
# Load the parsed rows into a DataFrame, save them as a tab-separated file
# named after the selected endpoint category, and preview the first rows.
df = pd.DataFrame(rows)
df.to_csv(
    section+".nosmiles.txt",
    sep='\t',
    index=False,
)
df.head()

In [None]:
# List the column labels of the result frame.
df.columns

In [None]:
import numpy as np
def highlight_max(s):
    """Return one CSS string per element of *s*, marking the maximum in red.

    Elements equal to ``s.max()`` get ``'background-color: red'``; every
    other element gets an empty string. Ties are all highlighted.
    """
    peak = s.max()
    styles = []
    for hit in (s == peak):
        styles.append('background-color: red' if hit else '')
    return styles

# For each value column, pivot the results to one row per material name and
# one column per (module, section, guidance, endpoint, ...) combination,
# averaging duplicates, then display the table with column maxima in red.
for criteria in ["value.range.lo"]:
    tmp = pd.pivot_table(
        df,
        values=criteria,
        index=['m.public.name'],
        columns=['p.oht.module','p.oht.section','p.guidance','value.endpoint','value.endpoint_type','value.range.lo.qualifier','value.unit'],
        # The string alias avoids the FutureWarning recent pandas emits for np.mean.
        aggfunc="mean",
    ).reset_index()
    #tmp.style.highlight_null(null_color='red')

    # NOTE(review): subset=top_sections assumes the subcategory names appear
    # as column labels of the pivoted frame - confirm.
    display(tmp.style.apply(highlight_max,subset=top_sections))

In [None]:
# Summarise "value.range.lo" (min, max, mean, std, count) per material,
# guidance, endpoint, endpoint type, qualifier and unit.
tmp = (
    df.groupby(by=["m.public.name","p.guidance","value.endpoint","value.endpoint_type","value.range.lo.qualifier","value.unit"])
    .agg({"value.range.lo" : ["min","max","mean","std","count"]})
    .reset_index()
)
# Flatten the two-level column labels into single strings joined by "_".
tmp.columns = ["_".join(levels) for levels in tmp.columns]
print("Substances {}".format(tmp.shape[0]))
display(tmp)
