# Retrieve data from the eNanoMapper database
https://search.data.enanomapper.net/

- This notebook uses the Apache Solr API and the AMBIT REST API.
- See the OpenAPI 3 interactive documentation at https://api.ideaconsult.net

In [None]:
from importlib import reload 
import yaml
from pynanomapper import aa
from pynanomapper import units
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import requests

from pathlib import Path

import measurement
from pynanomapper import client_solr
from pynanomapper import client_ambit
from pynanomapper import annotation

import pandas as pd
import os.path
import numpy as np
import datetime, time
import json
import sys
import ipywidgets as widgets

import logging
from logging.config import fileConfig
# Configure logging from an INI file resolved against the current working
# directory.
fileConfig('logging_endpoints_config.ini')

# Root logger used by every cell below. No `global` statement is needed here:
# a name assigned at module level is already global.
logger = logging.getLogger()

logger.debug('Started at %s \t%s', os.name, datetime.datetime.now())
import warnings
# NOTE(review): this silences every warning category, deprecation warnings
# included -- consider narrowing the filter.
warnings.simplefilter("ignore")

# Directory that receives the exported result tables; created if missing.
OUTPUT_DIR = "output"
output_dir = Path(OUTPUT_DIR)
output_dir.mkdir(exist_ok=True)

### Select the eNanoMapper aggregated search service

In [None]:
print('Select enanoMapper aggregated search service:')
style = {'description_width': 'initial'}
config, config_servers, config_security, auth_object, msg = aa.parseOpenAPI3()

# Drop-down offering the server URLs returned by aa.parseOpenAPI3().
service_widget = widgets.Dropdown(
    options=config_servers['url'],
    description='Service:',
    disabled=False,
    style=style,
)

if config_security is not None:
    # A security scheme is configured: show the accompanying message and add
    # a text box (presumably for the API key -- it is passed on as `apikey`).
    print(msg)
    apikey_widget = widgets.Text(
        placeholder='',
        description=config_security,
        disabled=False,
        style=style,
    )
    service = interactive(
        aa.search_service_protected,
        url=service_widget,
        apikey=apikey_widget,
    )
else:
    # No security scheme: only the service URL has to be chosen.
    service = interactive(aa.search_service_open, url=service_widget)

display(service)

In [None]:
# Capture the service URL currently selected in the drop-down.
service_uri = service_widget.value
# Identity comparison: `!= None` would dispatch to a user-defined __ne__.
# NOTE(review): apikey_widget only exists when config_security was not None
# in the selection cell -- confirm auth_object is None in the open case.
if auth_object is not None:
    auth_object.setKey(apikey_widget.value)
print("Sending queries to {}".format(service_uri))


### Retrieve information about endpoint types 

In [None]:
# Summarise the whole index ("*:*") by top category and endpoint category.
# The result is used below like a pandas DataFrame (presumably it is one).
facets = client_solr.Facets()
facet_fields = ["topcategory_s", "endpointcategory_s"]
df = facets.summary(service_uri, auth_object, query="*:*", fields=facet_fields)
df.head()


### Select the category, free-text filter and endpoint

In [None]:
# Top-level category selector, pre-set to "P-CHEM".
top_widget = widgets.Dropdown(
    options=df['topcategory_s'].unique(),
    value="P-CHEM",
    description='Select:',
    disabled=False,
)

# Initial category choices: (label, value) pairs taken from the "P-CHEM" rows.
pchem_rows = df[df['topcategory_s'] == "P-CHEM"]
pchem_pairs = pchem_rows[["endpointcategory_name", "endpointcategory_s"]]
category_widget = widgets.Dropdown(
    options=list(pchem_pairs.itertuples(index=False, name=None)),
    description='Category:',
    layout=Layout(width='50%'),
    disabled=False,
)

# Free-text filter; an empty box is turned into "no filter" in the setup cell.
freetext_widget = widgets.Text(
    value='NM220,NM101',
    description='Free text query',
    layout=Layout(width='50%'),
    disabled=False,
)

# Endpoint name to match; defaults to the wildcard '*'.
endpoint_widget = widgets.Text(
    value='*',
    description='Endpoint',
    layout=Layout(width='50%'),
    disabled=False,
)
def define_query(_top, _section, _freetext, _endpoint):
    """Refresh the category drop-down for the selected top-level category.

    Invoked by ``interact`` with the current widget values. Only ``_top`` is
    read; the remaining parameters exist so that ``interact`` renders and
    tracks their widgets. The selections themselves are read back from the
    widgets in a later cell, so nothing is stored here.
    """
    filtered = df[df['topcategory_s'] == _top]
    label_value_pairs = filtered[["endpointcategory_name", "endpointcategory_s"]]
    category_widget.options = list(label_value_pairs.itertuples(index=False, name=None))


interact(
    define_query,
    _top=top_widget,
    _section=category_widget,
    _freetext=freetext_widget,
    _endpoint=endpoint_widget,
)

### Setup the query

In [None]:
# Snapshot the current widget selections.
top = top_widget.value
section = category_widget.value
materialfilter = freetext_widget.value
endpoint = endpoint_widget.value
if materialfilter == "":
    # An empty text box means "no free-text filter".
    materialfilter = None
logger.info('{}\t{}\t{}\t{}'.format(top, section, materialfilter, endpoint))

# Assemble the study-document query from the selections.
docs_query = client_solr.StudyDocuments()
docs_query.settings['endpointfilter'] = ' effectendpoint_s: {}'.format(endpoint)
docs_query.settings['query_guidance'] = None
docs_query.settings['query_organism'] = None
docs_query.setStudyFilter({' topcategory_s': top, 'endpointcategory_s': section})
docs_query.settings['fields'] = "*"
query = docs_query.getQuery(
    textfilter=materialfilter,
    facets=None,
    fq=None,
    rows=10,
    _params=True,
    _conditions=True,
    _composition=False,
)
logger.info(query)

### Run the query

In [None]:
# Send the query to the selected service and log the reported status code.
r = client_solr.get(service_uri, query=query, auth=auth_object)
logger.info(r.status_code)

# Pull the document list out of the JSON body and let the query object
# turn it into rows.
response_body = r.json()
docs = response_body['response']['docs']
rows = docs_query.parse(docs, process=None)



### The search results - display and save in a file

In [None]:
# The output file is named after the selected category value.
results_filename = section + ".nosmiles.txt"
results_filepath = output_dir.joinpath(results_filename)

# Tabulate the parsed rows, write them tab-separated, and preview the top.
results = pd.DataFrame(rows)
results.to_csv(results_filepath, sep='\t', index=False)
results.head()

In [None]:
# Show the column labels of the results table as the cell output.
results.columns

In [None]:
import numpy as np
def highlight_max(s):
    """Return one CSS string per entry of *s*, marking the maximum in red.

    Entries equal to ``s.max()`` get ``'background-color: red'``; all others
    get an empty string. Ties are all highlighted.
    """
    top_value = s.max()
    return ['background-color: red' if hit else '' for hit in (s == top_value)]

# Pivot each value column into a material-by-measurement table and display
# it with the per-column maximum highlighted.
for criteria in ["value.range.lo"]:
    tmp = pd.pivot_table(
        results,
        values=criteria,
        index=['m.public.name'],
        columns=[
            'p.oht.module',
            'p.oht.section',
            'value.endpoint',
            'p.guidance',
            'value.endpoint_type',
            'value.range.lo.qualifier',
            'value.unit',
        ],
        # The string alias selects pandas' own mean; passing the NumPy
        # callable is deprecated in recent pandas releases.
        aggfunc="mean",
    ).reset_index()

    display(tmp.style.apply(highlight_max))

In [None]:
# Summary statistics of "value.range.lo" for every combination of the
# grouping columns below.
group_keys = [
    "m.public.name",
    "p.guidance",
    "value.endpoint",
    "value.endpoint_type",
    "value.range.lo.qualifier",
    "value.unit",
]
tmp = (
    results.groupby(by=group_keys)
    .agg({"value.range.lo": ["min", "max", "mean", "std", "count"]})
    .reset_index()
)
# Flatten the two-level column index to "<column>_<statistic>". Empty levels
# are skipped so the grouping columns keep their plain names instead of
# gaining a trailing underscore; to_flat_index() yields the label tuples
# regardless of what Index.ravel() returns in the installed pandas.
tmp.columns = [
    "_".join(part for part in col if part)
    for col in tmp.columns.to_flat_index()
]
print("Substances {}".format(tmp.shape[0]))
display(tmp.fillna(''))


.