# Retrieve data from the eNanoMapper database
https://search.data.enanomapper.net/

- This notebook uses the Apache Solr API and the AMBIT REST API  
- See the interactive OpenAPI 3 documentation at https://api.ideaconsult.net

In [None]:
from importlib import reload 
import yaml
from solrscope import aa
from solrscope import units
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import requests

import measurement
from solrscope import client_solr
from solrscope import client_ambit
from solrscope import annotation

import pandas as pd
import os.path
import numpy as np
import datetime, time
import json
import sys
import ipywidgets as widgets

# --- Logging / warnings / output configuration for the whole notebook ---
import logging
import warnings
from logging.config import fileConfig

# Logging configuration is read from this ini file; the path is relative,
# so it is resolved against the current working directory.
fileConfig('logging_endpoints_config.ini')

# Root logger used by the later cells. A notebook cell already executes at
# module scope, so a `global` declaration here would have no effect.
logger = logging.getLogger()

logger.debug('Started at %s \t%s',os.name, datetime.datetime.now())

# NOTE(review): this silences *every* warning, including deprecation notices
# from pandas/ipywidgets - consider narrowing the filter.
warnings.simplefilter("ignore")

# Directory (relative to the working directory) where result files are written.
OUTPUT="output/"

### Retrieve endpoints 

In [2]:
# Build the service chooser from what aa.parseOpenAPI3() returns.
print('Select enanoMapper aggregated search service:')
config,config_servers, config_security, auth_object, msg = aa.parseOpenAPI3()

# Let the description labels take their natural width.
style = {'description_width': 'initial'}

service_widget = widgets.Dropdown(
    description='Service:',
    options=config_servers['url'],
    style=style,
    disabled=False
)

if config_security is not None:
    # A security entry is present: show the accompanying message and ask for
    # the key in a text box labelled with `config_security`.
    print(msg)
    apikey_widget = widgets.Text(
        description=config_security,
        placeholder='',
        style=style,
        disabled=False
    )
    service = interactive(
        aa.search_service_protected,
        url=service_widget,
        apikey=apikey_widget
    )
else:
    # No security entry: only the service URL has to be chosen.
    service = interactive(aa.search_service_open, url=service_widget)

display(service)

Select enanoMapper aggregated search service:
Enter `X-Gravitee-Api-Key` you have received upon subscription to http://api.ideaconsult.net


interactive(children=(Dropdown(description='Service:', options=('https://api.ideaconsult.net/enanomapper', 'ht…

In [3]:
# Freeze the widget selections into plain variables used by the following cells.
service_uri = service_widget.value

# Identity test (`is not None`) rather than `!= None`: it cannot be affected
# by a custom `__ne__`/`__eq__` on the auth object.
if auth_object is not None:
    auth_object.setKey(apikey_widget.value)

print("Sending queries to {}".format(service_uri))


Sending queries to https://api.ideaconsult.net/gracious


In [4]:
# Facet summary over the whole index (query "*:*"), broken down by
# top category and endpoint category.
facets = client_solr.Facets()

df = facets.summary(
    service_uri,
    auth_object,
    query="*:*",
    fields=["topcategory_s", "endpointcategory_s"],
)
df.head()


Unnamed: 0,topcategory_s,endpointcategory_s,Number of data points,endpointcategory_term,endpointcategory_name
0,TOX,ENM_0000068_SECTION,23071,http://www.bioassayontology.org/bao#ENM_0000068,CellViability
1,TOX,NPO_1339_SECTION,9488,http://purl.obolibrary.org/obo/NPO_1339,Immunotoxicity
2,TOX,TO_GENETIC_IN_VITRO_SECTION,8646,http://www.bioassayontology.org/bao#BAO_0002167,Genetic toxicity invitro
3,TOX,ENM_0000044_SECTION,6290,http://purl.enanomapper.org/onto/ENM_0000044,Barrier integrity
4,TOX,ENM_0000037_SECTION,5110,http://www.bioassayontology.org/bao#ENM_0000037,Oxidative Stress


In [5]:
# Top-level categories present in the facet summary of the selected service.
top_options = list(df['topcategory_s'].unique())

# "P-CHEM" is the preferred initial selection, but a Dropdown raises if its
# `value` is not among its `options`; fall back to the first available
# category (or to no selection when the summary is empty).
if "P-CHEM" in top_options:
    default_top = "P-CHEM"
elif top_options:
    default_top = top_options[0]
else:
    default_top = None

top_widget = widgets.Dropdown(
    options=top_options,
    value=default_top,
    description='Select:',
    disabled=False,
)

# (label, value) pairs: the readable category name is shown, the
# `endpointcategory_s` code is what the widget returns as its value.
category_widget = widgets.Dropdown(
    options=list(
        df[df['topcategory_s']==default_top][["endpointcategory_name","endpointcategory_s"]]
        .itertuples(index=False,name=None)
    ),
    description='Category:',
    disabled=False,
)

# Free-text filter; an empty string is mapped to "no filter" by the
# query-setup cell.
freetext_widget=widgets.Text(
    value='NM220,NM101',
    description='Free text query',
    disabled=False
)

# Endpoint filter; the default '*' is passed through unchanged into the query.
endpoint_widget=widgets.Text(
    value='*',
    description='Endpoint',
    disabled=False
)
def define_query(_top,_section,_freetext,_endpoint):
    """Keep the category dropdown in sync with the selected top category.

    Used as the callback of `interact`. Only `_top` is read: the rows of the
    module-level `df` whose `topcategory_s` equals `_top` are turned into
    (endpointcategory_name, endpointcategory_s) pairs and assigned to
    `category_widget.options`.

    `_section`, `_freetext` and `_endpoint` are accepted so that `interact`
    renders their widgets; their values are read directly from the widgets by
    the query-setup cell, so nothing is stored or returned here.
    """
    filtered = df[df['topcategory_s']==_top]
    category_widget.options = list(
        filtered[["endpointcategory_name","endpointcategory_s"]].itertuples(index=False,name=None)
    )
    
    
# Render the four controls and wire them to define_query. `interact` returns
# the callback itself, so the trailing `;` keeps the notebook from echoing
# the function repr as cell output.
interact(
    define_query,
    _top=top_widget,
    _section=category_widget,
    _freetext=freetext_widget,
    _endpoint=endpoint_widget,
);

interactive(children=(Dropdown(description='Select:', index=1, options=('TOX', 'P-CHEM', 'ECOTOX'), value='P-C…

<function __main__.define_query(_top, _section, _freetext, _endpoint)>

#### Set up the query

In [27]:
# Snapshot the current widget values into plain variables.
top = top_widget.value
section = category_widget.value
endpoint = endpoint_widget.value
materialfilter = freetext_widget.value
if materialfilter == "":
    # An empty text box means "no free-text filter".
    materialfilter = None
logger.info('{}\t{}\t{}\t{}'.format(top,section,materialfilter,endpoint))

# Configure the study-document query: restrict by endpoint, leave guidance
# and organism unset, and filter on the chosen top category / section.
docs_query = client_solr.StudyDocuments()
docs_query.settings['endpointfilter'] = ' effectendpoint_s: {}'.format(endpoint)
docs_query.settings['query_guidance'] = None
docs_query.settings['query_organism'] = None
docs_query.setStudyFilter({' topcategory_s': top, 'endpointcategory_s':section})

# Build the query parameters that the next cell sends to the service.
query = docs_query.getQuery(
    textfilter=materialfilter,
    facets=None,
    fq=None,
    rows=10,
    _params=True,
    _conditions=True,
    _composition=False,
)
logger.info(query)

2019-12-16 10:40:38,488  INFO     P-CHEM	PC_DENSITY_SECTION	GRACIOUS	*
2019-12-16 10:40:38,488  INFO     {'q': '{!parent which=type_s:substance}(GRACIOUS)', 'fq': None, 'wt': 'json', 'fl': 'dbtag_hss,name_hs,publicname_hs,substanceType_hs,owner_name_hs,s_uuid_hs,substance_annotation_hss,[child parentFilter=filter(type_s:substance) childFilter="filter(type_s:study AND     topcategory_s:P-CHEM AND endpointcategory_s:PC_DENSITY_SECTION AND  effectendpoint_s: *)  OR filter(type_s:params AND     topcategory_s:P-CHEM AND endpointcategory_s:PC_DENSITY_SECTION)  OR filter(type_s:conditions AND     topcategory_s:P-CHEM AND endpointcategory_s:PC_DENSITY_SECTION) " limit=10000]', 'json.facet': '', 'rows': 10}


### Run the query

In [28]:
# Send the prepared query to the selected service.
r = client_solr.get(service_uri,query=query,auth=auth_object)
logger.info(r.status_code)

# Fail fast with a readable message (e.g. on a missing or invalid API key)
# instead of a later JSON/KeyError while unpacking the body.
if not 200 <= r.status_code < 300:
    raise RuntimeError(
        "Query to {} failed with HTTP status {}".format(service_uri, r.status_code)
    )

# The matching documents are under response -> docs in the JSON body.
docs = r.json()['response']['docs']

# Convert the documents into rows; the export cell wraps them in a DataFrame.
rows = docs_query.parse(docs)



2019-12-16 10:40:39,728  INFO     200
2019-12-16 10:40:39,729  INFO     {
  "dbtag_hss": [
    "GRCS"
  ],
  "name_hs": "organic pigment Red 254 transparent, diketopyrrolopyrrole, 43nm, hydrophobic",
  "publicname_hs": "DPP_nano",
  "owner_name_hs": "GRACIOUS",
  "substanceType_hs": "CHEBI_59999",
  "s_uuid_hs": "GRCS-7bd6de68-a312-3254-8b3f-9f46d6976ce6",
  "substance_annotation_hss": [
    "Pigments",
    "Organic pigments",
    "Case study materials"
  ],
  "_childDocuments_": [
    {
      "Amount of powder_UNIT_s": "mg",
      "Amount of powder_d": 1560.2,
      "DISPERSION MEDIUM_s": "no",
      "E.method_s": "He pycnometry",
      "E.sop_reference_s": "",
      "He-quality_s": "Helium 4.6",
      "T.Calibration material Volume_s": "",
      "T.samplepreparation_s": "Accupyc 1340 automatic gas pycnometer",
      "T.temperature_UNIT_s": "\u2103",
      "T.temperature_d": 20.0,
      "document_uuid_s": "GRCS-3c198750-d482-3f41-be04-4edfba0e8089",
      "endpointcategory_s": "PC_DEN

In [29]:
# Look at the first returned document and show its annotations as a single
# ';'-separated string.
doc = docs[0]
';'.join(tag for tag in doc['substance_annotation_hss'])

'Pigments;Organic pigments;Case study materials'

In [30]:
# One row per parsed measurement.
results = pd.DataFrame(rows)

# Make sure the target directory exists (a fresh checkout has none), and
# build the path with os.path.join so it is correct whether or not OUTPUT
# ends with a path separator.
if OUTPUT:
    os.makedirs(OUTPUT, exist_ok=True)
results.to_csv(os.path.join(OUTPUT, section + ".nosmiles.txt"), sep='\t', index=False)

results.head()

Unnamed: 0,db,m.materialprovider,m.public.name,m.substance.name,m.substance.type,p.guidance,p.oht.module,p.oht.section,p.reference,p.reference_year,...,x.params.g_d,x.params.guidance,x.params.material state,x.params.n of he purging cycle of the chamber before the measurement_d,x.params.number cycles per sample/measurement_d,x.params.replicates for each measurements_d,xR.purposeFlag,xR.reliability,xR.studyResultType,xx.QualityRemark
0,GRCS,GRACIOUS,DPP_nano,"organic pigment Red 254 transparent, diketopyr...",CHEBI_59999,DIN EN ISO 1183-3,P-CHEM,PC_DENSITY_SECTION,,0,...,,DIN EN ISO 1183-3,powder,10.0,10.0,2.0,,,,
1,GRCS,GRACIOUS,DPP_non-nano,"organic pigment Red 254 opaque, diketopyrrolop...",CHEBI_59999,DIN EN ISO 1183-3,P-CHEM,PC_DENSITY_SECTION,,0,...,,DIN EN ISO 1183-3,powder,10.0,10.0,2.0,,,,
2,GRCS,GRACIOUS,DPP_premixed,"Organic pigment Red 254 opaque, diketopyrrolop...",CHEBI_59999,DIN EN ISO 1183-3,P-CHEM,PC_DENSITY_SECTION,,0,...,,DIN EN ISO 1183-3,powder,10.0,10.0,2.0,,,,
3,GRCS,GRACIOUS,CuPhthalo_nano,"Organic pigment Blue 15, Cu-Phthalocyanine 17n...",CHEBI_59999,DIN EN ISO 1183-3,P-CHEM,PC_DENSITY_SECTION,,0,...,,DIN EN ISO 1183-3,powder,10.0,10.0,2.0,,,,
4,GRCS,GRACIOUS,CuPhthalo_halogen,"Organic pigment Green 7, CuPhthalocyanine 14nm...",CHEBI_59999,DIN EN ISO 1183-3,P-CHEM,PC_DENSITY_SECTION,,0,...,,DIN EN ISO 1183-3,powder,10.0,10.0,2.0,,,,


In [31]:
# List every column of the `results` frame.
results.columns

Index(['db', 'm.materialprovider', 'm.public.name', 'm.substance.name',
       'm.substance.type', 'p.guidance', 'p.oht.module', 'p.oht.section',
       'p.reference', 'p.reference_year', 'p.study_provider', 'uuid.assay',
       'uuid.document', 'uuid.investigation', 'uuid.substance',
       'value.endpoint', 'value.endpoint_synonym', 'value.endpoint_type',
       'value.range.lo', 'value.range.lo.qualifier', 'value.range.up',
       'value.range.up.qualifier', 'value.text', 'value.uncertainty',
       'value.uncertainty_type', 'value.unit', 'x.conditions.E.method',
       'x.conditions.effectid_hs', 'x.conditions.guidance',
       'x.conditions.replicate_d', 'x.params.Amount of powder_UNIT',
       'x.params.Amount of powder_d', 'x.params.Centrifugation time_UNIT',
       'x.params.Centrifugation time_d', 'x.params.DISPERSION MEDIUM',
       'x.params.E.method', 'x.params.E.sop_reference',
       'x.params.Energy of sonication_UNIT', 'x.params.Energy of sonication_d',
       'x.params

In [32]:
import numpy as np
def highlight_max(s):
    """Return one CSS string per element of `s`, marking the maximum in red.

    Elements equal to `s.max()` get 'background-color: red'; all others get
    an empty string. Ties are all highlighted. Intended for
    `DataFrame.style.apply`.
    """
    peak = s.max()
    styles = []
    for hit in s == peak:
        styles.append('background-color: red' if hit else '')
    return styles

# Pivot each value column into a materials x (module, section, endpoint, ...)
# table of means and highlight the per-column maximum.
for criteria in ["value.range.lo"]:
    tmp = pd.pivot_table(
        results,
        values=criteria,
        index=['m.public.name'],
        columns=[
            'p.oht.module',
            'p.oht.section',
            'value.endpoint',
            'p.guidance',
            'value.endpoint_type',
            'value.range.lo.qualifier',
            'value.unit',
        ],
        # The string alias selects pandas' own mean; passing the NumPy
        # callable (np.mean) is deprecated in recent pandas releases.
        aggfunc="mean",
    ).reset_index()

    display(tmp.style.apply(highlight_max))

p.oht.module,m.public.name,P-CHEM,P-CHEM
p.oht.section,Unnamed: 1_level_1,PC_DENSITY_SECTION,PC_DENSITY_SECTION
value.endpoint,Unnamed: 1_level_2,DENSITY_SKELETAL,EFFECTIVE DENSITY
p.guidance,Unnamed: 1_level_3,DIN EN ISO 1183-3,"NANOREG DISPERSION PROTOCOL, VOLUMETRIC CELL MEASURMENT (VCM), ADAPTED FROM DELOID ET AL 2014, 2017"
value.endpoint_type,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4
value.range.lo.qualifier,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5
value.unit,Unnamed: 1_level_6,g/cm3,g/cm3
0,CuPhthalo_halogen,2.1002,
1,CuPhthalo_nano,1.5825,
2,DPP_nano,1.6289,
3,DPP_non-nano,1.6299,
4,DPP_premixed,1.6425,
5,Fe2O3_nano_A,4.2612,
6,Fe2O3_nano_B,5.1894,
7,JRCNM01100a,5.67,
8,JRCNM02102a,7.12,2.21419
9,JRCNM50001a,4.13,2.03414


In [33]:
# Descriptive statistics of the lower range value per material / guidance /
# endpoint / unit combination.
tmp = (
    results
    .groupby(by=[
        "m.public.name",
        "p.guidance",
        "value.endpoint",
        "value.endpoint_type",
        "value.range.lo.qualifier",
        "value.unit",
    ])
    .agg({"value.range.lo": ["min", "max", "mean", "std", "count"]})
    .reset_index()
)

# Flatten the two-level column index into "<column>_<statistic>" names.
# Iterating the MultiIndex yields the same tuples as the deprecated
# `.ravel()` call; the grouping keys keep their trailing "_" because their
# second level is empty.
tmp.columns = ["_".join(x) for x in tmp.columns]

print("Substances {}".format(tmp.shape[0]))
display(tmp.fillna(''))


Substances 12


Unnamed: 0,m.public.name_,p.guidance_,value.endpoint_,value.endpoint_type_,value.range.lo.qualifier_,value.unit_,value.range.lo_min,value.range.lo_max,value.range.lo_mean,value.range.lo_std,value.range.lo_count
0,CuPhthalo_halogen,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,2.1002,2.1002,2.1002,,1
1,CuPhthalo_nano,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,1.5825,1.5825,1.5825,,1
2,DPP_nano,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,1.6289,1.6289,1.6289,,1
3,DPP_non-nano,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,1.6299,1.6299,1.6299,,1
4,DPP_premixed,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,1.6425,1.6425,1.6425,,1
5,Fe2O3_nano_A,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,4.2612,4.2612,4.2612,,1
6,Fe2O3_nano_B,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,5.1894,5.1894,5.1894,,1
7,JRCNM01100a,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,5.67,5.67,5.67,,1
8,JRCNM02102a,DIN EN ISO 1183-3,DENSITY_SKELETAL,,,g/cm3,7.12,7.12,7.12,,1
9,JRCNM02102a,"NANOREG DISPERSION PROTOCOL, VOLUMETRIC CELL M...",EFFECTIVE DENSITY,,,g/cm3,2.214185,2.214185,2.214185,,1


.