## Connect to the NHANES data resource using the HPDS Adapter

In [1]:
library(PicSureClient)
library(PicSureHpdsLib)

adapter <- PicSureHpdsLib::BypassAdapter$new("http://pic-sure-hpds-nhanes:8080/PIC-SURE")
resource <- adapter$useResource()

### A "resource" allows access to the Data Dictionary and Query Engine of the underlying  data source

In [2]:
resource$help()

        [HELP] PicSureHpdsLib::Adapter$useResource(resource_uuid)
            $dictionary()       Used to access data dictionary of the resource
            $query()            Used to query against data in the resource
        [ENVIRONMENT]
            URL: http://pic-sure-hpds-nhanes:8080/PIC-SURE/
  Resource UUID: FALSE


## Create a new query instance and use .help() to see what can be done with it

In [3]:
query <- resource$query()
query$help()

        $select()   list of data fields to return from resource for each record
        $require()  list of data fields that must be present in all returned records
        $filter()   list of data fields and conditions that returned records satisfy
           [ Filter keys exert an AND relationship on returned records      ]
           [ Categorical values have an OR relationship on their key        ]
           [ Numerical Ranges are inclusive of their start and end points   ]

        $getCount()             returns a count indicating the number of matching numbers
        $getResults()           returns a CSV-like string containing the matching records
        $getResultsDataFrame()  returns a pandas DataFrame containing the matching records
        $getRunDetails()        returns details about the last run of the query
        $getQueryCommand()      returns the JSON-formatted query request
        $show()                 lists all current query parameters

        * $getCount(), 

### Use .help() to see how to add entries to the selection criteria

In [4]:
query$select()$help()


          select()$
             add("key")            add a single column to be returned in results
             delete("key")         delete a single column from the list of columns to return
             show()                lists all current columns that will be returned in results
             clear()               clears all values from the select list

### Use a chained of commands on the dictionary object to include all labs in the query's selection list

In [5]:
lab_keys <- resource$dictionary()$find("laboratory")$keys()
query$select()$add(lab_keys)

No encoding supplied: defaulting to UTF-8.
In if (has.key(key, self$data) == TRUE) {: the condition has length > 1 and only the first element will be used

### Use the .show() command to see what is currently selected for the query

In [6]:
query$show()

{
    "query": {
        "fields": [
            "\\laboratory\\pcbs\\PCB157 (ng per g)\\",
            "\\laboratory\\biochemistry\\Creatinine, urine (umol per L)\\15912\\",
            "\\laboratory\\nutrients\\trans-b-carotene(ug per dL)\\",
            "\\laboratory\\biochemistry\\1 Creatinine (mg per dL)\\",
            "\\laboratory\\biochemistry\\Creatinine, urine (umol per L)\\33415\\",
            "\\laboratory\\volatile compounds\\Trichloroethene (ug per cubic meter)\\",
            "\\laboratory\\acrylamide\\Glycideamide (pmoL per G Hb)\\",
            "\\laboratory\\pesticides\\Diaminochloroatrazine (ug per L)\\",
            "\\laboratory\\biochemistry\\Creatinine, urine (umol per L)\\14586\\",
            "\\laboratory\\nutrients\\Phytofluene(ug per dL)\\",
            "\\laboratory\\volatile compounds\\Toluene (ug per cubic meter)\\",
            "\\laboratory\\hormone\\Insulin: SI(pmol per L)\\",
            "\\laboratory\\dioxins\\1,2,3,7,8,9-hxcdd (fg per g)\\",
     

### Clear the current selection list and add only "\\laboratory\\hormone\\" results

In [7]:
query$select()$clear()
query$select()$add(resource$dictionary()$find("\\laboratory\\hormone\\")$keys())
query$select()$show()

No encoding supplied: defaulting to UTF-8.
In if (has.key(key, self$data) == TRUE) {: the condition has length > 1 and only the first element will be used

[
    "\\laboratory\\hormone\\Follicle stimulating hormone (mIU per mL)\\",
    "\\laboratory\\hormone\\Thyroxine (T4) (ug per dL)\\",
    "\\laboratory\\hormone\\Parathyroid Hormone(Elecys method) pg per mL\\",
    "\\laboratory\\hormone\\",
    "\\laboratory\\hormone\\Insulin (uU per mL)\\",
    "\\laboratory\\hormone\\Luteinizing hormone (mIU per mL)\\",
    "\\laboratory\\hormone\\Thyroid stim hormone (TSH) (IU per L)\\",
    "\\laboratory\\hormone\\Insulin: SI(pmol per L)\\"
]
 

### Run some commands on the dictionary to find and then filter the query by gender 

In [8]:
gender <- resource$dictionary()$find("sex")
gender$help()

No encoding supplied: defaulting to UTF-8.


        [HELP] PicSureHpdsLib$Adapter(connection)$useResource(uuid)$dictionary()$find(term)
            $count()        Returns the number of entries in the dictionary that match the given term
            $keys()         Return the keys of the matching entries
            $entries()      Return a list of matching dictionary entries
            $DataFrame()    Return the entries in a DataFrame-compatible format

        [Examples]
            results = PicSureHpdsLib$Adapter(connection)$useResource(uuid)$dictionary()$find('asthma')
            df = results$DataFrame()


In [9]:
gender$DataFrame()

Unnamed: 0,name,categorical,categoryValues,observationCount,HpdsDataType,min,max
1,\questionnaire\sexual behavior\Are you circumcised or uncircumcised\,True,"No,Yes",5178,phenotypes,,
2,\demographics\SEX\,True,"female,male",41474,phenotypes,,
3,\questionnaire\sexual behavior\Ever had sexual intercourse\,True,"No,Yes",8271,phenotypes,,
4,\questionnaire\sexual behavior\,False,,41474,phenotypes,0.0,17040.0


### Add a filter on our query to filter results to only female subjects

In [10]:
query$filter()$help()


            filter()$
              add("key", value)                  - or -
              add("key", "value")               filter to records with KEY column that equals VALUE
              add("key", ["value1", "value2"])  filter to records with KEY column equalling one value within the given list
              add("key", start, end)            filter to records with KEY column value between START and END (inclusive)
                                                    start -or- end may be set to None to filter by a max or min value
              delete("key")                     delete a filter from the list of filters
              show()                            lists all current filters that results records must satisfy
              clear()                           clears all values from the filters list

In [11]:
query$filter()$add("\\demographics\\SEX\\", list("female"))

## View our query's criterion as it currently stands

In [12]:
query$show()

{
    "query": {
        "fields": [
            "\\laboratory\\hormone\\Follicle stimulating hormone (mIU per mL)\\",
            "\\laboratory\\hormone\\Thyroxine (T4) (ug per dL)\\",
            "\\laboratory\\hormone\\Parathyroid Hormone(Elecys method) pg per mL\\",
            "\\laboratory\\hormone\\",
            "\\laboratory\\hormone\\Insulin (uU per mL)\\",
            "\\laboratory\\hormone\\Luteinizing hormone (mIU per mL)\\",
            "\\laboratory\\hormone\\Thyroid stim hormone (TSH) (IU per L)\\",
            "\\laboratory\\hormone\\Insulin: SI(pmol per L)\\"
        ],
        "requiredFields": [

        ],
        "numericFilters": {

        },
        "categoryFilters": {
            "\\demographics\\SEX\\": [
                "female"
            ]
        },
        "expectedResultType": "DATAFRAME"
    }
}
 


### Run the query and display the results

In [13]:
query$help()

        $select()   list of data fields to return from resource for each record
        $require()  list of data fields that must be present in all returned records
        $filter()   list of data fields and conditions that returned records satisfy
           [ Filter keys exert an AND relationship on returned records      ]
           [ Categorical values have an OR relationship on their key        ]
           [ Numerical Ranges are inclusive of their start and end points   ]

        $getCount()             returns a count indicating the number of matching numbers
        $getResults()           returns a CSV-like string containing the matching records
        $getResultsDataFrame()  returns a pandas DataFrame containing the matching records
        $getRunDetails()        returns details about the last run of the query
        $getQueryCommand()      returns the JSON-formatted query request
        $show()                 lists all current query parameters

        * $getCount(), 

In [14]:
query$getCount()

No encoding supplied: defaulting to UTF-8.


In [15]:
query$getResultsDataFrame()

No encoding supplied: defaulting to UTF-8.


Unnamed: 0,Patient.ID,X.demographics.SEX.,X.laboratory.hormone.,X.laboratory.hormone.Follicle.stimulating.hormone..mIU.per.mL..,X.laboratory.hormone.Insulin..uU.per.mL..,X.laboratory.hormone.Insulin..SI.pmol.per.L..,X.laboratory.hormone.Luteinizing.hormone..mIU.per.mL..,X.laboratory.hormone.Parathyroid.Hormone.Elecys.method..pg.per.mL.,X.laboratory.hormone.Thyroid.stim.hormone..TSH...IU.per.L..,X.laboratory.hormone.Thyroxine..T4...ug.per.dL..
1,3,female,,,,,,,,
2,4,female,,46.1,,,32.44,,0.85,7.8
3,6,female,,,,,,,,
4,8,female,,25.99,8.3,49.8,8.65,,2.5,7.8
5,11,female,,,,,,,0.91,9.4
6,12,female,,,7.04,42.24,,,,
7,13,female,0.9,,12.92,77.52,,,,
8,14,female,,,,,,,,
9,15,female,,,29.5,,,56,,
10,16,female,,,15.73,94.38,,,1.88,8.3


# You can even work with multiple queries at the same time without having them accidently interact with each other!

In [16]:
query2 <- resource$query()
query3 <- resource$query()


query2$select()$add(resource$dictionary()$find("\\demographics\\AGE")$keys())
query3$select()$add(resource$dictionary()$find("\\demographics\\AGE")$keys())


#query2$filter()$add(resource$dictionary()$find("\\demographics\\AGE").keys(), min=60)
#query3$filter()$add(resource$dictionary()$find("\\demographics\\AGE").keys(), max=20)
query2$filter()$add(resource$dictionary()$find("\\demographics\\AGE")$keys(), 60, NULL)
query3$filter()$add(resource$dictionary()$find("\\demographics\\AGE")$keys(), NULL, 20)

too_old <- query2$getResultsDataFrame()
too_young <- query3$getResultsDataFrame()

No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.


In [23]:
too_young

Unnamed: 0,Patient.ID,X.demographics.AGE.
1,3,0
2,5,18
3,6,4
4,11,17
5,19,12
6,21,2
7,24,1
8,25,18
9,29,17
10,30,19


In [24]:
too_old

Unnamed: 0,Patient.ID,X.demographics.AGE.
1,2,85
2,12,71
3,23,73
4,27,62
5,31,72
6,32,85
7,35,62
8,37,68
9,39,80
10,55,60
