# Load all the necessary libraries; these should already be installed.

### If you need additional libraries, add them to the dependencies.r files and rebuild the image


In [None]:
library(knitr)
library(plotly)
library(httr)
library(jsonlite)
library(purrr)
library(magrittr)
library(GetoptLong)

# Functions that will eventually be in an R library and maintained alongside the API

### This is PIC-SURE-HPDS specific code, the same functionality is available through PIC-SURE 2's HPDS resource, but this saves you the PIC-SURE ceremonial wrapping of JSON in more JSON.


In [None]:

# Double every backslash in `field` (literal, non-regex replacement) so the
# text can be embedded inside a JSON string without the backslashes being
# read as escape characters. Works element-wise on character vectors.
escapeSlashes <- function(field) {
    singleSlash <- "\\"
    doubledSlash <- "\\\\"
    gsub(singleSlash, doubledSlash, field, fixed = TRUE)
}

# Turn `value` into a JSON string literal: escape it and surround it with
# double quotes. Works element-wise on character vectors.
wrapInQuotes <- function(value){
    escaped <- escapeSlashes(value)
    # Escape embedded double quotes as well; otherwise a value containing '"'
    # would terminate the JSON string early and corrupt the request body.
    # This runs after the backslash pass so the backslash added here is not doubled.
    escaped <- gsub('"', '\\"', escaped, fixed = TRUE)
    paste0('"', escaped, '"')
}

# Build the JSON fragment for one numeric range filter, in the form
# "<field>":{"min":<min>,"max":<max>}. Backslashes in `field` are doubled
# first so the key is safe inside a JSON string.
numericFilter <- function(field, min, max) {
    template <- '"@{field}":{"min":@{min},"max":@{max}}'
    field <- escapeSlashes(field)
    # qq() interpolates @{...} from this function's own variables.
    qq(template)
}

# Build the JSON fragment for one category filter, in the form
# "<field>":["cat1", "cat2", ...]. Each category is quoted via wrapInQuotes
# and backslashes in `field` are doubled.
categoryFilter <- function(field, categories) {
    template <- '"@{field}":[@{categories}]'
    field <- escapeSlashes(field)
    categories <- categories %>%
        map(wrapInQuotes) %>%
        paste(collapse = ', ')
    # qq() interpolates @{...} from this function's own variables.
    qq(template)
}


# Search the HPDS data dictionary for `query` and return the matching entries
# as one data frame (one row per entry in the response's `results`).
# Each entry's `categoryValues` are merged into a single comma-separated string.
# Raises an error if the server answers with an HTTP error status.
dictionary <- function(query){
    # Serialise the body with jsonlite so quotes and backslashes in the search
    # term are escaped instead of producing invalid JSON.
    requestBody <- as.character(toJSON(list(query = query), auto_unbox = TRUE))
    response <- POST("http://pic-sure-hpds-nhanes:8080/PIC-SURE/search/", body = requestBody, content_type_json())
    # Fail loudly on HTTP errors rather than trying to parse an error payload.
    stop_for_status(response)

    # Collapse the list of category values into one string so every entry
    # contributes a single value to the `categoryValues` column.
    mergeCategoryValues <- function(entry){
        entry$categoryValues <- paste(entry$categoryValues, collapse = ", ")
        entry
    }

    map_df(content(response)$results, mergeCategoryValues)
}    

# Run a dictionary search for `query` and return the `name` column of the
# matches as a list, ready to pass as selectedFields / requiredFields.
dictionaryFields <- function(query) {
    matches <- dictionary(query)
    as.list(matches$name)
}

# Submit a query to the HPDS resource and return the parsed response
# (callers in this file read its `status` and `id`).
#   selectedFields  - field names to include in the result
#   requiredFields  - field names sent as "requiredFields"
#   numericFilters  - character vector of fragments built by numericFilter()
#   categoryFilters - character vector of fragments built by categoryFilter()
# All arguments are optional; NULL yields an empty JSON array/object.
query <- function(selectedFields = NULL, requiredFields = NULL, numericFilters = NULL, categoryFilters = NULL) {
    # Quote each field name and join them into the contents of a JSON array.
    fields <- function(selectedFields){
        paste(map(selectedFields, wrapInQuotes), collapse=', ')
    }

    # The filters must be joined with `collapse`, not `sep`: `sep` only separates
    # the arguments of paste(), so several filters would remain a multi-element
    # vector and qq() would no longer produce one well-formed JSON object.
    requestBody <- qq(
        '{"fields":[@{fields}],
        "requiredFields":[@{requiredFields}],
        "numericFilters":{@{numericFilters}},
        "categoryFilters":{@{categoryFilters}}}', 
          envir = list(
              fields = fields(selectedFields), 
              requiredFields = fields(requiredFields), 
              numericFilters = paste(numericFilters, collapse=","), 
              categoryFilters = paste(categoryFilters, collapse=",")))
    response <- POST("http://pic-sure-hpds-nhanes:8080/query", body=requestBody, content_type_json())
    content(response)
}

# Fetch the current status record of a previously submitted query by GETting
# <base>/query/<queryId>/status, and return the parsed response body.
getStatus <- function(queryId) {
    statusUrl <- paste("http://pic-sure-hpds-nhanes:8080/query", queryId, "status", sep = "/")
    GET(statusUrl) %>% content()
}

# Download the result of a query from <base>/query/<queryId>/result and read
# it as CSV into a data frame.
getResult <- function(queryId) {
    resultUrl <- paste("http://pic-sure-hpds-nhanes:8080/query", queryId, "result", sep = "/")
    read.csv(resultUrl)
}

# Submit a query, poll its status until it is no longer RUNNING/PENDING, and
# return the result.
#   On SUCCESS with at least one row: prints a one-line summary and returns
#   the result as a data frame (via getResult).
#   Otherwise: returns the last status response so the caller can inspect it.
# Arguments are passed straight through to query().
runQueryAndGetResult <- function(selectedFields = NULL, requiredFields = NULL, numericFilters = NULL, categoryFilters = NULL){
    # TRUE only while the server explicitly reports the query as queued or
    # executing. A response without a `status` field (e.g. an error payload)
    # counts as "not running" instead of breaking the loop condition.
    stillRunning <- function(state){
        is.list(state) && isTRUE(state$status %in% c("RUNNING", "PENDING"))
    }

    queryStatus <- query(selectedFields, requiredFields, numericFilters, categoryFilters)
    while(stillRunning(queryStatus)){
        Sys.sleep(.1)
        queryStatus <- getStatus(queryStatus$id)
    }

    # isTRUE() guards against missing `status` / `numRows` fields, which would
    # otherwise make the condition zero-length or NA and abort with an
    # unhelpful error.
    succeeded <- is.list(queryStatus) &&
        isTRUE(queryStatus$status == "SUCCESS") &&
        isTRUE(queryStatus$numRows > 0)

    if(succeeded){
        # Times are divided by 1000 to report seconds.
        print(paste0(
            queryStatus$numRows, " rows and ", 
            queryStatus$numColumns, " columns ", 
            "completed in : ", (queryStatus$completedTime - queryStatus$queuedTime)/1000, " seconds"))
        return (getResult(queryStatus$id))
    }

    queryStatus
}


# Let's start by looking at the whole dictionary file

## Notice that the important data is all here: if a variable is continuous we get its min and max values; if it's categorical we get all the available categories for filtering. We also get a count of observations. At this point this is a patient count only, because we haven't dealt with temporal relationships yet.

In [None]:
# Show every dictionary entry matching the search term "demographics".
dictionary("demographics")

# Let's look at something more interesting... all variables related to "blood"


In [None]:
# Show every dictionary entry matching the search term "blood".
dictionary("blood")

# OK, so how about all that data for "blood"? 

### Notice here we just pass the result of our dictionary search through the convenient dictionaryFields function, which builds a list of the matching field names. The query function then quotes and escapes those names for us.

In [None]:

# Select every field whose dictionary entry matches "blood" and fetch the data.
data <- runQueryAndGetResult(
    dictionaryFields("blood")
)

In [None]:
# Display the result of the previous query.
data

# OK, so how about all that data for "blood" but only where patients have a Basophils percent? 

### Notice we are requiring patients to have all fields that match the search "Basophils percent". If we try this for the search term "blood" it will require all fields that have blood in their path, which matches no patients so we would get no results.

In [None]:
# Same "blood" selection, passing the fields matching "Basophils percent"
# as requiredFields.
data <- runQueryAndGetResult(
    selectedFields = dictionaryFields("blood"),
    requiredFields = dictionaryFields("Basophils percent")
)


In [None]:
# Display the result of the previous query.
data

# Right... 33605 patients, let's try the same query, but only age 5-25

In [None]:
# Add a numeric filter on the AGE field with min 5 and max 25.
numFilters <- c(
    numericFilter("\\demographics\\AGE\\", 5, 25)
)
data <- runQueryAndGetResult(
    selectedFields = dictionaryFields("blood"),
    requiredFields = dictionaryFields("Basophils percent"),
    numericFilters = numFilters
)

In [None]:
# Display the result of the previous query.
data

# This is better, what about only Male patients who are of white or mexican descent and we want to add all lab values?

In [None]:
# Age 5-25 plus a category filter on RACE ("white" or "mexican"), selecting
# the union of the fields matching "laboratory" and "blood".
# NOTE(review): the heading for this cell asks for male patients, but no sex
# filter is applied here - confirm whether one should be added.
numFilters <- c(
    numericFilter("\\demographics\\AGE\\", 5, 25)
)
catFilters <- c(
    categoryFilter("\\demographics\\RACE\\", c("white", "mexican"))
)
data <- runQueryAndGetResult(
    selectedFields = union(dictionaryFields("laboratory"), dictionaryFields("blood")),
    requiredFields = dictionaryFields("Basophils percent"),
    numericFilters = numFilters,
    categoryFilters = catFilters
)

In [None]:
# Display the result of the previous query.
data

# Now if we really just want all the data, we can do something like this

In [None]:
# Search the dictionary with an empty term and select every field it returns.
data <- runQueryAndGetResult(
    dictionaryFields("")
)

In [None]:
# Display the result of the previous query.
data