Skip to content

Commit

Permalink
Merge pull request #16 from joshwlambert/pkg_rev
Browse files Browse the repository at this point in the history
Package review, updates and addition of translation and cleaning functions
  • Loading branch information
sarahollis committed May 15, 2023
2 parents f7cedb7 + f6daa6f commit ba1f8a8
Show file tree
Hide file tree
Showing 143 changed files with 7,621 additions and 1,209 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@ Imports:
stringr,
tibble,
tidyr,
tidyverse,
urltools
urltools,
janitor
Suggests:
devtools,
testthat
devtools,
testthat (>= 3.0.0)
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
Depends:
R (>= 2.10)
Config/testthat/edition: 3
24 changes: 21 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
# Generated by roxygen2: do not edit by hand

export(cases_from_contacts)
export(check_godata_url)
export(clean_case_address_history)
export(clean_case_med_history)
export(clean_case_vax_history)
export(clean_cases)
export(clean_contact_address_history)
export(clean_contact_vax_history)
export(clean_contacts)
export(clean_contacts_of_contacts)
export(clean_contacts_of_contacts_address_history)
export(clean_contacts_of_contacts_vax_history)
export(clean_events)
export(clean_followups)
export(clean_locations)
export(clean_relationships)
export(clean_teams)
export(clean_users)
export(contacts_per_case)
export(expand_location_tree)
export(exposures_per_case)
export(get_access_token)
export(get_active_outbreak)
export(get_all_outbreaks)
export(get_cases)
export(get_cases_epiwindow)
export(get_cases_questionnaire)
export(get_clusters)
export(get_contacts)
export(get_contacts_epiwindow)
Expand All @@ -29,18 +49,16 @@ export(get_users)
export(mongify_date)
export(null2na)
export(set_active_outbreak)
export(translate_categories)
import(data.table)
import(dplyr)
import(httr)
import(jsonlite)
import(lubridate)
import(purrr)
import(stringr)
import(tibble)
import(tidyr)
import(urltools)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(purrr,pluck)
importFrom(stringr,str_split)
importFrom(utils,read.csv)
120 changes: 73 additions & 47 deletions R/batch_downloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
#'
#' A housekeeping function to do batch downloads.
#'
#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!
#' @param url Insert the base URL for your instance of Go.Data here. Don't
#' forget the forward slash "/" at end!
#' @param username The email address for your Go.Data login.
#' @param password The password for your Go.Data login
#' @param api_call_n The API url to get the number of records.
#' @param api_call_get The API url to GET the records.
#' @param batch_size Specifies the number of records to retrieve in each iteration.
#' @param batch_size Specifies the number of records to retrieve in each
#' iteration.
#'
#' @return
#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assistance with unnesting.
#' Returns a data frame. Some fields, such as addresses, hospitalization
#' history, and questionnaire fields may require further unnesting. See
#' `\link[tidyr]{nest}` for assistance with unnesting.
#'
#' @examples
#' \dontrun{
Expand All @@ -19,66 +23,88 @@
#' password <- "mypassword"
#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b"
#'
#' cases <- get_cases(url=url,
#' username=username,
#' password=password,
#' outbreak_id=outbreak_id)
#' cases <- get_cases(
#' url = url,
#' username = username,
#' password = password,
#' outbreak_id = outbreak_id
#' )
#' }
#' @importFrom magrittr %>%
#' @import dplyr
#' @import tidyr
#' @import httr
#' @import tibble
#' @importFrom jsonlite fromJSON
#' @importFrom purrr pluck
#'
#'
batch_downloader <- function(url,
                             username,
                             password,
                             api_call_n,
                             api_call_get,
                             batch_size) {

  # Ask the counting endpoint how many records are available, authenticating
  # with a fresh bearer token for this Go.Data session
  num_record_request <- httr::GET(
    paste0(api_call_n),
    httr::add_headers(
      Authorization = paste("Bearer", get_access_token(
        url = url,
        username = username,
        password = password
      ), sep = " ")
    )
  )

  num_record_content <- httr::content(num_record_request, as = "text")

  num_records <- jsonlite::fromJSON(num_record_content, flatten = TRUE)
  num_records <- num_records$count

  # Accumulator for the downloaded batches; `skip` is the paging offset
  df <- tibble::tibble()
  skip <- 0
  message("****************************")

  # Download records in batches, and then append them into a single dataset
  while (skip < num_records) {

    # Progress message
    if (num_records <= batch_size) {
      message(paste0("...downloading records 1 to ", num_records))
    }
    if (num_records > batch_size) {
      message(
        paste0(
          "...downloading records ",
          as.character(skip + 1, scientific = FALSE),
          " to ",
          format(skip + batch_size, scientific = FALSE)
        )
      )
    }

    # Fetch the next batch of records, paging via the URL-encoded
    # {"limit": batch_size, "skip": skip} filter
    record_request <- httr::GET(
      paste0(
        api_call_get,
        "?filter={%22limit%22:",
        format(batch_size, scientific = FALSE),
        ",%22skip%22:",
        format(skip, scientific = FALSE),
        "}"
      ),
      httr::add_headers(
        Authorization = paste("Bearer", get_access_token(
          url = url,
          username = username,
          password = password),
          sep = " ")
      )
    )

    record_content <- httr::content(record_request, as = "text")

    records <- jsonlite::fromJSON(record_content, flatten = TRUE)

    records <- tibble::as_tibble(records)

    # Append the new batch of records to the existing data frame
    df <- dplyr::bind_rows(df, records)

    # Update numbers for the next iteration
    skip <- skip + batch_size
    records <- NULL
  }

  return(df)
}


106 changes: 106 additions & 0 deletions R/cases_from_contacts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#' Pull out all cases that used to be contacts
#'
#' @param cases_clean The cleaned case data. Case data is returned by
#' [`get_cases()`] and cleaned by [`clean_cases()`].
#'
#' @return A tibble containing the cases that used to be contacts.
#' @export
#'
#' @examples
#' \dontrun{
#' url <- "https://MyGoDataServer.com/"
#' username <- "myemail@email.com"
#' password <- "mypassword"
#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b"
#'
#' cases <- get_cases(
#'   url = url,
#'   username = username,
#'   password = password,
#'   outbreak_id = outbreak_id
#' )
#'
#' # other cleaned data required for `clean_cases()`
#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases)
#' cases_address_history_clean <- clean_case_address_history(cases = cases)
#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases)
#'
#' cases_clean <- clean_cases(
#'   cases = cases,
#'   cases_address_history_clean = cases_address_history_clean,
#'   cases_vacc_history_clean = cases_vacc_history_clean,
#'   cases_dateranges_history_clean = cases_dateranges_history_clean
#' )
#'
#' cases_from_contacts <- cases_from_contacts(cases_clean = cases_clean)
#' }
cases_from_contacts <- function(cases_clean) {

  # keep only the records flagged as having previously been contacts
  # (idiomatic logical filter; `== TRUE` comparison is redundant)
  contacts_becoming_cases <- dplyr::filter(
    .data = cases_clean,
    .data$was_contact
  )

  # set this status to became case and no longer active
  # (trailing comma after the last argument removed: it relies on rlang's
  # tolerance for empty dots and is invalid in a plain R call)
  contacts_becoming_cases <- dplyr::mutate(
    .data = contacts_becoming_cases,
    follow_up_status = "BECAME_CASE",
    was_case = NA,
    date_of_last_contact = NA,
    follow_up_team_id = NA,
    relationship_exposure_type = NA,
    relationship_context_of_transmission = NA,
    relationship_exposure_duration = NA,
    relationship_exposure_frequency = NA,
    relationship_certainty_level = NA,
    relationship_cluster_id = NA
  )

  # organize order of vars, only bring in what we need, take away confusing vars
  contacts_becoming_cases <- dplyr::select(
    .data = contacts_becoming_cases,
    "id", # identifier
    "visual_id", # identifier
    "classification", # identifier
    "follow_up_status", # identifier
    "first_name", # demographics
    "middle_name", # demographics
    "last_name", # demographics
    "gender", # demographics
    "age", # demographics
    "age_class", # demographics
    "occupation", # demographics
    "pregnancy_status", # demographics
    "date_of_reporting", # dates
    "date_of_last_contact", # dates
    "date_of_burial", # dates
    "risk_level", # epi
    "risk_reason", # epi
    "responsible_user_id", # assigned contact tracer
    "follow_up_team_id", # assigned contact tracer
    dplyr::matches("^admin_.*name$"), # address
    "lat", # address
    "long", # address
    "address", # address
    "postal_code", # address
    "city", # address
    "telephone", # address
    "email", # address
    "vaccinated",
    "outcome", # outcome
    "date_of_outcome", # outcome
    "relationship_exposure_type",
    "relationship_context_of_transmission",
    "relationship_exposure_duration",
    "relationship_exposure_frequency",
    "relationship_certainty_level",
    "relationship_cluster_id",
    "location_id", # uuid in case need later for joining of whatever sort
    "created_by", # record modification
    "datetime_created_at", # record modification
    "updated_by", # record modification
    "datetime_updated_at" # record modification
  )

  return(contacts_becoming_cases)
}
22 changes: 11 additions & 11 deletions R/check_godata_url.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,28 @@
#' is valid. This is a housekeeping function
#' used in many of the other `godataR` functions.
#'
#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!
#' @param url Insert the base URL for your instance of Go.Data here. Don't
#' forget the forward slash "/" at end!
#' @param success_code A numeric specifying which code is returned by the API
#' when successfully returning the status code. Default is 200.
#'
#' @return
#' Boolean, where `TRUE` indicates a valid URL.
#' @examples
#' \dontrun{
#' url <- "https://MyGoDataServer.com/"
#' check_godata_url(url=url)
#' check_godata_url(url = url)
#' }
#' @importFrom magrittr %>%
#' @import httr
#' @importFrom purrr pluck
#' @export
check_godata_url <- function(url,
                             success_code = 200) {

  # Get status code for version check: a reachable, valid Go.Data instance
  # answers its version endpoint with the expected success code
  status_code <- httr::GET(paste0(url, "api/system-settings/version"))

  status_code <- purrr::pluck(status_code, "status_code")

  # return boolean based on status code being a success; isTRUE() guards
  # against pluck() returning NULL when the request fails outright
  return(isTRUE(status_code == success_code))

}
Loading

0 comments on commit ba1f8a8

Please sign in to comment.