Skip to content

Commit

Permalink
Merge pull request #16 from joshwlambert/pkg_rev
Browse files Browse the repository at this point in the history
Package review, updates and addition of translation and cleaning functions
  • Loading branch information
sarahollis committed May 15, 2023
2 parents f7cedb7 + f6daa6f commit ba1f8a8
Show file tree
Hide file tree
Showing 143 changed files with 7,621 additions and 1,209 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@ Imports:
stringr,
tibble,
tidyr,
tidyverse,
urltools
urltools,
janitor
Suggests:
devtools,
testthat
devtools,
testthat (>= 3.0.0)
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
Depends:
R (>= 2.10)
Config/testthat/edition: 3
24 changes: 21 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
# Generated by roxygen2: do not edit by hand

export(cases_from_contacts)
export(check_godata_url)
export(clean_case_address_history)
export(clean_case_med_history)
export(clean_case_vax_history)
export(clean_cases)
export(clean_contact_address_history)
export(clean_contact_vax_history)
export(clean_contacts)
export(clean_contacts_of_contacts)
export(clean_contacts_of_contacts_address_history)
export(clean_contacts_of_contacts_vax_history)
export(clean_events)
export(clean_followups)
export(clean_locations)
export(clean_relationships)
export(clean_teams)
export(clean_users)
export(contacts_per_case)
export(expand_location_tree)
export(exposures_per_case)
export(get_access_token)
export(get_active_outbreak)
export(get_all_outbreaks)
export(get_cases)
export(get_cases_epiwindow)
export(get_cases_questionnaire)
export(get_clusters)
export(get_contacts)
export(get_contacts_epiwindow)
Expand All @@ -29,18 +49,16 @@ export(get_users)
export(mongify_date)
export(null2na)
export(set_active_outbreak)
export(translate_categories)
import(data.table)
import(dplyr)
import(httr)
import(jsonlite)
import(lubridate)
import(purrr)
import(stringr)
import(tibble)
import(tidyr)
import(urltools)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(purrr,pluck)
importFrom(stringr,str_split)
importFrom(utils,read.csv)
120 changes: 73 additions & 47 deletions R/batch_downloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
#'
#' A housekeeping function to do batch downloads.
#'
#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!
#' @param url Insert the base URL for your instance of Go.Data here. Don't
#' forget the forward slash "/" at end!
#' @param username The email address for your Go.Data login.
#' @param password The password for your Go.Data login
#' @param api_call_n The API url to get the number of records.
#' @param api_call_get The API url to GET the records.
#' @param batch_size Specifies the number of records to retrieve in each iteration.
#' @param batch_size Specifies the number of records to retrieve in each
#' iteration.
#'
#' @return
#' Returns a data frame. Some fields, such as addresses, hospitalization history, and questionnaire fields may require further unnesting. See \code{\link[tidyr]{nest}} for assistance with unnesting.
#' Returns a data frame. Some fields, such as addresses, hospitalization
#' history, and questionnaire fields may require further unnesting. See
#' `\link[tidyr]{nest}` for assistance with unnesting.
#'
#' @examples
#' \dontrun{
Expand All @@ -19,66 +23,88 @@
#' password <- "mypassword"
#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b"
#'
#' cases <- get_cases(url=url,
#' username=username,
#' password=password,
#' outbreak_id=outbreak_id)
#' cases <- get_cases(
#' url = url,
#' username = username,
#' password = password,
#' outbreak_id = outbreak_id
#' )
#' }
#' @importFrom magrittr %>%
#' @import dplyr
#' @import tidyr
#' @import httr
#' @import tibble
#' @importFrom jsonlite fromJSON
#' @importFrom purrr pluck
#'
#'
batch_downloader <- function(url,
                             username,
                             password,
                             api_call_n,
                             api_call_get,
                             batch_size) {

  # Ask the counting endpoint how many records are available, authenticating
  # with a fresh bearer token for this Go.Data session
  num_record_request <- httr::GET(
    paste0(api_call_n),
    httr::add_headers(
      Authorization = paste("Bearer", get_access_token(
        url = url,
        username = username,
        password = password
      ), sep = " ")
    )
  )

  num_record_content <- httr::content(num_record_request, as = "text")

  num_records <- jsonlite::fromJSON(num_record_content, flatten = TRUE)
  num_records <- num_records$count

  # Accumulator for the downloaded batches; `skip` is the paging offset
  df <- tibble::tibble()
  skip <- 0
  message("****************************")

  # Download records in batches, and then append them into a single dataset
  while (skip < num_records) {

    # Progress message
    if (num_records <= batch_size) {
      message(paste0("...downloading records 1 to ", num_records))
    }
    if (num_records > batch_size) {
      message(
        paste0(
          "...downloading records ",
          as.character(skip + 1, scientific = FALSE),
          " to ",
          format(skip + batch_size, scientific = FALSE)
        )
      )
    }

    # Fetch the next batch of records, paging via the URL-encoded
    # {"limit": batch_size, "skip": skip} filter
    record_request <- httr::GET(
      paste0(
        api_call_get,
        "?filter={%22limit%22:",
        format(batch_size, scientific = FALSE),
        ",%22skip%22:",
        format(skip, scientific = FALSE),
        "}"
      ),
      httr::add_headers(
        Authorization = paste("Bearer", get_access_token(
          url = url,
          username = username,
          password = password),
          sep = " ")
      )
    )

    record_content <- httr::content(record_request, as = "text")

    records <- jsonlite::fromJSON(record_content, flatten = TRUE)

    records <- tibble::as_tibble(records)

    # Append the new batch of records to the existing data frame
    df <- dplyr::bind_rows(df, records)

    # Update numbers for the next iteration
    skip <- skip + batch_size
    records <- NULL
  }

  return(df)
}


106 changes: 106 additions & 0 deletions R/cases_from_contacts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#' Pull out all cases that used to be contacts
#'
#' @param cases_clean The cleaned case data. Case data is returned by
#' [`get_cases()`] and cleaned by [`clean_cases()`].
#'
#' @return A tibble containing the cases that used to be contacts.
#' @export
#'
#' @examples
#' \dontrun{
#' url <- "https://MyGoDataServer.com/"
#' username <- "myemail@email.com"
#' password <- "mypassword"
#' outbreak_id <- "3b5554d7-2c19-41d0-b9af-475ad25a382b"
#'
#' cases <- get_cases(
#'   url = url,
#'   username = username,
#'   password = password,
#'   outbreak_id = outbreak_id
#' )
#'
#' # other cleaned data required for `clean_cases()`
#' cases_vacc_history_clean <- clean_case_vax_history(cases = cases)
#' cases_address_history_clean <- clean_case_address_history(cases = cases)
#' cases_dateranges_history_clean <- clean_case_med_history(cases = cases)
#'
#' cases_clean <- clean_cases(
#'   cases = cases,
#'   cases_address_history_clean = cases_address_history_clean,
#'   cases_vacc_history_clean = cases_vacc_history_clean,
#'   cases_dateranges_history_clean = cases_dateranges_history_clean
#' )
#'
#' cases_from_contacts <- cases_from_contacts(cases_clean = cases_clean)
#' }
cases_from_contacts <- function(cases_clean) {

  # keep only the records flagged as having previously been contacts
  # (idiomatic logical filter; `== TRUE` comparison is redundant)
  contacts_becoming_cases <- dplyr::filter(
    .data = cases_clean,
    .data$was_contact
  )

  # set this status to became case and no longer active
  # (trailing comma after the last argument removed: it relies on rlang's
  # tolerance for empty dots and is invalid in a plain R call)
  contacts_becoming_cases <- dplyr::mutate(
    .data = contacts_becoming_cases,
    follow_up_status = "BECAME_CASE",
    was_case = NA,
    date_of_last_contact = NA,
    follow_up_team_id = NA,
    relationship_exposure_type = NA,
    relationship_context_of_transmission = NA,
    relationship_exposure_duration = NA,
    relationship_exposure_frequency = NA,
    relationship_certainty_level = NA,
    relationship_cluster_id = NA
  )

  # organize order of vars, only bring in what we need, take away confusing vars
  contacts_becoming_cases <- dplyr::select(
    .data = contacts_becoming_cases,
    "id", # identifier
    "visual_id", # identifier
    "classification", # identifier
    "follow_up_status", # identifier
    "first_name", # demographics
    "middle_name", # demographics
    "last_name", # demographics
    "gender", # demographics
    "age", # demographics
    "age_class", # demographics
    "occupation", # demographics
    "pregnancy_status", # demographics
    "date_of_reporting", # dates
    "date_of_last_contact", # dates
    "date_of_burial", # dates
    "risk_level", # epi
    "risk_reason", # epi
    "responsible_user_id", # assigned contact tracer
    "follow_up_team_id", # assigned contact tracer
    dplyr::matches("^admin_.*name$"), # address
    "lat", # address
    "long", # address
    "address", # address
    "postal_code", # address
    "city", # address
    "telephone", # address
    "email", # address
    "vaccinated",
    "outcome", # outcome
    "date_of_outcome", # outcome
    "relationship_exposure_type",
    "relationship_context_of_transmission",
    "relationship_exposure_duration",
    "relationship_exposure_frequency",
    "relationship_certainty_level",
    "relationship_cluster_id",
    "location_id", # uuid in case need later for joining of whatever sort
    "created_by", # record modification
    "datetime_created_at", # record modification
    "updated_by", # record modification
    "datetime_updated_at" # record modification
  )

  return(contacts_becoming_cases)
}
22 changes: 11 additions & 11 deletions R/check_godata_url.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,28 @@
#' is valid. This is a housekeeping function
#' used in many of the other `godataR` functions.
#'
#' @param url Insert the base URL for your instance of Go.Data here. Don't forget the forward slash "/" at end!
#' @param url Insert the base URL for your instance of Go.Data here. Don't
#' forget the forward slash "/" at end!
#' @param success_code A numeric specifying which code is returned by the API
#' when successfully returning the status code. Default is 200.
#'
#' @return
#' Boolean, where `TRUE` indicates a valid URL.
#' @examples
#' \dontrun{
#' url <- "https://MyGoDataServer.com/"
#' check_godata_url(url=url)
#' check_godata_url(url = url)
#' }
#' @importFrom magrittr %>%
#' @import httr
#' @importFrom purrr pluck
#' @export
check_godata_url <- function(url,
                             success_code = 200) {

  # Get status code for version check: a reachable, valid Go.Data instance
  # answers its version endpoint with the expected success code
  status_code <- httr::GET(paste0(url, "api/system-settings/version"))

  status_code <- purrr::pluck(status_code, "status_code")

  # return boolean based on status code being a success; isTRUE() guards
  # against pluck() returning NULL when the request fails outright
  return(isTRUE(status_code == success_code))

}
Loading

0 comments on commit ba1f8a8

Please sign in to comment.